pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +15 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +7 -1
- pygpt_net/app_core.py +3 -1
- pygpt_net/config.py +3 -1
- pygpt_net/controller/__init__.py +9 -2
- pygpt_net/controller/audio/audio.py +38 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +23 -62
- pygpt_net/controller/chat/handler/__init__.py +0 -0
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +3 -1071
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/lang/custom.py +2 -2
- pygpt_net/controller/media/__init__.py +12 -0
- pygpt_net/controller/media/media.py +115 -0
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +293 -0
- pygpt_net/controller/ui/mode.py +23 -2
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +14 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +2 -1
- pygpt_net/core/bridge/worker.py +4 -1
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/image/image.py +56 -5
- pygpt_net/core/realtime/__init__.py +0 -0
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +160 -0
- pygpt_net/core/render/web/body.py +24 -3
- pygpt_net/core/text/utils.py +54 -2
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +54 -0
- pygpt_net/core/video/__init__.py +12 -0
- pygpt_net/core/video/video.py +290 -0
- pygpt_net/data/config/config.json +26 -5
- pygpt_net/data/config/models.json +221 -103
- pygpt_net/data/config/settings.json +244 -6
- pygpt_net/data/css/web-blocks.css +6 -0
- pygpt_net/data/css/web-chatgpt.css +6 -0
- pygpt_net/data/css/web-chatgpt_wide.css +6 -0
- pygpt_net/data/locale/locale.de.ini +35 -7
- pygpt_net/data/locale/locale.en.ini +56 -17
- pygpt_net/data/locale/locale.es.ini +35 -7
- pygpt_net/data/locale/locale.fr.ini +35 -7
- pygpt_net/data/locale/locale.it.ini +35 -7
- pygpt_net/data/locale/locale.pl.ini +38 -7
- pygpt_net/data/locale/locale.uk.ini +35 -7
- pygpt_net/data/locale/locale.zh.ini +31 -3
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
- pygpt_net/item/model.py +22 -1
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/provider/api/google/__init__.py +76 -7
- pygpt_net/provider/api/google/audio.py +8 -1
- pygpt_net/provider/api/google/chat.py +45 -6
- pygpt_net/provider/api/google/image.py +226 -86
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/google/video.py +364 -0
- pygpt_net/provider/api/openai/__init__.py +22 -2
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/google_tts.py +0 -12
- pygpt_net/provider/audio_output/openai_tts.py +8 -5
- pygpt_net/provider/core/config/patch.py +241 -178
- pygpt_net/provider/core/model/patch.py +28 -2
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/provider/web/duckduck_search.py +212 -0
- pygpt_net/ui/layout/toolbox/audio.py +55 -0
- pygpt_net/ui/layout/toolbox/footer.py +14 -42
- pygpt_net/ui/layout/toolbox/image.py +7 -13
- pygpt_net/ui/layout/toolbox/raw.py +52 -0
- pygpt_net/ui/layout/toolbox/split.py +48 -0
- pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
- pygpt_net/ui/layout/toolbox/video.py +49 -0
- pygpt_net/ui/widget/option/combo.py +15 -1
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0
|
@@ -6,21 +6,33 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.08.
|
|
9
|
+
# Updated Date: 2025.08.31 04:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
|
+
from typing import Optional
|
|
12
13
|
from typing import List, Tuple
|
|
13
14
|
|
|
14
15
|
from bs4 import UnicodeDammit
|
|
15
|
-
|
|
16
|
+
|
|
16
17
|
import time
|
|
17
18
|
import numpy as np
|
|
18
19
|
import wave
|
|
19
20
|
|
|
20
|
-
from PySide6.QtMultimedia import
|
|
21
|
-
from PySide6.QtCore import QTimer, QObject,
|
|
22
|
-
|
|
21
|
+
from PySide6.QtMultimedia import QMediaDevices, QAudioFormat, QAudioSource, QAudio
|
|
22
|
+
from PySide6.QtCore import QTimer, QObject, QLoggingCategory
|
|
23
|
+
|
|
24
|
+
from pygpt_net.core.events import RealtimeEvent
|
|
23
25
|
|
|
26
|
+
from .realtime import RealtimeSession
|
|
27
|
+
from ..shared import (
|
|
28
|
+
qaudio_dtype,
|
|
29
|
+
qaudio_norm_factor,
|
|
30
|
+
qaudio_to_s16le,
|
|
31
|
+
convert_s16_pcm,
|
|
32
|
+
build_rt_input_delta_event,
|
|
33
|
+
build_output_volume_event,
|
|
34
|
+
)
|
|
35
|
+
from .player import NativePlayer
|
|
24
36
|
|
|
25
37
|
class NativeBackend(QObject):
|
|
26
38
|
|
|
@@ -72,6 +84,23 @@ class NativeBackend(QObject):
|
|
|
72
84
|
self._dtype = None
|
|
73
85
|
self._norm = None
|
|
74
86
|
|
|
87
|
+
self._rt_session: Optional[RealtimeSession] = None
|
|
88
|
+
self._rt_signals = None # set by core.audio.output on initialize()
|
|
89
|
+
|
|
90
|
+
# dedicated player wrapper (file playback + envelope metering)
|
|
91
|
+
self._player = NativePlayer(window=self.window, chunk_ms=self.chunk_ms)
|
|
92
|
+
|
|
93
|
+
# Reduce WASAPI debug spam on Windows-like backends (non-invasive).
|
|
94
|
+
try:
|
|
95
|
+
QLoggingCategory.setFilterRules(
|
|
96
|
+
"qt.multimedia.wasapi.debug=false\n"
|
|
97
|
+
"qt.multimedia.audio.debug=false\n"
|
|
98
|
+
"qt.multimedia.wasapi.info=false\n"
|
|
99
|
+
"qt.multimedia.audio.info=false"
|
|
100
|
+
)
|
|
101
|
+
except Exception:
|
|
102
|
+
pass
|
|
103
|
+
|
|
75
104
|
def init(self):
|
|
76
105
|
"""
|
|
77
106
|
Initialize audio input backend.
|
|
@@ -167,13 +196,19 @@ class NativeBackend(QObject):
|
|
|
167
196
|
if self.audio_io_device is not None:
|
|
168
197
|
self.audio_io_device.readyRead.disconnect(self.process_audio_input)
|
|
169
198
|
except (TypeError, RuntimeError):
|
|
170
|
-
# ignore if already disconnected or device gone
|
|
199
|
+
# ignore if already disconnected or device gone
|
|
171
200
|
pass
|
|
172
201
|
|
|
173
|
-
|
|
202
|
+
try:
|
|
203
|
+
self.audio_source.stop()
|
|
204
|
+
except Exception:
|
|
205
|
+
pass
|
|
174
206
|
self.audio_source = None
|
|
175
207
|
self.audio_io_device = None
|
|
176
208
|
|
|
209
|
+
# Emit final input chunk marker for realtime consumers
|
|
210
|
+
self._emit_rt_input_delta(b"", final=True)
|
|
211
|
+
|
|
177
212
|
# Save frames to file (if any)
|
|
178
213
|
if self.frames:
|
|
179
214
|
self.save_audio_file(self.path)
|
|
@@ -181,7 +216,7 @@ class NativeBackend(QObject):
|
|
|
181
216
|
else:
|
|
182
217
|
print("No audio data recorded")
|
|
183
218
|
|
|
184
|
-
# reset input volume on stop to visually indicate end of recording
|
|
219
|
+
# reset input volume on stop to visually indicate end of recording
|
|
185
220
|
self.reset_audio_level()
|
|
186
221
|
|
|
187
222
|
return result
|
|
@@ -245,6 +280,12 @@ class NativeBackend(QObject):
|
|
|
245
280
|
desired.setSampleFormat(QAudioFormat.SampleFormat.Int16)
|
|
246
281
|
if device.isFormatSupported(desired):
|
|
247
282
|
audio_format = desired
|
|
283
|
+
else:
|
|
284
|
+
# Choose nearest format to avoid silent captures on Windows.
|
|
285
|
+
try:
|
|
286
|
+
audio_format = device.nearestFormat(desired)
|
|
287
|
+
except Exception:
|
|
288
|
+
pass
|
|
248
289
|
|
|
249
290
|
try:
|
|
250
291
|
audio_source = QAudioSource(device, audio_format)
|
|
@@ -319,17 +360,34 @@ class NativeBackend(QObject):
|
|
|
319
360
|
desired.setSampleFormat(QAudioFormat.SampleFormat.Int16)
|
|
320
361
|
if audio_input_device.isFormatSupported(desired):
|
|
321
362
|
audio_format = desired
|
|
363
|
+
else:
|
|
364
|
+
# Use nearest supported format to requested (important on Windows/WASAPI).
|
|
365
|
+
try:
|
|
366
|
+
near = audio_input_device.nearestFormat(desired)
|
|
367
|
+
if near is not None:
|
|
368
|
+
audio_format = near
|
|
369
|
+
except Exception:
|
|
370
|
+
pass
|
|
322
371
|
|
|
323
372
|
self.actual_audio_format = audio_format
|
|
324
|
-
self._dtype =
|
|
325
|
-
self._norm =
|
|
373
|
+
self._dtype = qaudio_dtype(self.actual_audio_format.sampleFormat())
|
|
374
|
+
self._norm = qaudio_norm_factor(self.actual_audio_format.sampleFormat())
|
|
326
375
|
|
|
327
376
|
try:
|
|
328
377
|
self.audio_source = QAudioSource(audio_input_device, audio_format)
|
|
378
|
+
|
|
379
|
+
# Configure buffer size based on desired latency
|
|
329
380
|
bs = int(audio_format.sampleRate() * audio_format.channelCount() * audio_format.bytesPerSample() * (float(self.latency_ms) / 1000.0))
|
|
330
381
|
if bs < 4096:
|
|
331
382
|
bs = 4096
|
|
332
383
|
self.audio_source.setBufferSize(bs)
|
|
384
|
+
|
|
385
|
+
# reduce notify interval to improve responsiveness
|
|
386
|
+
try:
|
|
387
|
+
self.audio_source.setNotifyInterval(max(5, min(50, int(self.latency_ms))))
|
|
388
|
+
except Exception:
|
|
389
|
+
pass
|
|
390
|
+
|
|
333
391
|
except Exception as e:
|
|
334
392
|
self.disconnected = True
|
|
335
393
|
print(f"Failed to create audio source: {e}")
|
|
@@ -349,11 +407,10 @@ class NativeBackend(QObject):
|
|
|
349
407
|
|
|
350
408
|
def process_audio_input(self):
|
|
351
409
|
"""Process incoming audio data"""
|
|
352
|
-
# guard against late calls after stop or missing device
|
|
410
|
+
# guard against late calls after stop or missing device
|
|
353
411
|
if not self._is_recording or self.audio_io_device is None:
|
|
354
412
|
return
|
|
355
413
|
|
|
356
|
-
# add seconds to stop timer
|
|
357
414
|
data = self.audio_io_device.readAll()
|
|
358
415
|
if data.isEmpty():
|
|
359
416
|
return
|
|
@@ -366,8 +423,8 @@ class NativeBackend(QObject):
|
|
|
366
423
|
|
|
367
424
|
# Determine the correct dtype and normalization factor
|
|
368
425
|
sample_format = self.actual_audio_format.sampleFormat()
|
|
369
|
-
dtype = self._dtype if self._dtype is not None else
|
|
370
|
-
normalization_factor = self._norm if self._norm is not None else
|
|
426
|
+
dtype = self._dtype if self._dtype is not None else qaudio_dtype(sample_format)
|
|
427
|
+
normalization_factor = self._norm if self._norm is not None else qaudio_norm_factor(sample_format)
|
|
371
428
|
|
|
372
429
|
# Convert bytes to NumPy array of the appropriate type
|
|
373
430
|
samples = np.frombuffer(data_bytes, dtype=dtype)
|
|
@@ -394,6 +451,15 @@ class NativeBackend(QObject):
|
|
|
394
451
|
# Update the level bar widget
|
|
395
452
|
self.update_audio_level(level_percent)
|
|
396
453
|
|
|
454
|
+
# --- emit realtime input delta (PCM16 LE) ---
|
|
455
|
+
# Always standardize to Int16 for provider compatibility; do not resample here.
|
|
456
|
+
try:
|
|
457
|
+
s16 = qaudio_to_s16le(data_bytes, sample_format)
|
|
458
|
+
self._emit_rt_input_delta(s16, final=False)
|
|
459
|
+
except Exception:
|
|
460
|
+
# avoid interrupting UI/recording on conversion issues
|
|
461
|
+
self._emit_rt_input_delta(data_bytes, final=False)
|
|
462
|
+
|
|
397
463
|
# Handle loop recording
|
|
398
464
|
if self.loop and self.stop_callback is not None:
|
|
399
465
|
stop_interval = int(self.window.core.config.get('audio.input.stop_interval', 10))
|
|
@@ -450,12 +516,14 @@ class NativeBackend(QObject):
|
|
|
450
516
|
else:
|
|
451
517
|
raise ValueError("Unsupported sample format")
|
|
452
518
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
519
|
+
try:
|
|
520
|
+
with wave.open(filename, 'wb') as wf:
|
|
521
|
+
wf.setnchannels(channels)
|
|
522
|
+
wf.setsampwidth(sample_size)
|
|
523
|
+
wf.setframerate(frame_rate)
|
|
524
|
+
wf.writeframes(out_bytes)
|
|
525
|
+
except:
|
|
526
|
+
pass
|
|
459
527
|
|
|
460
528
|
def get_dtype_from_sample_format(self, sample_format):
|
|
461
529
|
"""
|
|
@@ -463,16 +531,7 @@ class NativeBackend(QObject):
|
|
|
463
531
|
|
|
464
532
|
:param sample_format: QAudioFormat.SampleFormat
|
|
465
533
|
"""
|
|
466
|
-
|
|
467
|
-
return np.uint8
|
|
468
|
-
elif sample_format == QAudioFormat.SampleFormat.Int16:
|
|
469
|
-
return np.int16
|
|
470
|
-
elif sample_format == QAudioFormat.SampleFormat.Int32:
|
|
471
|
-
return np.int32
|
|
472
|
-
elif sample_format == QAudioFormat.SampleFormat.Float:
|
|
473
|
-
return np.float32
|
|
474
|
-
else:
|
|
475
|
-
raise ValueError("Unsupported sample format")
|
|
534
|
+
return qaudio_dtype(sample_format)
|
|
476
535
|
|
|
477
536
|
def get_normalization_factor(self, sample_format):
|
|
478
537
|
"""
|
|
@@ -480,16 +539,7 @@ class NativeBackend(QObject):
|
|
|
480
539
|
|
|
481
540
|
:param sample_format: QAudioFormat.SampleFormat
|
|
482
541
|
"""
|
|
483
|
-
|
|
484
|
-
return 255.0
|
|
485
|
-
elif sample_format == QAudioFormat.SampleFormat.Int16:
|
|
486
|
-
return 32768.0
|
|
487
|
-
elif sample_format == QAudioFormat.SampleFormat.Int32:
|
|
488
|
-
return float(2 ** 31)
|
|
489
|
-
elif sample_format == QAudioFormat.SampleFormat.Float:
|
|
490
|
-
return 1.0
|
|
491
|
-
else:
|
|
492
|
-
raise ValueError("Unsupported sample format")
|
|
542
|
+
return qaudio_norm_factor(sample_format)
|
|
493
543
|
|
|
494
544
|
def play_after(
|
|
495
545
|
self,
|
|
@@ -507,69 +557,19 @@ class NativeBackend(QObject):
|
|
|
507
557
|
:param signals: Signals to emit on playback
|
|
508
558
|
:return: True if started
|
|
509
559
|
"""
|
|
510
|
-
|
|
511
|
-
self.
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
num_device = 0
|
|
519
|
-
selected_device = devices[num_device] if num_device < len(devices) else devices[0]
|
|
520
|
-
self.audio_output.setDevice(selected_device)
|
|
521
|
-
|
|
522
|
-
if self.AUTO_CONVERT_TO_WAV:
|
|
523
|
-
if audio_file.lower().endswith('.mp3'):
|
|
524
|
-
tmp_dir = self.window.core.audio.get_cache_dir()
|
|
525
|
-
base_name = os.path.splitext(os.path.basename(audio_file))[0]
|
|
526
|
-
dst_file = os.path.join(tmp_dir, "_" + base_name + ".wav")
|
|
527
|
-
wav_file = self.window.core.audio.mp3_to_wav(audio_file, dst_file)
|
|
528
|
-
if wav_file:
|
|
529
|
-
audio_file = wav_file
|
|
530
|
-
|
|
531
|
-
def check_stop():
|
|
532
|
-
if stopped():
|
|
533
|
-
self.player.stop()
|
|
534
|
-
self.stop_timers()
|
|
535
|
-
signals.volume_changed.emit(0)
|
|
536
|
-
else:
|
|
537
|
-
if self.player:
|
|
538
|
-
if self.player.playbackState() == QMediaPlayer.StoppedState:
|
|
539
|
-
self.player.stop()
|
|
540
|
-
self.stop_timers()
|
|
541
|
-
signals.volume_changed.emit(0)
|
|
542
|
-
|
|
543
|
-
self.envelope = self.calculate_envelope(audio_file, self.chunk_ms)
|
|
544
|
-
self.player = QMediaPlayer()
|
|
545
|
-
self.player.setAudioOutput(self.audio_output)
|
|
546
|
-
self.player.setSource(QUrl.fromLocalFile(audio_file))
|
|
547
|
-
self.player.play()
|
|
548
|
-
|
|
549
|
-
self.playback_timer = QTimer()
|
|
550
|
-
self.playback_timer.setInterval(100)
|
|
551
|
-
self.playback_timer.timeout.connect(check_stop)
|
|
552
|
-
self.volume_timer = QTimer(self)
|
|
553
|
-
self.volume_timer.setInterval(10) # every 100 ms
|
|
554
|
-
self.volume_timer.timeout.connect(
|
|
555
|
-
lambda: self.update_volume(signals)
|
|
560
|
+
# delegate to player wrapper to keep logic isolated
|
|
561
|
+
self._player.play_after(
|
|
562
|
+
audio_file=audio_file,
|
|
563
|
+
event_name=event_name,
|
|
564
|
+
stopped=stopped,
|
|
565
|
+
signals=signals,
|
|
566
|
+
auto_convert_to_wav=self.AUTO_CONVERT_TO_WAV,
|
|
567
|
+
select_output_device=self._select_output_device,
|
|
556
568
|
)
|
|
557
569
|
|
|
558
|
-
self.playback_timer.start()
|
|
559
|
-
self.volume_timer.start()
|
|
560
|
-
signals.volume_changed.emit(0)
|
|
561
|
-
signals.playback.emit(event_name)
|
|
562
|
-
|
|
563
570
|
def stop_timers(self):
|
|
564
|
-
"""
|
|
565
|
-
|
|
566
|
-
"""
|
|
567
|
-
if self.playback_timer is not None:
|
|
568
|
-
self.playback_timer.stop()
|
|
569
|
-
self.playback_timer = None
|
|
570
|
-
if self.volume_timer is not None:
|
|
571
|
-
self.volume_timer.stop()
|
|
572
|
-
self.volume_timer = None
|
|
571
|
+
"""Stop playback timers."""
|
|
572
|
+
self._player.stop_timers()
|
|
573
573
|
|
|
574
574
|
def play(
|
|
575
575
|
self,
|
|
@@ -596,9 +596,9 @@ class NativeBackend(QObject):
|
|
|
596
596
|
:param signals: Signals object to emit stop event.
|
|
597
597
|
:return: True if stopped successfully.
|
|
598
598
|
"""
|
|
599
|
-
if self.
|
|
600
|
-
self.
|
|
601
|
-
self.
|
|
599
|
+
if self._rt_session:
|
|
600
|
+
self._rt_session.stop()
|
|
601
|
+
self._player.stop(signals=signals)
|
|
602
602
|
return False
|
|
603
603
|
|
|
604
604
|
def calculate_envelope(
|
|
@@ -611,23 +611,10 @@ class NativeBackend(QObject):
|
|
|
611
611
|
|
|
612
612
|
:param audio_file: Path to the audio file
|
|
613
613
|
:param chunk_ms: Size of each chunk in milliseconds
|
|
614
|
+
:return: List of volume levels (0-100) for each chunk
|
|
614
615
|
"""
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
envelope = []
|
|
618
|
-
|
|
619
|
-
for ms in range(0, len(audio), chunk_ms):
|
|
620
|
-
chunk = audio[ms:ms + chunk_ms]
|
|
621
|
-
rms = chunk.rms
|
|
622
|
-
if rms > 0:
|
|
623
|
-
db = 20 * np.log10(rms / max_amplitude)
|
|
624
|
-
else:
|
|
625
|
-
db = -60
|
|
626
|
-
db = max(-60, min(0, db))
|
|
627
|
-
volume = ((db + 60) / 60) * 100
|
|
628
|
-
envelope.append(volume)
|
|
629
|
-
|
|
630
|
-
return envelope
|
|
616
|
+
from ..shared import compute_envelope_from_file
|
|
617
|
+
return compute_envelope_from_file(audio_file, chunk_ms)
|
|
631
618
|
|
|
632
619
|
def update_volume(self, signals=None):
|
|
633
620
|
"""
|
|
@@ -635,13 +622,7 @@ class NativeBackend(QObject):
|
|
|
635
622
|
|
|
636
623
|
:param signals: Signals object to emit volume changed event.
|
|
637
624
|
"""
|
|
638
|
-
|
|
639
|
-
index = int(pos / self.chunk_ms)
|
|
640
|
-
if index < len(self.envelope):
|
|
641
|
-
volume = self.envelope[index]
|
|
642
|
-
else:
|
|
643
|
-
volume = 0
|
|
644
|
-
signals.volume_changed.emit(volume)
|
|
625
|
+
self._player.update_volume(signals)
|
|
645
626
|
|
|
646
627
|
def get_input_devices(self) -> List[Tuple[int, str]]:
|
|
647
628
|
"""
|
|
@@ -695,4 +676,322 @@ class NativeBackend(QObject):
|
|
|
695
676
|
index = devices.index(default_device)
|
|
696
677
|
except ValueError:
|
|
697
678
|
index = None
|
|
698
|
-
return index, None
|
|
679
|
+
return index, None
|
|
680
|
+
|
|
681
|
+
# ---- REALTIME ----
|
|
682
|
+
|
|
683
|
+
def _select_output_device(self):
|
|
684
|
+
"""
|
|
685
|
+
Select the audio output device based on configuration.
|
|
686
|
+
|
|
687
|
+
:return: QAudioDevice
|
|
688
|
+
"""
|
|
689
|
+
devices = QMediaDevices.audioOutputs()
|
|
690
|
+
if devices:
|
|
691
|
+
try:
|
|
692
|
+
num_device = int(self.window.core.config.get('audio.output.device', 0))
|
|
693
|
+
except Exception:
|
|
694
|
+
num_device = 0
|
|
695
|
+
return devices[num_device] if 0 <= num_device < len(devices) else devices[0]
|
|
696
|
+
return QMediaDevices.defaultAudioOutput()
|
|
697
|
+
|
|
698
|
+
def _sample_format_from_mime(self, mime: Optional[str]) -> QAudioFormat.SampleFormat:
|
|
699
|
+
"""
|
|
700
|
+
Determine sample format from MIME type.
|
|
701
|
+
|
|
702
|
+
:param mime: MIME type string
|
|
703
|
+
:return: QAudioFormat.SampleFormat
|
|
704
|
+
"""
|
|
705
|
+
s = (mime or "audio/pcm").lower()
|
|
706
|
+
if "float" in s or "f32" in s:
|
|
707
|
+
return QAudioFormat.SampleFormat.Float
|
|
708
|
+
if "pcm" in s:
|
|
709
|
+
if "32" in s or "s32" in s or "int32" in s:
|
|
710
|
+
return QAudioFormat.SampleFormat.Int32
|
|
711
|
+
if "8" in s or "u8" in s:
|
|
712
|
+
return QAudioFormat.SampleFormat.UInt8
|
|
713
|
+
return QAudioFormat.SampleFormat.Int16
|
|
714
|
+
if "l16" in s:
|
|
715
|
+
return QAudioFormat.SampleFormat.Int16
|
|
716
|
+
return QAudioFormat.SampleFormat.Int16
|
|
717
|
+
|
|
718
|
+
def _make_format(
|
|
719
|
+
self,
|
|
720
|
+
rate: int,
|
|
721
|
+
channels: int,
|
|
722
|
+
sample_format: QAudioFormat.SampleFormat
|
|
723
|
+
) -> QAudioFormat:
|
|
724
|
+
"""
|
|
725
|
+
Create QAudioFormat from parameters.
|
|
726
|
+
|
|
727
|
+
:param rate: Sample rate
|
|
728
|
+
:param channels: Number of channels
|
|
729
|
+
:param sample_format: Sample format
|
|
730
|
+
:return: QAudioFormat
|
|
731
|
+
"""
|
|
732
|
+
fmt = QAudioFormat()
|
|
733
|
+
fmt.setSampleRate(int(rate))
|
|
734
|
+
fmt.setChannelCount(int(channels))
|
|
735
|
+
fmt.setSampleFormat(sample_format)
|
|
736
|
+
return fmt
|
|
737
|
+
|
|
738
|
+
def _emit_output_volume(self, value: int) -> None:
|
|
739
|
+
"""
|
|
740
|
+
Emit output volume change event.
|
|
741
|
+
|
|
742
|
+
:param value: Volume level (0-100)
|
|
743
|
+
"""
|
|
744
|
+
if not self._rt_signals:
|
|
745
|
+
return
|
|
746
|
+
self._rt_signals.response.emit(build_output_volume_event(int(value)))
|
|
747
|
+
|
|
748
|
+
def _ensure_rt_session(
|
|
749
|
+
self,
|
|
750
|
+
mime: str,
|
|
751
|
+
rate: Optional[int],
|
|
752
|
+
channels: Optional[int]
|
|
753
|
+
) -> RealtimeSession:
|
|
754
|
+
"""
|
|
755
|
+
Ensure a realtime audio playback session exists with the device's preferred (or nearest) format.
|
|
756
|
+
Keep it simple: prefer Int16, reuse session if format unchanged.
|
|
757
|
+
|
|
758
|
+
:param mime: MIME type of the audio data
|
|
759
|
+
:param rate: Sample rate of the audio data
|
|
760
|
+
:param channels: Number of channels in the audio data
|
|
761
|
+
:return: RealtimeSession
|
|
762
|
+
"""
|
|
763
|
+
device = self._select_output_device()
|
|
764
|
+
|
|
765
|
+
# NOTE: start from device preferred format and coerce to Int16 if supported
|
|
766
|
+
fmt = device.preferredFormat()
|
|
767
|
+
try:
|
|
768
|
+
if fmt.sampleFormat() != QAudioFormat.SampleFormat.Int16:
|
|
769
|
+
test = QAudioFormat()
|
|
770
|
+
test.setSampleRate(fmt.sampleRate())
|
|
771
|
+
test.setChannelCount(fmt.channelCount())
|
|
772
|
+
test.setSampleFormat(QAudioFormat.SampleFormat.Int16)
|
|
773
|
+
if device.isFormatSupported(test):
|
|
774
|
+
fmt = test
|
|
775
|
+
else:
|
|
776
|
+
try:
|
|
777
|
+
fmt = device.nearestFormat(test)
|
|
778
|
+
except Exception:
|
|
779
|
+
pass
|
|
780
|
+
except Exception:
|
|
781
|
+
pass
|
|
782
|
+
|
|
783
|
+
# reuse current session if same format
|
|
784
|
+
if self._rt_session is not None:
|
|
785
|
+
try:
|
|
786
|
+
ef = self._rt_session.format
|
|
787
|
+
if (ef.sampleRate() == fmt.sampleRate()
|
|
788
|
+
and ef.channelCount() == fmt.channelCount()
|
|
789
|
+
and ef.sampleFormat() == fmt.sampleFormat()):
|
|
790
|
+
return self._rt_session
|
|
791
|
+
except Exception:
|
|
792
|
+
pass
|
|
793
|
+
# NOTE: hard stop old one (we keep things simple)
|
|
794
|
+
try:
|
|
795
|
+
self._rt_session.stop()
|
|
796
|
+
except Exception:
|
|
797
|
+
pass
|
|
798
|
+
self._rt_session = None
|
|
799
|
+
|
|
800
|
+
session = RealtimeSession(
|
|
801
|
+
device=device,
|
|
802
|
+
fmt=fmt,
|
|
803
|
+
parent=self,
|
|
804
|
+
volume_emitter=self._emit_output_volume
|
|
805
|
+
)
|
|
806
|
+
# NOTE: when device actually stops (buffer empty), inform UI
|
|
807
|
+
session.on_stopped = lambda: (
|
|
808
|
+
self._rt_signals and self._rt_signals.response.emit(
|
|
809
|
+
RealtimeEvent(RealtimeEvent.RT_OUTPUT_AUDIO_END, {"source": "device"})
|
|
810
|
+
),
|
|
811
|
+
setattr(self, "_rt_session", None)
|
|
812
|
+
)
|
|
813
|
+
self._rt_session = session
|
|
814
|
+
return session
|
|
815
|
+
|
|
816
|
+
def _convert_pcm_for_output(
|
|
817
|
+
self,
|
|
818
|
+
data: bytes,
|
|
819
|
+
in_rate: int,
|
|
820
|
+
in_channels: int,
|
|
821
|
+
out_fmt: QAudioFormat
|
|
822
|
+
) -> bytes:
|
|
823
|
+
"""
|
|
824
|
+
Minimal PCM converter to device format:
|
|
825
|
+
- assumes input is S16LE,
|
|
826
|
+
- converts channels (mono<->stereo) and sample rate,
|
|
827
|
+
- keeps Int16; if device uses UInt8/Float, adapts sample width and bias.
|
|
828
|
+
|
|
829
|
+
:param data: Input PCM data (assumed S16LE)
|
|
830
|
+
:param in_rate: Input sample rate
|
|
831
|
+
:param in_channels: Input number of channels
|
|
832
|
+
:param out_fmt: Desired output QAudioFormat
|
|
833
|
+
:return: Converted PCM data
|
|
834
|
+
"""
|
|
835
|
+
if not data:
|
|
836
|
+
return b""
|
|
837
|
+
|
|
838
|
+
try:
|
|
839
|
+
out_rate = int(out_fmt.sampleRate()) or in_rate
|
|
840
|
+
out_ch = int(out_fmt.channelCount()) or in_channels
|
|
841
|
+
out_sw = int(out_fmt.bytesPerSample()) or 2
|
|
842
|
+
out_sf = out_fmt.sampleFormat()
|
|
843
|
+
|
|
844
|
+
# pick string flag for format conversion
|
|
845
|
+
if out_sf == QAudioFormat.SampleFormat.UInt8 and out_sw == 1:
|
|
846
|
+
flag = "u8"
|
|
847
|
+
elif out_sf == QAudioFormat.SampleFormat.Float and out_sw == 4:
|
|
848
|
+
flag = "f32"
|
|
849
|
+
else:
|
|
850
|
+
flag = "s16"
|
|
851
|
+
|
|
852
|
+
return convert_s16_pcm(
|
|
853
|
+
data,
|
|
854
|
+
in_rate=in_rate,
|
|
855
|
+
in_channels=in_channels,
|
|
856
|
+
out_rate=out_rate,
|
|
857
|
+
out_channels=out_ch,
|
|
858
|
+
out_width=out_sw,
|
|
859
|
+
out_format=flag
|
|
860
|
+
)
|
|
861
|
+
except Exception:
|
|
862
|
+
return data
|
|
863
|
+
|
|
864
|
+
def stop_realtime(self):
|
|
865
|
+
"""Stop realtime audio playback session (simple/friendly)."""
|
|
866
|
+
s = self._rt_session
|
|
867
|
+
if s is not None:
|
|
868
|
+
try:
|
|
869
|
+
s.mark_final() # NOTE: add small tail and let it finish
|
|
870
|
+
except Exception:
|
|
871
|
+
try:
|
|
872
|
+
s.stop()
|
|
873
|
+
except Exception:
|
|
874
|
+
pass
|
|
875
|
+
|
|
876
|
+
def set_rt_signals(self, signals) -> None:
|
|
877
|
+
"""
|
|
878
|
+
Set signals object for realtime events.
|
|
879
|
+
|
|
880
|
+
:param signals: Signals object
|
|
881
|
+
"""
|
|
882
|
+
self._rt_signals = signals
|
|
883
|
+
|
|
884
|
+
def set_signals(self, signals) -> None:
|
|
885
|
+
"""
|
|
886
|
+
Alias to set_rt_signals to keep backend API consistent.
|
|
887
|
+
|
|
888
|
+
:param signals: Signals object
|
|
889
|
+
"""
|
|
890
|
+
self.set_rt_signals(signals)
|
|
891
|
+
|
|
892
|
+
def handle_realtime(self, payload: dict) -> None:
|
|
893
|
+
"""
|
|
894
|
+
Handle realtime audio playback payload.
|
|
895
|
+
|
|
896
|
+
Expected payload keys:
|
|
897
|
+
- data: bytes
|
|
898
|
+
- mime: str (e.g. "audio/pcm", "audio/l16", etc.)
|
|
899
|
+
- rate: int (sample rate)
|
|
900
|
+
- channels: int (number of channels)
|
|
901
|
+
- final: bool (True if final chunk)
|
|
902
|
+
If mime is not PCM/L16, the chunk is ignored.
|
|
903
|
+
|
|
904
|
+
:param payload: Payload dictionary
|
|
905
|
+
"""
|
|
906
|
+
try:
|
|
907
|
+
data: bytes = payload.get("data", b"") or b""
|
|
908
|
+
mime: str = (payload.get("mime", "audio/pcm") or "audio/pcm").lower()
|
|
909
|
+
rate = int(payload.get("rate", 24000) or 24000)
|
|
910
|
+
channels = int(payload.get("channels", 1) or 1)
|
|
911
|
+
final = bool(payload.get("final", False))
|
|
912
|
+
|
|
913
|
+
# only raw PCM/L16
|
|
914
|
+
if ("pcm" not in mime) and ("l16" not in mime):
|
|
915
|
+
if final and self._rt_session is not None:
|
|
916
|
+
try:
|
|
917
|
+
self._rt_session.mark_final()
|
|
918
|
+
except Exception:
|
|
919
|
+
pass
|
|
920
|
+
return
|
|
921
|
+
|
|
922
|
+
session = self._ensure_rt_session(mime, rate, channels)
|
|
923
|
+
|
|
924
|
+
if data:
|
|
925
|
+
out_fmt = session.format
|
|
926
|
+
if (out_fmt.sampleRate() != rate) or (out_fmt.channelCount() != channels) or (
|
|
927
|
+
out_fmt.sampleFormat() != QAudioFormat.SampleFormat.Int16):
|
|
928
|
+
data = self._convert_pcm_for_output(data, rate, channels, out_fmt)
|
|
929
|
+
session.feed(data)
|
|
930
|
+
|
|
931
|
+
if final:
|
|
932
|
+
session.mark_final()
|
|
933
|
+
|
|
934
|
+
except Exception as e:
|
|
935
|
+
try:
|
|
936
|
+
self.window.core.debug.log(f"[audio][native] handle_realtime error: {e}")
|
|
937
|
+
except Exception:
|
|
938
|
+
pass
|
|
939
|
+
|
|
940
|
+
# ---- REALTIME INPUT ----
|
|
941
|
+
def _emit_rt_input_delta(self, data: bytes, final: bool) -> None:
|
|
942
|
+
"""
|
|
943
|
+
Emit RT_INPUT_AUDIO_DELTA with a provider-agnostic payload.
|
|
944
|
+
Standardizes to PCM16, little-endian, and includes rate/channels.
|
|
945
|
+
|
|
946
|
+
:param data: audio data bytes
|
|
947
|
+
:param final: True if this is the final chunk
|
|
948
|
+
"""
|
|
949
|
+
if not self._rt_signals:
|
|
950
|
+
return
|
|
951
|
+
|
|
952
|
+
# Resolve current format safely
|
|
953
|
+
try:
|
|
954
|
+
rate = int(self.actual_audio_format.sampleRate())
|
|
955
|
+
channels = int(self.actual_audio_format.channelCount())
|
|
956
|
+
except Exception:
|
|
957
|
+
rate = int(self.window.core.config.get('audio.input.rate', 44100))
|
|
958
|
+
channels = int(self.window.core.config.get('audio.input.channels', 1))
|
|
959
|
+
|
|
960
|
+
event = build_rt_input_delta_event(rate=rate, channels=channels, data=data or b"", final=bool(final))
|
|
961
|
+
try:
|
|
962
|
+
self._rt_signals.response.emit(event)
|
|
963
|
+
except Exception:
|
|
964
|
+
QTimer.singleShot(0, lambda: self._rt_signals.response.emit(event))
|
|
965
|
+
|
|
966
|
+
def _convert_input_to_int16(self, raw: bytes, sample_format) -> bytes:
|
|
967
|
+
"""
|
|
968
|
+
Convert arbitrary QAudioFormat sample format to PCM16 little-endian.
|
|
969
|
+
Does not change sample rate or channel count.
|
|
970
|
+
|
|
971
|
+
:param raw: input audio data bytes
|
|
972
|
+
:param sample_format: QAudioFormat.SampleFormat of the input data
|
|
973
|
+
:return: converted audio data bytes in PCM16 LE
|
|
974
|
+
"""
|
|
975
|
+
return qaudio_to_s16le(raw, sample_format)
|
|
976
|
+
|
|
977
|
+
# ---- internals (diagnostics) ----
|
|
978
|
+
def _on_audio_state_changed(self, state: int):
|
|
979
|
+
"""
|
|
980
|
+
Diagnostics for input device state changes. Keep safe across Qt builds by using int.
|
|
981
|
+
"""
|
|
982
|
+
return
|
|
983
|
+
try:
|
|
984
|
+
# QAudio.State.StoppedState -> typically 0; compare robustly
|
|
985
|
+
try:
|
|
986
|
+
stopped_val = int(QAudio.State.StoppedState)
|
|
987
|
+
except Exception:
|
|
988
|
+
try:
|
|
989
|
+
stopped_val = int(QAudio.StoppedState)
|
|
990
|
+
except Exception:
|
|
991
|
+
stopped_val = 0
|
|
992
|
+
if int(state) == stopped_val and self.audio_source is not None:
|
|
993
|
+
err = self.audio_source.error()
|
|
994
|
+
if err:
|
|
995
|
+
print(f"[native][input] QAudioSource stopped with error: {err}")
|
|
996
|
+
except Exception:
|
|
997
|
+
pass
|