pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +8 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +4 -0
- pygpt_net/controller/__init__.py +5 -2
- pygpt_net/controller/audio/audio.py +25 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +29 -3
- pygpt_net/controller/chat/handler/__init__.py +0 -0
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +3 -1071
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +268 -0
- pygpt_net/controller/ui/mode.py +7 -0
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +13 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +2 -1
- pygpt_net/core/bridge/worker.py +4 -1
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/image/image.py +51 -1
- pygpt_net/core/realtime/__init__.py +0 -0
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +164 -0
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +48 -0
- pygpt_net/data/config/config.json +10 -4
- pygpt_net/data/config/models.json +149 -103
- pygpt_net/data/config/settings.json +50 -0
- pygpt_net/data/locale/locale.de.ini +5 -5
- pygpt_net/data/locale/locale.en.ini +19 -13
- pygpt_net/data/locale/locale.es.ini +5 -5
- pygpt_net/data/locale/locale.fr.ini +5 -5
- pygpt_net/data/locale/locale.it.ini +5 -5
- pygpt_net/data/locale/locale.pl.ini +5 -5
- pygpt_net/data/locale/locale.uk.ini +5 -5
- pygpt_net/data/locale/locale.zh.ini +1 -1
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/provider/api/google/__init__.py +39 -6
- pygpt_net/provider/api/google/audio.py +8 -1
- pygpt_net/provider/api/google/chat.py +45 -6
- pygpt_net/provider/api/google/image.py +226 -86
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/openai/__init__.py +22 -2
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/google_tts.py +0 -12
- pygpt_net/provider/audio_output/openai_tts.py +8 -5
- pygpt_net/provider/core/config/patch.py +15 -0
- pygpt_net/provider/core/model/patch.py +11 -0
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/ui/layout/toolbox/footer.py +16 -0
- pygpt_net/ui/layout/toolbox/image.py +5 -0
- pygpt_net/ui/widget/option/combo.py +15 -1
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
pygpt_net/core/audio/backend/shared/conversions.py
ADDED
@@ -0,0 +1,211 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.31 23:00:00 #
+# ================================================== #
+
+import numpy as np
+import audioop
+
+def qaudio_dtype(sample_format):
+    """
+    Map QAudioFormat.SampleFormat to numpy dtype.
+
+    Raises ValueError if the format is unsupported.
+
+    :param sample_format: QAudioFormat.SampleFormat
+    :return: numpy dtype
+    """
+    try:
+        from PySide6.QtMultimedia import QAudioFormat
+    except Exception:
+        raise
+
+    if sample_format == QAudioFormat.SampleFormat.UInt8:
+        return np.uint8
+    elif sample_format == QAudioFormat.SampleFormat.Int16:
+        return np.int16
+    elif sample_format == QAudioFormat.SampleFormat.Int32:
+        return np.int32
+    elif sample_format == QAudioFormat.SampleFormat.Float:
+        return np.float32
+    raise ValueError("Unsupported sample format")
+
+def qaudio_norm_factor(sample_format):
+    """
+    Normalization factor for QAudioFormat.SampleFormat.
+
+    Raises ValueError if the format is unsupported.
+
+    :param sample_format: QAudioFormat.SampleFormat
+    :return: normalization factor (float)
+    """
+    try:
+        from PySide6.QtMultimedia import QAudioFormat
+    except Exception:
+        raise
+
+    if sample_format == QAudioFormat.SampleFormat.UInt8:
+        return 255.0
+    elif sample_format == QAudioFormat.SampleFormat.Int16:
+        return 32768.0
+    elif sample_format == QAudioFormat.SampleFormat.Int32:
+        return float(2 ** 31)
+    elif sample_format == QAudioFormat.SampleFormat.Float:
+        return 1.0
+    raise ValueError("Unsupported sample format")
+
+def qaudio_to_s16le(raw: bytes, sample_format) -> bytes:
+    """
+    Convert arbitrary QAudioFormat sample format to PCM16 little-endian.
+
+    :param raw: input byte buffer
+    :param sample_format: QAudioFormat.SampleFormat
+    :return: converted byte buffer in PCM16 little-endian
+    """
+    if not raw:
+        return b""
+    try:
+        from PySide6.QtMultimedia import QAudioFormat
+    except Exception:
+        return raw
+
+    if sample_format == QAudioFormat.SampleFormat.Int16:
+        return raw
+    elif sample_format == QAudioFormat.SampleFormat.UInt8:
+        arr = np.frombuffer(raw, dtype=np.uint8).astype(np.int16)
+        arr = (arr - 128) << 8
+        return arr.tobytes()
+    elif sample_format == QAudioFormat.SampleFormat.Int32:
+        arr = np.frombuffer(raw, dtype=np.int32)
+        arr = (arr >> 16).astype(np.int16)
+        return arr.tobytes()
+    elif sample_format == QAudioFormat.SampleFormat.Float:
+        arr = np.frombuffer(raw, dtype=np.float32)
+        arr = np.clip(arr, -1.0, 1.0)
+        arr = (arr * 32767.0).astype(np.int16)
+        return arr.tobytes()
+    return raw
+
+def pyaudio_to_s16le(raw: bytes, fmt, pa_instance=None) -> bytes:
+    """
+    Convert PyAudio input buffer to PCM16 little-endian without changing
+    sample rate or channel count.
+
+    :param raw: input byte buffer
+    :param fmt: PyAudio format (e.g., pyaudio.paInt16)
+    :param pa_instance: Optional PyAudio instance for sample size queries
+    :return: converted byte buffer in PCM16 little-endian
+    """
+    if not raw:
+        return b""
+    try:
+        import pyaudio
+    except Exception:
+        return raw
+
+    try:
+        if fmt == pyaudio.paInt16:
+            return raw
+        elif fmt == pyaudio.paUInt8:
+            arr = np.frombuffer(raw, dtype=np.uint8).astype(np.int16)
+            arr = (arr - 128) << 8
+            return arr.tobytes()
+        elif fmt == pyaudio.paInt8:
+            arr = np.frombuffer(raw, dtype=np.int8).astype(np.int16)
+            arr = (arr.astype(np.int16) << 8)
+            return arr.tobytes()
+        elif fmt == pyaudio.paFloat32:
+            arr = np.frombuffer(raw, dtype=np.float32)
+            arr = np.clip(arr, -1.0, 1.0)
+            arr = (arr * 32767.0).astype(np.int16)
+            return arr.tobytes()
+        else:
+            try:
+                sw = pa_instance.get_sample_size(fmt) if pa_instance is not None else 2
+                return audioop.lin2lin(raw, sw, 2)
+            except Exception:
+                return raw
+    except Exception:
+        return raw
+
+def f32_to_s16le(raw: bytes) -> bytes:
+    """
+    Convert float32 little-endian PCM to int16 little-endian PCM.
+
+    :param raw: input byte buffer in float32
+    :return: converted byte buffer in int16
+    """
+    if not raw:
+        return b""
+    try:
+        arr = np.frombuffer(raw, dtype=np.float32)
+        arr = np.clip(arr, -1.0, 1.0)
+        s16 = (arr * 32767.0).astype(np.int16)
+        return s16.tobytes()
+    except Exception:
+        return b""
+
+def convert_s16_pcm(
+    data: bytes,
+    in_rate: int,
+    in_channels: int,
+    out_rate: int,
+    out_channels: int,
+    out_width: int = 2,
+    out_format: str = "s16"  # "s16" | "u8" | "f32"
+) -> bytes:
+    """
+    Minimal PCM converter to target format:
+    - assumes input is S16LE,
+    - converts channels (mono<->stereo) and sample rate,
+    - converts width if needed,
+    - applies bias for u8 or float conversion if requested.
+
+    :param data: input byte buffer in S16LE
+    :param in_rate: input sample rate
+    :param in_channels: input channel count
+    :param out_rate: output sample rate
+    :param out_channels: output channel count
+    :param out_width: output sample width in bytes (1, 2, or 4)
+    :param out_format: output format ("s16", "u8", or "f32")
+    :return: converted byte buffer
+    """
+    if not data:
+        return b""
+    try:
+        src = data
+
+        # channels
+        if in_channels != out_channels:
+            if in_channels == 2 and out_channels == 1:
+                src = audioop.tomono(src, 2, 0.5, 0.5)
+            elif in_channels == 1 and out_channels == 2:
+                src = audioop.tostereo(src, 2, 1.0, 1.0)
+            else:
+                mid = audioop.tomono(src, 2, 0.5, 0.5) if in_channels > 1 else src
+                src = audioop.tostereo(mid, 2, 1.0, 1.0) if out_channels == 2 else mid
+
+        # sample rate
+        if in_rate != out_rate:
+            src, _ = audioop.ratecv(src, 2, out_channels, in_rate, out_rate, None)
+
+        # sample width (Int16 -> other widths if needed)
+        if out_width != 2:
+            src = audioop.lin2lin(src, 2, out_width)
+
+        # sample format nuances
+        if out_format == "u8" and out_width == 1:
+            src = audioop.bias(src, 1, 128)  # center at 0x80
+        elif out_format == "f32" and out_width == 4:
+            arr = np.frombuffer(src, dtype=np.int16).astype(np.float32) / 32768.0
+            src = arr.tobytes()
+
+        return src
+    except Exception:
+        return data
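Taken together, these helpers normalize whatever a capture backend produces into S16LE and then into a target rate/channel layout. A minimal usage sketch (not part of the diff; the sample buffer and target format below are hypothetical):

    from pygpt_net.core.audio.backend.shared.conversions import convert_s16_pcm

    # Hypothetical example: downmix and resample 100 ms of stereo 44.1 kHz S16LE
    # capture data to mono 16 kHz PCM16.
    stereo_44k = b"\x00\x00" * (4410 * 2)   # 4410 frames x 2 channels of silence
    mono_16k = convert_s16_pcm(
        stereo_44k,
        in_rate=44100, in_channels=2,
        out_rate=16000, out_channels=1,
        out_width=2, out_format="s16",
    )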
pygpt_net/core/audio/backend/shared/envelope.py
ADDED
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.31 04:00:00 #
+# ================================================== #
+
+import numpy as np
+from pydub import AudioSegment
+
+def compute_envelope_from_file(audio_file: str, chunk_ms: int = 100) -> list:
+    """
+    Calculate the volume envelope of an audio file (0-100 per chunk).
+
+    :param audio_file: Path to the audio file
+    :param chunk_ms: Chunk size in milliseconds
+    :return: List of volume levels (0-100) per chunk
+    """
+    audio = AudioSegment.from_file(audio_file)
+    max_amplitude = 32767.0
+    envelope = []
+
+    for ms in range(0, len(audio), chunk_ms):
+        chunk = audio[ms:ms + chunk_ms]
+        rms = float(chunk.rms) if chunk.rms else 0.0
+        if rms > 0.0:
+            db = 20.0 * np.log10(max(1e-12, rms / max_amplitude))
+        else:
+            db = -60.0
+        db = max(-60.0, min(0.0, db))
+        volume = ((db + 60.0) / 60.0) * 100.0
+        envelope.append(volume)
+
+    return envelope
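For illustration only (not part of the diff), the envelope helper can be called directly to drive a level meter; the file name below is hypothetical and pydub needs ffmpeg available for non-WAV input:

    from pygpt_net.core.audio.backend.shared.envelope import compute_envelope_from_file

    levels = compute_envelope_from_file("reply.wav", chunk_ms=100)  # hypothetical file
    # one value per 100 ms chunk, scaled from -60 dBFS..0 dBFS to 0..100
    print(len(levels), max(levels, default=0.0))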
pygpt_net/core/audio/backend/shared/player.py
ADDED
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.31 04:00:00 #
+# ================================================== #
+
+from typing import Optional, Callable
+
+import os
+from PySide6.QtCore import QObject, QTimer, QUrl
+from PySide6.QtMultimedia import QMediaPlayer, QAudioOutput
+
+from ..shared import compute_envelope_from_file
+
+class NativePlayer(QObject):
+    """
+    Thin wrapper around QtMultimedia audio playback with level metering.
+    """
+    def __init__(self, window=None, chunk_ms: int = 10):
+        super().__init__(window)
+        self.window = window
+        self.chunk_ms = int(chunk_ms)
+        self.audio_output: Optional[QAudioOutput] = None
+        self.player: Optional[QMediaPlayer] = None
+        self.playback_timer: Optional[QTimer] = None
+        self.volume_timer: Optional[QTimer] = None
+        self.envelope = []
+
+    def stop_timers(self):
+        """Stop playback timers."""
+        if self.playback_timer is not None:
+            self.playback_timer.stop()
+            self.playback_timer = None
+        if self.volume_timer is not None:
+            self.volume_timer.stop()
+            self.volume_timer = None
+
+    def stop(self, signals=None):
+        """
+        Stop playback and timers.
+
+        :param signals: Signals to emit on stop
+        """
+        if self.player is not None:
+            try:
+                self.player.stop()
+            except Exception:
+                pass
+        self.stop_timers()
+        if signals is not None:
+            try:
+                signals.volume_changed.emit(0)
+            except Exception:
+                pass
+
+    def update_volume(self, signals=None):
+        """
+        Update the volume based on the current position in the audio file.
+
+        :param signals: Signals to emit volume changes
+        """
+        if not self.player:
+            return
+        pos = self.player.position()
+        index = int(pos / self.chunk_ms)
+        volume = self.envelope[index] if index < len(self.envelope) else 0
+        if signals is not None:
+            signals.volume_changed.emit(volume)
+
+    def play_after(
+        self,
+        audio_file: str,
+        event_name: str,
+        stopped: Callable[[], bool],
+        signals=None,
+        auto_convert_to_wav: bool = False,
+        select_output_device: Optional[Callable[[], object]] = None,
+    ):
+        """
+        Start audio playback using QtMultimedia with periodic volume updates.
+
+        :param audio_file: Path to audio file
+        :param event_name: Event name to emit on playback start
+        :param stopped: Callable returning True when playback should stop
+        :param signals: Signals to emit on playback
+        :param auto_convert_to_wav: auto convert mp3 to wav if True
+        :param select_output_device: callable returning QAudioDevice for output
+        """
+        self.audio_output = QAudioOutput()
+        self.audio_output.setVolume(1.0)
+
+        if callable(select_output_device):
+            try:
+                self.audio_output.setDevice(select_output_device())
+            except Exception:
+                pass
+
+        if auto_convert_to_wav and audio_file.lower().endswith('.mp3'):
+            tmp_dir = self.window.core.audio.get_cache_dir()
+            base_name = os.path.splitext(os.path.basename(audio_file))[0]
+            dst_file = os.path.join(tmp_dir, "_" + base_name + ".wav")
+            wav_file = self.window.core.audio.mp3_to_wav(audio_file, dst_file)
+            if wav_file:
+                audio_file = wav_file
+
+        def check_stop():
+            if stopped():
+                self.stop(signals=signals)
+            else:
+                if self.player:
+                    if self.player.playbackState() == QMediaPlayer.StoppedState:
+                        self.stop(signals=signals)
+
+        self.envelope = compute_envelope_from_file(audio_file, chunk_ms=self.chunk_ms)
+        self.player = QMediaPlayer()
+        self.player.setAudioOutput(self.audio_output)
+        self.player.setSource(QUrl.fromLocalFile(audio_file))
+        self.player.play()
+
+        self.playback_timer = QTimer()
+        self.playback_timer.setInterval(100)
+        self.playback_timer.timeout.connect(check_stop)
+
+        self.volume_timer = QTimer(self)
+        self.volume_timer.setInterval(10)
+        self.volume_timer.timeout.connect(lambda: self.update_volume(signals))
+
+        self.playback_timer.start()
+        self.volume_timer.start()
+        if signals is not None:
+            signals.volume_changed.emit(0)
+            signals.playback.emit(event_name)
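A rough usage sketch for NativePlayer (not part of the diff). The import path follows the file listing above, the file name is hypothetical, and QtMultimedia needs a running Qt event loop, so treat this as illustrative only:

    from PySide6.QtWidgets import QApplication
    from pygpt_net.core.audio.backend.shared.player import NativePlayer  # path per the listing

    app = QApplication([])
    player = NativePlayer(window=None, chunk_ms=10)
    player.play_after(
        audio_file="reply.wav",        # hypothetical file
        event_name="audio.playback",   # hypothetical event name
        stopped=lambda: False,         # caller-side stop flag; never requests a stop here
        signals=None,                  # the real app passes a Qt signals object
    )
    app.exec()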
pygpt_net/core/audio/backend/shared/rt.py
ADDED
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.31 23:00:00 #
+# ================================================== #
+
+from pygpt_net.core.events import RealtimeEvent
+
+def build_rt_input_delta_event(
+    rate: int,
+    channels: int,
+    data: bytes,
+    final: bool
+) -> RealtimeEvent:
+    """
+    Build RT_INPUT_AUDIO_DELTA event with a provider-agnostic payload.
+
+    :param rate: Sample rate (e.g., 16000)
+    :param channels: Number of channels (1 for mono, 2 for stereo)
+    :param data: Audio data bytes
+    :param final: Whether this is the final chunk
+    :return: RealtimeEvent instance
+    """
+    return RealtimeEvent(
+        RealtimeEvent.RT_INPUT_AUDIO_DELTA,
+        {
+            "payload": {
+                "data": data or b"",
+                "mime": "audio/pcm",
+                "rate": int(rate),
+                "channels": int(channels),
+                "final": bool(final),
+            }
+        }
+    )
+
+def build_output_volume_event(value: int) -> RealtimeEvent:
+    """
+    Build RT_OUTPUT_AUDIO_VOLUME_CHANGED event.
+
+    :param value: Volume level (0-100)
+    :return: RealtimeEvent instance
+    """
+    return RealtimeEvent(
+        RealtimeEvent.RT_OUTPUT_AUDIO_VOLUME_CHANGED,
+        {"volume": int(value)}
+    )
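As a sketch (not part of the diff), a capture backend would wrap each microphone chunk with this helper before handing it to the event dispatcher; the 20 ms chunk below is hypothetical:

    from pygpt_net.core.audio.backend.shared.rt import build_rt_input_delta_event

    chunk = b"\x00\x00" * 320  # hypothetical 20 ms of 16 kHz mono PCM16 silence
    event = build_rt_input_delta_event(rate=16000, channels=1, data=chunk, final=False)
    # the event is then dispatched like any other event; see the dispatcher.py hunk
    # further down, which routes RealtimeEvent instances to controller.realtime.handle()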
pygpt_net/core/audio/capture.py
CHANGED
@@ -42,6 +42,11 @@ class Capture:
             backend = "native"
         return self.backends[backend]
 
+    def setup(self):
+        """Setup audio input backend"""
+        for b in self.backends.values():
+            b.set_rt_signals(self.window.controller.realtime.signals)
+
     def get_default_input_device(self) -> Tuple[int, str]:
         """
         Get default input device
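Capture.setup() only pushes the realtime signals object into each registered input backend, so a backend merely has to accept and store it. A hypothetical minimal backend for illustration (sketch, not the actual backend code):

    class DummyInputBackend:                  # hypothetical backend
        def __init__(self):
            self.rt_signals = None

        def set_rt_signals(self, signals):    # called once from Capture.setup()
            self.rt_signals = signals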
pygpt_net/core/audio/output.py
CHANGED
@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.08.
+# Updated Date: 2025.08.31 23:00:00 #
 # ================================================== #
 
 from typing import List, Tuple
@@ -42,6 +42,10 @@ class Output:
             backend = "native"
         return self.backends[backend]
 
+    def setup(self):
+        """Setup audio output backend"""
+        pass
+
     def play(
             self,
             audio_file: str,
@@ -88,4 +92,11 @@
 
         :return: (id, name)
         """
-        return self.get_backend().get_default_output_device()
+        return self.get_backend().get_default_output_device()
+
+    def handle_realtime(self, payload, signals):
+        """
+        Handle real-time audio playback
+        """
+        #self.get_backend().set_signals(signals)
+        self.get_backend().handle_realtime(payload)
pygpt_net/core/audio/whisper.py
CHANGED
@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date:
+# Updated Date: 2025.08.29 18:00:00 #
 # ================================================== #
 
 from typing import List
@@ -21,10 +21,14 @@ class Whisper:
         self.window = window
         self.voices = [
             "alloy",
+            "ash",
+            "ballad",
+            "coral",
             "echo",
             "fable",
-            "onyx",
             "nova",
+            "onyx",
+            "sage",
             "shimmer",
         ]
 
pygpt_net/core/bridge/bridge.py
CHANGED
@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.08.
+# Updated Date: 2025.08.30 06:00:00 #
 # ================================================== #
 
 import time
@@ -268,6 +268,7 @@ class Bridge:
         worker = BridgeWorker()
         worker.window = self.window
         worker.signals.response.connect(self.window.controller.kernel.listener)
+        worker.rt_signals = self.window.controller.realtime.signals  # Realtime signals
         return worker
 
     def apply_rate_limit(self):
pygpt_net/core/bridge/worker.py
CHANGED
@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.08.
+# Updated Date: 2025.08.30 06:00:00 #
 # ================================================== #
 
 from PySide6.QtCore import QObject, Signal, QRunnable, Slot
@@ -33,6 +33,7 @@ class BridgeWorker(QRunnable):
     def __init__(self, *args, **kwargs):
         super().__init__()
         self.signals = BridgeSignals()
+        self.rt_signals = None
         self.args = args
         self.kwargs = kwargs
         self.window = None
@@ -116,12 +117,14 @@ class BridgeWorker(QRunnable):
                 result = self.window.core.api.google.call(
                     context=self.context,
                     extra=self.extra,
+                    rt_signals=self.rt_signals,
                 )
             elif sdk == "openai":
                 # print("Using OpenAI SDK")
                 result = self.window.core.api.openai.call(
                     context=self.context,
                     extra=self.extra,
+                    rt_signals=self.rt_signals,
                 )
         except Exception as e:
             if self.signals:
pygpt_net/core/dispatcher/dispatcher.py
CHANGED
@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.08.
+# Updated Date: 2025.08.30 06:00:00 #
 # ================================================== #
 
 from typing import List, Tuple
@@ -17,6 +17,7 @@ from pygpt_net.core.events import (
     ControlEvent,
     AppEvent,
     RenderEvent,
+    RealtimeEvent,
 )
 
 
@@ -71,6 +72,14 @@ class Dispatcher:
 
         handled = False
 
+        # realtime first, if it's a realtime event
+        if isinstance(event, RealtimeEvent):
+            controller.realtime.handle(event)
+            if log_event:
+                debug.info(f"[event] Dispatch end: {event.full_name} ({event.call_id})")
+            self.call_id += 1
+            return [], event
+
         # kernel
         if isinstance(event, KernelEvent):
             kernel_auto = (KernelEvent.INIT, KernelEvent.RESTART, KernelEvent.STOP, KernelEvent.TERMINATE)
@@ -96,20 +105,47 @@ class Dispatcher:
         if handled:
             return [], event
 
+        # realtime
+        controller.realtime.handle(event)
+        if event.stop:
+            if log_event:
+                debug.info(f"[event] Skipping... (stopped): {event.name}")
+            return [], event
+
         # agents
         controller.agent.handle(event)
+        if event.stop:
+            if log_event:
+                debug.info(f"[event] Skipping... (stopped): {event.name}")
+            return [], event
 
         # ctx
         controller.ctx.handle(event)
+        if event.stop:
+            if log_event:
+                debug.info(f"[event] Skipping... (stopped): {event.name}")
+            return [], event
 
         # model
         controller.model.handle(event)
+        if event.stop:
+            if log_event:
+                debug.info(f"[event] Skipping... (stopped): {event.name}")
+            return [], event
 
         # idx
         controller.idx.handle(event)
+        if event.stop:
+            if log_event:
+                debug.info(f"[event] Skipping... (stopped): {event.name}")
+            return [], event
 
         # ui
         controller.ui.handle(event)
+        if event.stop:
+            if log_event:
+                debug.info(f"[event] Skipping... (stopped): {event.name}")
+            return [], event
 
         # access
         if isinstance(event, (ControlEvent, AppEvent)):
pygpt_net/core/events/__init__.py
CHANGED
@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date:
+# Updated Date: 2025.08.30 06:00:00 #
 # ================================================== #
 
 from .base import BaseEvent
@@ -14,4 +14,5 @@ from .app import AppEvent
 from .control import ControlEvent
 from .event import Event
 from .kernel import KernelEvent
+from .realtime import RealtimeEvent
 from .render import RenderEvent