pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/{container.py → app_core.py} +5 -6
  5. pygpt_net/controller/__init__.py +5 -2
  6. pygpt_net/controller/access/control.py +1 -9
  7. pygpt_net/controller/assistant/assistant.py +4 -4
  8. pygpt_net/controller/assistant/batch.py +7 -7
  9. pygpt_net/controller/assistant/files.py +4 -4
  10. pygpt_net/controller/assistant/threads.py +3 -3
  11. pygpt_net/controller/attachment/attachment.py +4 -7
  12. pygpt_net/controller/audio/audio.py +25 -1
  13. pygpt_net/controller/audio/ui.py +2 -2
  14. pygpt_net/controller/chat/audio.py +1 -8
  15. pygpt_net/controller/chat/common.py +30 -4
  16. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  17. pygpt_net/controller/chat/output.py +8 -3
  18. pygpt_net/controller/chat/stream.py +4 -405
  19. pygpt_net/controller/chat/text.py +3 -2
  20. pygpt_net/controller/chat/vision.py +11 -19
  21. pygpt_net/controller/config/placeholder.py +1 -1
  22. pygpt_net/controller/ctx/ctx.py +1 -1
  23. pygpt_net/controller/ctx/summarizer.py +1 -1
  24. pygpt_net/controller/kernel/kernel.py +11 -3
  25. pygpt_net/controller/kernel/reply.py +5 -1
  26. pygpt_net/controller/mode/mode.py +21 -12
  27. pygpt_net/controller/plugins/settings.py +3 -2
  28. pygpt_net/controller/presets/editor.py +112 -99
  29. pygpt_net/controller/realtime/__init__.py +12 -0
  30. pygpt_net/controller/realtime/manager.py +53 -0
  31. pygpt_net/controller/realtime/realtime.py +268 -0
  32. pygpt_net/controller/theme/theme.py +3 -2
  33. pygpt_net/controller/ui/mode.py +7 -0
  34. pygpt_net/controller/ui/ui.py +19 -1
  35. pygpt_net/controller/ui/vision.py +4 -4
  36. pygpt_net/core/agents/legacy.py +2 -2
  37. pygpt_net/core/agents/runners/openai_workflow.py +2 -2
  38. pygpt_net/core/assistants/files.py +5 -5
  39. pygpt_net/core/assistants/store.py +4 -4
  40. pygpt_net/core/audio/audio.py +6 -1
  41. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  42. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  43. pygpt_net/core/audio/backend/native/player.py +139 -0
  44. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  45. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  46. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  47. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  48. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  49. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  50. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  51. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  52. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  53. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  54. pygpt_net/core/audio/backend/shared/player.py +137 -0
  55. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  56. pygpt_net/core/audio/capture.py +5 -0
  57. pygpt_net/core/audio/output.py +13 -2
  58. pygpt_net/core/audio/whisper.py +6 -2
  59. pygpt_net/core/bridge/bridge.py +4 -3
  60. pygpt_net/core/bridge/worker.py +31 -9
  61. pygpt_net/core/debug/console/console.py +2 -2
  62. pygpt_net/core/debug/presets.py +2 -2
  63. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  64. pygpt_net/core/events/__init__.py +2 -1
  65. pygpt_net/core/events/realtime.py +55 -0
  66. pygpt_net/core/experts/experts.py +2 -2
  67. pygpt_net/core/image/image.py +51 -1
  68. pygpt_net/core/modes/modes.py +2 -2
  69. pygpt_net/core/presets/presets.py +3 -3
  70. pygpt_net/core/realtime/options.py +87 -0
  71. pygpt_net/core/realtime/shared/__init__.py +0 -0
  72. pygpt_net/core/realtime/shared/audio.py +213 -0
  73. pygpt_net/core/realtime/shared/loop.py +64 -0
  74. pygpt_net/core/realtime/shared/session.py +59 -0
  75. pygpt_net/core/realtime/shared/text.py +37 -0
  76. pygpt_net/core/realtime/shared/tools.py +276 -0
  77. pygpt_net/core/realtime/shared/turn.py +38 -0
  78. pygpt_net/core/realtime/shared/types.py +16 -0
  79. pygpt_net/core/realtime/worker.py +164 -0
  80. pygpt_net/core/tokens/tokens.py +4 -4
  81. pygpt_net/core/types/__init__.py +1 -0
  82. pygpt_net/core/types/image.py +48 -0
  83. pygpt_net/core/types/mode.py +5 -2
  84. pygpt_net/core/vision/analyzer.py +1 -1
  85. pygpt_net/data/config/config.json +13 -4
  86. pygpt_net/data/config/models.json +219 -101
  87. pygpt_net/data/config/modes.json +3 -9
  88. pygpt_net/data/config/settings.json +135 -27
  89. pygpt_net/data/config/settings_section.json +2 -2
  90. pygpt_net/data/locale/locale.de.ini +7 -7
  91. pygpt_net/data/locale/locale.en.ini +25 -12
  92. pygpt_net/data/locale/locale.es.ini +7 -7
  93. pygpt_net/data/locale/locale.fr.ini +7 -7
  94. pygpt_net/data/locale/locale.it.ini +7 -7
  95. pygpt_net/data/locale/locale.pl.ini +8 -8
  96. pygpt_net/data/locale/locale.uk.ini +7 -7
  97. pygpt_net/data/locale/locale.zh.ini +3 -3
  98. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  99. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  100. pygpt_net/item/model.py +23 -3
  101. pygpt_net/plugin/audio_input/plugin.py +37 -4
  102. pygpt_net/plugin/audio_input/simple.py +57 -8
  103. pygpt_net/plugin/cmd_files/worker.py +3 -0
  104. pygpt_net/plugin/openai_dalle/plugin.py +4 -4
  105. pygpt_net/plugin/openai_vision/plugin.py +12 -13
  106. pygpt_net/provider/agents/openai/agent.py +5 -5
  107. pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
  108. pygpt_net/provider/agents/openai/agent_planner.py +5 -6
  109. pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
  110. pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
  111. pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
  112. pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
  113. pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
  114. pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
  115. pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
  116. pygpt_net/provider/agents/openai/evolve.py +5 -5
  117. pygpt_net/provider/agents/openai/supervisor.py +4 -4
  118. pygpt_net/provider/api/__init__.py +27 -0
  119. pygpt_net/provider/api/anthropic/__init__.py +68 -0
  120. pygpt_net/provider/api/google/__init__.py +295 -0
  121. pygpt_net/provider/api/google/audio.py +121 -0
  122. pygpt_net/provider/api/google/chat.py +591 -0
  123. pygpt_net/provider/api/google/image.py +427 -0
  124. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  125. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  126. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  127. pygpt_net/provider/api/google/tools.py +222 -0
  128. pygpt_net/provider/api/google/vision.py +129 -0
  129. pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
  130. pygpt_net/provider/api/openai/agents/__init__.py +0 -0
  131. pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
  132. pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
  133. pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
  134. pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
  135. pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
  136. pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
  137. pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
  138. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  139. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  140. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  141. pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
  142. pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
  143. pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
  144. pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
  145. pygpt_net/provider/api/openai/worker/__init__.py +0 -0
  146. pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
  147. pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
  148. pygpt_net/provider/audio_input/google_genai.py +103 -0
  149. pygpt_net/provider/audio_input/openai_whisper.py +1 -1
  150. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  151. pygpt_net/provider/audio_output/openai_tts.py +9 -6
  152. pygpt_net/provider/core/config/patch.py +26 -0
  153. pygpt_net/provider/core/model/patch.py +20 -0
  154. pygpt_net/provider/core/preset/json_file.py +2 -4
  155. pygpt_net/provider/llms/anthropic.py +2 -5
  156. pygpt_net/provider/llms/base.py +4 -3
  157. pygpt_net/provider/llms/google.py +8 -9
  158. pygpt_net/provider/llms/openai.py +1 -1
  159. pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
  160. pygpt_net/ui/dialog/preset.py +71 -55
  161. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  162. pygpt_net/ui/layout/toolbox/image.py +5 -0
  163. pygpt_net/ui/main.py +6 -4
  164. pygpt_net/ui/widget/option/combo.py +15 -1
  165. pygpt_net/utils.py +9 -0
  166. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
  167. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
  168. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  169. /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
  170. /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
  171. /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
  172. /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
  173. /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
  174. /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
  175. /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
  176. /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
  177. /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
  178. /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
  179. /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
  180. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  181. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  182. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,211 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ import numpy as np
13
+ import audioop
14
+
15
def qaudio_dtype(sample_format):
    """
    Return the numpy dtype that matches a QAudioFormat.SampleFormat value.

    :param sample_format: QAudioFormat.SampleFormat
    :return: numpy dtype (np.uint8, np.int16, np.int32 or np.float32)
    :raises ValueError: if the sample format is not one of the four above
    """
    # Imported lazily; an import failure propagates to the caller unchanged.
    from PySide6.QtMultimedia import QAudioFormat

    formats = QAudioFormat.SampleFormat
    dtype_by_format = (
        (formats.UInt8, np.uint8),
        (formats.Int16, np.int16),
        (formats.Int32, np.int32),
        (formats.Float, np.float32),
    )
    for known_format, dtype in dtype_by_format:
        if sample_format == known_format:
            return dtype
    raise ValueError("Unsupported sample format")
38
+
39
def qaudio_norm_factor(sample_format):
    """
    Return the divisor used to normalize samples of a QAudioFormat.SampleFormat.

    :param sample_format: QAudioFormat.SampleFormat
    :return: normalization factor (float)
    :raises ValueError: if the sample format is unsupported
    """
    # Imported lazily; an import failure propagates to the caller unchanged.
    from PySide6.QtMultimedia import QAudioFormat

    formats = QAudioFormat.SampleFormat
    factor_by_format = (
        (formats.UInt8, 255.0),
        (formats.Int16, 32768.0),
        (formats.Int32, float(2 ** 31)),
        (formats.Float, 1.0),
    )
    for known_format, factor in factor_by_format:
        if sample_format == known_format:
            return factor
    raise ValueError("Unsupported sample format")
62
+
63
def qaudio_to_s16le(raw: bytes, sample_format) -> bytes:
    """
    Convert arbitrary QAudioFormat sample format to PCM16 little-endian.

    Input samples are read as little-endian and the result is written with an
    explicit little-endian int16 dtype, so the output does not depend on the
    host byte order. NaN float samples are mapped to silence (0).

    :param raw: input byte buffer
    :param sample_format: QAudioFormat.SampleFormat
    :return: converted byte buffer in PCM16 little-endian; ``raw`` is returned
             unchanged when QtMultimedia is unavailable or the format is unknown
    """
    if not raw:
        return b""
    try:
        from PySide6.QtMultimedia import QAudioFormat
    except Exception:
        # Best effort: without QtMultimedia the format cannot be identified.
        return raw

    formats = QAudioFormat.SampleFormat
    if sample_format == formats.Int16:
        return raw
    if sample_format == formats.UInt8:
        # Re-center unsigned 8-bit around zero, then scale to 16-bit range.
        samples = np.frombuffer(raw, dtype=np.uint8).astype(np.int16)
        return ((samples - 128) << 8).astype("<i2").tobytes()
    if sample_format == formats.Int32:
        # Keep the 16 most significant bits of each 32-bit sample.
        samples = np.frombuffer(raw, dtype="<i4")
        return (samples >> 16).astype("<i2").tobytes()
    if sample_format == formats.Float:
        samples = np.frombuffer(raw, dtype="<f4")
        # NaN -> 0 first: casting NaN to an integer type is undefined.
        samples = np.clip(np.nan_to_num(samples, nan=0.0), -1.0, 1.0)
        return (samples * 32767.0).astype("<i2").tobytes()
    return raw
94
+
95
def pyaudio_to_s16le(raw: bytes, fmt, pa_instance=None) -> bytes:
    """
    Convert PyAudio input buffer to PCM16 little-endian without changing
    sample rate or channel count.

    For formats without a dedicated branch (e.g. paInt24, paInt32) the sample
    width is queried from ``pa_instance`` when given, otherwise from the
    module-level ``pyaudio.get_sample_size``. It is never assumed to be
    2 bytes, so wide samples are no longer passed through as if they were
    already PCM16.

    :param raw: input byte buffer
    :param fmt: PyAudio format (e.g., pyaudio.paInt16)
    :param pa_instance: Optional PyAudio instance for sample size queries
    :return: converted byte buffer in PCM16 little-endian; ``raw`` is returned
             unchanged when PyAudio is unavailable or the conversion fails
    """
    if not raw:
        return b""
    try:
        import pyaudio
    except Exception:
        return raw

    try:
        if fmt == pyaudio.paInt16:
            return raw
        if fmt == pyaudio.paUInt8:
            # Re-center unsigned 8-bit around zero, then scale to 16-bit range.
            samples = np.frombuffer(raw, dtype=np.uint8).astype(np.int16)
            return ((samples - 128) << 8).astype("<i2").tobytes()
        if fmt == pyaudio.paInt8:
            samples = np.frombuffer(raw, dtype=np.int8).astype(np.int16)
            return (samples << 8).astype("<i2").tobytes()
        if fmt == pyaudio.paFloat32:
            samples = np.frombuffer(raw, dtype="<f4")
            # NaN -> 0 first: casting NaN to an integer type is undefined.
            samples = np.clip(np.nan_to_num(samples, nan=0.0), -1.0, 1.0)
            return (samples * 32767.0).astype("<i2").tobytes()

        # Remaining linear formats: plain width conversion.
        if pa_instance is not None:
            width = pa_instance.get_sample_size(fmt)
        else:
            width = pyaudio.get_sample_size(fmt)
        return audioop.lin2lin(raw, width, 2)
    except Exception:
        # Best effort: hand the buffer back untouched rather than fail capture.
        return raw
136
+
137
def f32_to_s16le(raw: bytes) -> bytes:
    """
    Convert float32 little-endian PCM to int16 little-endian PCM.

    Samples are clipped to [-1.0, 1.0] and scaled by 32767; NaN samples are
    mapped to silence (0). Byte order is explicit on both sides, so the result
    does not depend on the host byte order.

    :param raw: input byte buffer in float32
    :return: converted byte buffer in int16, or b"" if the buffer is empty or
             cannot be interpreted as whole float32 samples
    """
    if not raw:
        return b""
    try:
        samples = np.frombuffer(raw, dtype="<f4")
        # NaN -> 0 first: casting NaN to an integer type is undefined.
        samples = np.clip(np.nan_to_num(samples, nan=0.0), -1.0, 1.0)
        return (samples * 32767.0).astype("<i2").tobytes()
    except Exception:
        return b""
153
+
154
def convert_s16_pcm(
        data: bytes,
        in_rate: int,
        in_channels: int,
        out_rate: int,
        out_channels: int,
        out_width: int = 2,
        out_format: str = "s16"  # "s16" | "u8" | "f32"
) -> bytes:
    """
    Minimal PCM converter to target format:
    - assumes input is S16LE,
    - converts channels (mono<->stereo) and sample rate,
    - converts width if needed,
    - applies bias for u8 or float conversion if requested.

    The float32 output is computed directly from the 16-bit samples. Widening
    to 32-bit integers first and then reading the buffer as int16 would yield
    twice as many samples, every other one being zero.

    :param data: input byte buffer in S16LE
    :param in_rate: input sample rate
    :param in_channels: input channel count
    :param out_rate: output sample rate
    :param out_channels: output channel count
    :param out_width: output sample width in bytes (1, 2, or 4)
    :param out_format: output format ("s16", "u8", or "f32")
    :return: converted byte buffer; the original ``data`` if conversion fails
    """
    if not data:
        return b""
    try:
        src = data

        # channels
        if in_channels != out_channels:
            if in_channels == 2 and out_channels == 1:
                src = audioop.tomono(src, 2, 0.5, 0.5)
            elif in_channels == 1 and out_channels == 2:
                src = audioop.tostereo(src, 2, 1.0, 1.0)
            else:
                mid = audioop.tomono(src, 2, 0.5, 0.5) if in_channels > 1 else src
                src = audioop.tostereo(mid, 2, 1.0, 1.0) if out_channels == 2 else mid

        # sample rate
        if in_rate != out_rate:
            src, _ = audioop.ratecv(src, 2, out_channels, in_rate, out_rate, None)

        # float output: scale the 16-bit samples straight to [-1.0, 1.0)
        if out_format == "f32" and out_width == 4:
            samples = np.frombuffer(src, dtype="<i2").astype(np.float32) / 32768.0
            return samples.astype("<f4").tobytes()

        # sample width (Int16 -> other integer widths if needed)
        if out_width != 2:
            src = audioop.lin2lin(src, 2, out_width)

        # unsigned 8-bit: lin2lin yields signed samples, shift them to center at 0x80
        if out_format == "u8" and out_width == 1:
            src = audioop.bias(src, 1, 128)

        return src
    except Exception:
        # Best effort: hand the input back untouched rather than fail playback.
        return data
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 04:00:00 #
10
+ # ================================================== #
11
+
12
+ import numpy as np
13
+ from pydub import AudioSegment
14
+
15
def compute_envelope_from_file(audio_file: str, chunk_ms: int = 100) -> list:
    """
    Calculate the volume envelope of an audio file (0-100 per chunk).

    Each chunk's RMS is converted to dBFS relative to the full-scale amplitude
    of the decoded audio's own sample width, clamped to [-60, 0] dB and mapped
    linearly onto 0-100.

    :param audio_file: Path to the audio file
    :param chunk_ms: Chunk size in milliseconds
    :return: List of volume levels (0-100) per chunk
    """
    audio = AudioSegment.from_file(audio_file)
    # Full scale depends on the sample width; a fixed 16-bit constant would
    # mis-scale 8-bit and 32-bit sources.
    max_amplitude = float(audio.max_possible_amplitude)
    envelope = []

    for ms in range(0, len(audio), chunk_ms):
        # rms is recomputed on every access, so read it once per chunk
        rms = float(audio[ms:ms + chunk_ms].rms or 0.0)
        if rms > 0.0:
            db = 20.0 * np.log10(max(1e-12, rms / max_amplitude))
        else:
            db = -60.0
        db = max(-60.0, min(0.0, db))
        envelope.append(float(((db + 60.0) / 60.0) * 100.0))

    return envelope
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 04:00:00 #
10
+ # ================================================== #
11
+
12
+ from typing import Optional, Callable
13
+
14
+ import os
15
+ from PySide6.QtCore import QObject, QTimer, QUrl
16
+ from PySide6.QtMultimedia import QMediaPlayer, QAudioOutput
17
+
18
+ from ..shared import compute_envelope_from_file
19
+
20
class NativePlayer(QObject):
    """
    Thin wrapper around QtMultimedia audio playback with level metering.
    """
    def __init__(self, window=None, chunk_ms: int = 10):
        """
        :param window: parent object; also used to reach ``window.core.audio``
                       when MP3 -> WAV conversion is requested
        :param chunk_ms: envelope resolution in milliseconds
        """
        super().__init__(window)
        self.window = window
        self.chunk_ms = int(chunk_ms)
        self.audio_output: Optional[QAudioOutput] = None
        self.player: Optional[QMediaPlayer] = None
        self.playback_timer: Optional[QTimer] = None
        self.volume_timer: Optional[QTimer] = None
        self.envelope = []

    def stop_timers(self):
        """Stop playback timers and release them."""
        for attr in ("playback_timer", "volume_timer"):
            timer = getattr(self, attr)
            if timer is None:
                continue
            timer.stop()
            # Timers are children of this object; without deleteLater() every
            # playback would leave two dead QTimer children behind.
            timer.deleteLater()
            setattr(self, attr, None)

    def stop(self, signals=None):
        """
        Stop playback and timers.

        :param signals: Signals to emit on stop
        """
        if self.player is not None:
            try:
                self.player.stop()
            except Exception:
                pass
        self.stop_timers()
        if signals is not None:
            try:
                signals.volume_changed.emit(0)
            except Exception:
                pass

    def update_volume(self, signals=None):
        """
        Update the volume based on the current position in the audio file.

        :param signals: Signals to emit volume changes
        """
        if not self.player:
            return
        pos = self.player.position()
        index = int(pos / self.chunk_ms)
        # Positions outside the envelope (past the end, or negative) read as silence.
        volume = self.envelope[index] if 0 <= index < len(self.envelope) else 0
        if signals is not None:
            signals.volume_changed.emit(volume)

    def play_after(
            self,
            audio_file: str,
            event_name: str,
            stopped: Callable[[], bool],
            signals=None,
            auto_convert_to_wav: bool = False,
            select_output_device: Optional[Callable[[], object]] = None,
    ):
        """
        Start audio playback using QtMultimedia with periodic volume updates.

        Any playback started earlier by this instance is stopped first, so its
        timers do not keep firing next to the new ones.

        :param audio_file: Path to audio file
        :param event_name: Event name to emit on playback start
        :param stopped: Callable returning True when playback should stop
        :param signals: Signals to emit on playback
        :param auto_convert_to_wav: auto convert mp3 to wav if True
        :param select_output_device: callable returning QAudioDevice for output
        """
        # Release the previous player and timers (no signal emission here).
        self.stop()

        self.audio_output = QAudioOutput()
        self.audio_output.setVolume(1.0)

        if callable(select_output_device):
            try:
                self.audio_output.setDevice(select_output_device())
            except Exception:
                # keep the default output device if selection fails
                pass

        if auto_convert_to_wav and audio_file.lower().endswith('.mp3'):
            tmp_dir = self.window.core.audio.get_cache_dir()
            base_name = os.path.splitext(os.path.basename(audio_file))[0]
            dst_file = os.path.join(tmp_dir, "_" + base_name + ".wav")
            wav_file = self.window.core.audio.mp3_to_wav(audio_file, dst_file)
            if wav_file:
                audio_file = wav_file

        def check_stop():
            # Polled by playback_timer: stop on request or when playback ended.
            if stopped():
                self.stop(signals=signals)
            elif self.player:
                if self.player.playbackState() == QMediaPlayer.PlaybackState.StoppedState:
                    self.stop(signals=signals)

        self.envelope = compute_envelope_from_file(audio_file, chunk_ms=self.chunk_ms)
        self.player = QMediaPlayer()
        self.player.setAudioOutput(self.audio_output)
        self.player.setSource(QUrl.fromLocalFile(audio_file))
        self.player.play()

        # Both timers are parented to this object so they share its lifetime.
        self.playback_timer = QTimer(self)
        self.playback_timer.setInterval(100)
        self.playback_timer.timeout.connect(check_stop)

        self.volume_timer = QTimer(self)
        self.volume_timer.setInterval(10)
        self.volume_timer.timeout.connect(lambda: self.update_volume(signals))

        self.playback_timer.start()
        self.volume_timer.start()
        if signals is not None:
            signals.volume_changed.emit(0)
            signals.playback.emit(event_name)
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ from pygpt_net.core.events import RealtimeEvent
13
+
14
def build_rt_input_delta_event(
        rate: int,
        channels: int,
        data: bytes,
        final: bool
) -> RealtimeEvent:
    """
    Create an RT_INPUT_AUDIO_DELTA event carrying a provider-agnostic audio payload.

    :param rate: Sample rate (e.g., 16000)
    :param channels: Number of channels (1 for mono, 2 for stereo)
    :param data: Audio data bytes (a falsy value is sent as empty bytes)
    :param final: Whether this is the final chunk
    :return: RealtimeEvent instance
    """
    payload = {
        "data": data or b"",
        "mime": "audio/pcm",
        "rate": int(rate),
        "channels": int(channels),
        "final": bool(final),
    }
    return RealtimeEvent(RealtimeEvent.RT_INPUT_AUDIO_DELTA, {"payload": payload})
41
+
42
def build_output_volume_event(value: int) -> RealtimeEvent:
    """
    Create an RT_OUTPUT_AUDIO_VOLUME_CHANGED event.

    :param value: Volume level (0-100), coerced to int
    :return: RealtimeEvent instance
    """
    event_data = {"volume": int(value)}
    return RealtimeEvent(RealtimeEvent.RT_OUTPUT_AUDIO_VOLUME_CHANGED, event_data)
@@ -42,6 +42,11 @@ class Capture:
42
42
  backend = "native"
43
43
  return self.backends[backend]
44
44
 
45
+ def setup(self):
46
+ """Setup audio input backend"""
47
+ for b in self.backends.values():
48
+ b.set_rt_signals(self.window.controller.realtime.signals)
49
+
45
50
  def get_default_input_device(self) -> Tuple[int, str]:
46
51
  """
47
52
  Get default input device
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.07 03:00:00 #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from typing import List, Tuple
@@ -42,6 +42,10 @@ class Output:
42
42
  backend = "native"
43
43
  return self.backends[backend]
44
44
 
45
+ def setup(self):
46
+ """Setup audio output backend"""
47
+ pass
48
+
45
49
  def play(
46
50
  self,
47
51
  audio_file: str,
@@ -88,4 +92,11 @@ class Output:
88
92
 
89
93
  :return: (id, name)
90
94
  """
91
- return self.get_backend().get_default_output_device()
95
+ return self.get_backend().get_default_output_device()
96
+
97
def handle_realtime(self, payload, signals):
    """
    Forward a real-time audio payload to the active output backend

    :param payload: real-time audio payload, handed to the backend unchanged
    :param signals: accepted for interface compatibility; not used by this method
    """
    backend = self.get_backend()
    backend.handle_realtime(payload)
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.12.14 18:00:00 #
9
+ # Updated Date: 2025.08.29 18:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from typing import List
@@ -21,10 +21,14 @@ class Whisper:
21
21
  self.window = window
22
22
  self.voices = [
23
23
  "alloy",
24
+ "ash",
25
+ "ballad",
26
+ "coral",
24
27
  "echo",
25
28
  "fable",
26
- "onyx",
27
29
  "nova",
30
+ "onyx",
31
+ "sage",
28
32
  "shimmer",
29
33
  ]
30
34
 
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.15 23:00:00 #
9
+ # Updated Date: 2025.08.30 06:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import time
@@ -59,7 +59,7 @@ class Bridge:
59
59
  if self.window.controller.kernel.stopped():
60
60
  return False
61
61
 
62
- allowed_model_change = MODE_VISION
62
+ allowed_model_change = [MODE_CHAT]
63
63
  is_virtual = False
64
64
  force_sync = False
65
65
 
@@ -254,7 +254,7 @@ class Bridge:
254
254
  context.mode = MODE_RESEARCH
255
255
 
256
256
  # default: OpenAI API call
257
- return self.window.core.gpt.quick_call(
257
+ return self.window.core.api.openai.quick_call(
258
258
  context=context,
259
259
  extra=extra,
260
260
  )
@@ -268,6 +268,7 @@ class Bridge:
268
268
  worker = BridgeWorker()
269
269
  worker.window = self.window
270
270
  worker.signals.response.connect(self.window.controller.kernel.listener)
271
+ worker.rt_signals = self.window.controller.realtime.signals # Realtime signals
271
272
  return worker
272
273
 
273
274
  def apply_rate_limit(self):
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.11 14:00:00 #
9
+ # Updated Date: 2025.08.30 06:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from PySide6.QtCore import QObject, Signal, QRunnable, Slot
@@ -17,6 +17,8 @@ from pygpt_net.core.types import (
17
17
  MODE_LANGCHAIN,
18
18
  MODE_LLAMA_INDEX,
19
19
  MODE_ASSISTANT,
20
+ MODE_VISION,
21
+ MODE_LOOP_NEXT,
20
22
  )
21
23
  from pygpt_net.core.events import KernelEvent, Event
22
24
 
@@ -29,8 +31,9 @@ class BridgeSignals(QObject):
29
31
  class BridgeWorker(QRunnable):
30
32
  """Bridge worker"""
31
33
  def __init__(self, *args, **kwargs):
32
- QRunnable.__init__(self)
34
+ super().__init__()
33
35
  self.signals = BridgeSignals()
36
+ self.rt_signals = None
34
37
  self.args = args
35
38
  self.kwargs = kwargs
36
39
  self.window = None
@@ -64,6 +67,8 @@ class BridgeWorker(QRunnable):
64
67
  extra=self.extra,
65
68
  )
66
69
  """
70
+ elif self.mode == MODE_VISION:
71
+ raise Exception("Vision mode is deprecated from v2.6.30 and integrated into Chat. ")
67
72
 
68
73
  # LlamaIndex: chat with files
69
74
  if self.mode == MODE_LLAMA_INDEX:
@@ -87,7 +92,7 @@ class BridgeWorker(QRunnable):
87
92
  self.extra["error"] = str(self.window.core.agents.runner.get_error())
88
93
 
89
94
  # Loop: next step
90
- elif self.mode == "loop_next": # virtual mode
95
+ elif self.mode == MODE_LOOP_NEXT: # virtual mode
91
96
  result = self.window.core.agents.runner.loop.run_next(
92
97
  context=self.context,
93
98
  extra=self.extra,
@@ -98,12 +103,29 @@ class BridgeWorker(QRunnable):
98
103
  else:
99
104
  self.extra["error"] = str(self.window.core.agents.runner.get_error())
100
105
 
101
- # API OpenAI: chat, completion, vision, image, assistants
106
+ # API SDK: chat, completion, vision, image, assistants
102
107
  else:
103
- result = self.window.core.gpt.call(
104
- context=self.context,
105
- extra=self.extra,
106
- )
108
+ sdk = "openai"
109
+ model = self.context.model
110
+ if model.provider == "google":
111
+ if self.window.core.config.get("api_native_google", False):
112
+ sdk = "google"
113
+
114
+ # call appropriate SDK
115
+ if sdk == "google":
116
+ # print("Using Google SDK")
117
+ result = self.window.core.api.google.call(
118
+ context=self.context,
119
+ extra=self.extra,
120
+ rt_signals=self.rt_signals,
121
+ )
122
+ elif sdk == "openai":
123
+ # print("Using OpenAI SDK")
124
+ result = self.window.core.api.openai.call(
125
+ context=self.context,
126
+ extra=self.extra,
127
+ rt_signals=self.rt_signals,
128
+ )
107
129
  except Exception as e:
108
130
  if self.signals:
109
131
  self.extra["error"] = e
@@ -170,7 +192,7 @@ class BridgeWorker(QRunnable):
170
192
  ad_context = self.window.controller.chat.attachment.get_context(ctx, self.context.history)
171
193
  ad_mode = self.window.controller.chat.attachment.get_mode()
172
194
  if ad_context:
173
- self.context.prompt += "\n\n" + ad_context # append to input text
195
+ self.context.prompt += f"\n\n{ad_context}" # append to input text
174
196
  if (ad_mode == self.window.controller.chat.attachment.MODE_QUERY_CONTEXT
175
197
  or self.mode in [MODE_AGENT_LLAMA, MODE_AGENT_OPENAI]):
176
198
  ctx.hidden_input = ad_context # store for future use, only if query context
@@ -69,8 +69,8 @@ class Console:
69
69
  elif msg.lower() == "mpkfa":
70
70
  self.log("GOD MODE ACTIVATED ;)")
71
71
  elif msg == "oclr":
72
- if self.window.core.gpt.client:
73
- self.window.core.gpt.client.close()
72
+ if self.window.core.api.openai.client:
73
+ self.window.core.api.openai.client.close()
74
74
  self.log("OpenAI client closed")
75
75
  else:
76
76
  self.log("OpenAI client not initialized")
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.07.30 00:00:00 #
9
+ # Updated Date: 2025.08.28 09:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import os
@@ -62,7 +62,7 @@ class PresetsDebug:
62
62
  MODE_CHAT: preset.chat,
63
63
  MODE_COMPLETION: preset.completion,
64
64
  MODE_IMAGE: preset.img,
65
- MODE_VISION: preset.vision,
65
+ # MODE_VISION: preset.vision,
66
66
  # MODE_LANGCHAIN: preset.langchain,
67
67
  MODE_ASSISTANT: preset.assistant,
68
68
  MODE_LLAMA_INDEX: preset.llama_index,