pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +8 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +4 -0
- pygpt_net/controller/__init__.py +5 -2
- pygpt_net/controller/audio/audio.py +25 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +29 -3
- pygpt_net/controller/chat/handler/__init__.py +0 -0
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +3 -1071
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +268 -0
- pygpt_net/controller/ui/mode.py +7 -0
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +13 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +2 -1
- pygpt_net/core/bridge/worker.py +4 -1
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/image/image.py +51 -1
- pygpt_net/core/realtime/__init__.py +0 -0
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +164 -0
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +48 -0
- pygpt_net/data/config/config.json +10 -4
- pygpt_net/data/config/models.json +149 -103
- pygpt_net/data/config/settings.json +50 -0
- pygpt_net/data/locale/locale.de.ini +5 -5
- pygpt_net/data/locale/locale.en.ini +19 -13
- pygpt_net/data/locale/locale.es.ini +5 -5
- pygpt_net/data/locale/locale.fr.ini +5 -5
- pygpt_net/data/locale/locale.it.ini +5 -5
- pygpt_net/data/locale/locale.pl.ini +5 -5
- pygpt_net/data/locale/locale.uk.ini +5 -5
- pygpt_net/data/locale/locale.zh.ini +1 -1
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/provider/api/google/__init__.py +39 -6
- pygpt_net/provider/api/google/audio.py +8 -1
- pygpt_net/provider/api/google/chat.py +45 -6
- pygpt_net/provider/api/google/image.py +226 -86
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/openai/__init__.py +22 -2
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/google_tts.py +0 -12
- pygpt_net/provider/audio_output/openai_tts.py +8 -5
- pygpt_net/provider/core/config/patch.py +15 -0
- pygpt_net/provider/core/model/patch.py +11 -0
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/ui/layout/toolbox/footer.py +16 -0
- pygpt_net/ui/layout/toolbox/image.py +5 -0
- pygpt_net/ui/widget/option/combo.py +15 -1
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0

pygpt_net/core/events/realtime.py

@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.30 06:00:00 #
+# ================================================== #
+
+from typing import Optional
+
+from .base import BaseEvent
+
+
+class RealtimeEvent(BaseEvent):
+    """
+    Realtime events
+
+    - RT_OUTPUT_AUDIO_DELTA - audio output chunk (with payload)
+    - RT_OUTPUT_READY - audio output is ready (STREAM_BEGIN)
+    - RT_OUTPUT_TEXT_DELTA - text chunk (delta)
+    - RT_OUTPUT_AUDIO_END - audio output ended (STREAM_END)
+    - RT_OUTPUT_TURN_END - audio output turn ended (TURN_END)
+    - RT_OUTPUT_AUDIO_ERROR - audio output error (STREAM_ERROR)
+    - RT_OUTPUT_AUDIO_VOLUME_CHANGED - audio output volume changed (volume level)
+    """
+
+    # realtime events
+    RT_OUTPUT_AUDIO_DELTA = "rt.output.audio.delta"
+    RT_OUTPUT_AUDIO_END = "rt.output.audio.end"
+    RT_OUTPUT_AUDIO_ERROR = "rt.output.audio.error"
+    RT_OUTPUT_AUDIO_VOLUME_CHANGED = "rt.output.audio.volume.changed"
+    RT_OUTPUT_AUDIO_COMMIT = "rt.output.audio.commit"
+    RT_OUTPUT_READY = "rt.output.audio.ready"
+    RT_OUTPUT_TEXT_DELTA = "rt.output.text.delta"
+    RT_OUTPUT_TURN_END = "rt.output.turn.end"
+    RT_INPUT_AUDIO_DELTA = "rt.input.audio.delta"
+    RT_INPUT_AUDIO_MANUAL_START = "rt.input.audio.manual.start"
+    RT_INPUT_AUDIO_MANUAL_STOP = "rt.input.audio.manual.stop"
+
+    def __init__(
+            self,
+            name: Optional[str] = None,
+            data: Optional[dict] = None,
+    ):
+        """
+        Event object class
+
+        :param name: event name
+        :param data: event data
+        """
+        super(RealtimeEvent, self).__init__(name, data)
+        self.id = "RealtimeEvent"
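
A minimal usage sketch for the new event class (illustrative only, not part of the package contents; the payload key "chunk" is a placeholder, and real payloads are produced by the realtime clients added under provider/api/*/realtime in this release):

    from pygpt_net.core.events.realtime import RealtimeEvent

    # build an output-text delta event with an illustrative payload
    event = RealtimeEvent(
        name=RealtimeEvent.RT_OUTPUT_TEXT_DELTA,
        data={"chunk": "partial text"},  # placeholder payload, for illustration only
    )
    print(event.id)                            # "RealtimeEvent"
    print(RealtimeEvent.RT_OUTPUT_TEXT_DELTA)  # "rt.output.text.delta"
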
pygpt_net/core/image/image.py
CHANGED

@@ -12,10 +12,11 @@
 import os
 import uuid
 from time import strftime
-from typing import List
+from typing import List, Dict

 from PySide6.QtCore import Slot, QObject

+from pygpt_net.core.types import IMAGE_AVAILABLE_RESOLUTIONS
 from pygpt_net.item.ctx import CtxItem
 from pygpt_net.utils import trans

@@ -141,3 +142,52 @@ class Image(QObject):
         img_dir = self.window.core.config.get_user_dir("img")
         filename = f"{dt_prefix}_{img_id}.png"
         return os.path.join(img_dir, filename)
+
+    def get_resolution_option(self) -> dict:
+        """
+        Get image resolution option for UI
+
+        :return: dict
+        """
+        return {
+            "type": "combo",
+            "slider": True,
+            "label": "img_resolution",
+            "value": "1024x1024",
+            "keys": self.get_available_resolutions(),
+        }
+
+    def get_available_resolutions(self, model: str = None) -> Dict[str, str]:
+        """
+        Get available image resolutions
+
+        :param model: model name
+        :return: dict of available resolutions
+        """
+        available = IMAGE_AVAILABLE_RESOLUTIONS
+        model_keys = available.keys()
+        # find by model if specified
+        if model:
+            model = self._normalize_model_name(model)
+            for key in model_keys:
+                if model.startswith(key):
+                    return available[key]
+
+        # return all available resolutions, but unique only
+        resolutions = {}
+        for key in model_keys:
+            resolutions.update(available[key])
+        return resolutions
+
+
+    def _normalize_model_name(self, model: str) -> str:
+        """
+        Normalize model id (strip optional 'models/' prefix).
+
+        :param model: model id
+        """
+        try:
+            return model.split("/")[-1]
+        except Exception:
+            return model
+
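
To show how the new resolution lookup behaves, here is a small standalone sketch of the same prefix-matching logic; the real mapping lives in pygpt_net.core.types.IMAGE_AVAILABLE_RESOLUTIONS, and the keys and values below are invented for the example:

    # hypothetical stand-in for IMAGE_AVAILABLE_RESOLUTIONS, keyed by model-id prefix
    AVAILABLE = {
        "dall-e-3": {"1024x1024": "1024x1024", "1792x1024": "1792x1024"},
        "imagen": {"1024x1024": "1024x1024", "2048x2048": "2048x2048"},
    }

    def resolutions_for(model: str) -> dict:
        model = model.split("/")[-1]  # same normalization as _normalize_model_name()
        for prefix, res in AVAILABLE.items():
            if model.startswith(prefix):
                return res  # first prefix match wins
        merged = {}
        for res in AVAILABLE.values():  # no match: merge all, duplicates collapse
            merged.update(res)
        return merged

    print(resolutions_for("models/imagen-4.0"))  # -> the "imagen" entry
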
pygpt_net/core/realtime/__init__.py
File without changes

pygpt_net/core/realtime/options.py

@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.31 23:00:00 #
+# ================================================== #
+
+from dataclasses import dataclass, field
+from typing import Optional
+
+@dataclass
+class RealtimeOptions:
+    """
+    Options for starting a realtime session.
+
+    :param provider: Provider name ("google" or "openai")
+    :param model: Model name
+    :param system_prompt: System prompt text
+    :param prompt: User prompt text
+    :param voice: Voice name for TTS
+    :param audio_data: Optional input audio data (bytes)
+    :param audio_format: Format of the input audio (e.g., "pcm16", "wav")
+    :param audio_rate: Sample rate of the input audio (e.g., 16000)
+    :param vad: Voice Activity Detection mode (e.g., "server_vad" or None for manual)
+    :param extra: Free-form dictionary for extra parameters
+    :param rt_signals: Real-time signals for event handling
+    """
+    provider: str = "openai"  # "google" | "openai"
+    model: Optional[str] = None
+    system_prompt: Optional[str] = None
+    prompt: Optional[str] = None
+    voice: Optional[str] = None
+
+    # Optional input audio
+    audio_data: Optional[bytes] = None
+    audio_format: Optional[str] = None  # e.g., "pcm16", "wav"
+    audio_rate: Optional[int] = None  # e.g., 16000
+
+    # Provider-specific VAD flag (use None for manual mode)
+    vad: Optional[str] = None  # e.g., "server_vad"
+
+    vad_end_silence_ms: Optional[int] = 2000  # VAD end silence in ms
+    vad_prefix_padding_ms: Optional[int] = 300  # VAD prefix padding in ms
+
+    # Real-time signals
+    rt_signals: field() = None  # RT signals
+
+    # Tools and remote tools
+    tools: Optional[list] = None
+    remote_tools: Optional[list] = None
+
+    # Auto-turn enable/disable
+    auto_turn: Optional[bool] = False
+
+    # Transcript enable/disable
+    transcribe: Optional[bool] = True
+
+    # Last session ID
+    rt_session_id: Optional[str] = None
+
+    # Extra parameters
+    extra: dict = field(default_factory=dict)
+
+    def to_dict(self):
+        return {
+            "provider": self.provider,
+            "model": self.model,
+            "system_prompt": self.system_prompt,
+            "prompt": self.prompt,
+            "voice": self.voice,
+            "audio_data (len)": len(self.audio_data) if self.audio_data else 0,
+            "audio_format": self.audio_format,
+            "audio_rate": self.audio_rate,
+            "vad": self.vad,
+            "vad_end_silence_ms": self.vad_end_silence_ms,
+            "vad_prefix_padding_ms": self.vad_prefix_padding_ms,
+            "tools": self.tools,
+            "remote_tools": self.remote_tools,
+            "auto_turn": self.auto_turn,
+            "transcribe": self.transcribe,
+            "rt_session_id": self.rt_session_id,
+            "extra": self.extra,
+        }
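
A construction sketch for the options object (illustrative; the model and voice names are placeholders, not values taken from this diff):

    from pygpt_net.core.realtime.options import RealtimeOptions

    # manual-turn session with raw PCM16 input at 16 kHz; vad=None means manual mode
    opts = RealtimeOptions(
        provider="openai",
        model="example-realtime-model",  # placeholder model id
        system_prompt="You are a helpful assistant.",
        voice="example-voice",           # placeholder voice name
        audio_data=b"\x00\x00" * 1600,   # 100 ms of PCM16 mono silence
        audio_format="pcm16",
        audio_rate=16000,
        vad=None,
    )
    print(opts.to_dict()["audio_data (len)"])  # 3200
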
pygpt_net/core/realtime/shared/__init__.py
File without changes

pygpt_net/core/realtime/shared/audio.py

@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.31 23:00:00 #
+# ================================================== #
+
+import io
+import math
+import os
+import wave
+import audioop
+from array import array
+import struct
+from typing import Optional, Tuple, List
+
+DEFAULT_24K = 24000
+
+def coerce_to_pcm16_mono(data: bytes, fmt: Optional[str], rate_hint: Optional[int], fallback_rate: int = DEFAULT_24K) -> Tuple[int, int, bytes]:
+    """
+    Convert input audio (PCM16 raw or WAV) to PCM16 mono bytes. Float WAV is treated as raw (best effort).
+    Returns (sample_rate, channels=1, pcm16_bytes).
+    """
+    if not data:
+        return fallback_rate, 1, b""
+    fmt = (fmt or "").lower().strip()
+    if fmt in ("pcm16", "pcm", "raw"):
+        sr = int(rate_hint) if rate_hint else fallback_rate
+        return sr, 1, data
+
+    # WAV path
+    try:
+        with wave.open(io.BytesIO(data), "rb") as wf:
+            sr = wf.getframerate() or fallback_rate
+            ch = wf.getnchannels() or 1
+            sw = wf.getsampwidth() or 2
+            frames = wf.readframes(wf.getnframes())
+
+            if sw != 2:
+                frames = audioop.lin2lin(frames, sw, 2)
+            if ch == 2:
+                frames = audioop.tomono(frames, 2, 0.5, 0.5)
+            elif ch != 1:
+                frames = audioop.tomono(frames, 2, 1.0, 0.0)
+
+            return sr, 1, frames
+    except Exception:
+        sr = int(rate_hint) if rate_hint else fallback_rate
+        return sr, 1, data
+
+def float32_to_int16_bytes(b: bytes) -> bytes:
+    """Convert little-endian float32 PCM [-1.0, 1.0] to int16 PCM."""
+    if not b:
+        return b""
+    try:
+        arr = array("f")
+        arr.frombytes(b)
+        if struct.unpack('<I', struct.pack('=I', 1))[0] != 1:  # fallback if non-little
+            arr.byteswap()
+        out = array("h", (max(-32768, min(32767, int(round(x * 32767.0)))) for x in arr))
+        return out.tobytes()
+    except Exception:
+        try:
+            n = len(b) // 4
+            vals = struct.unpack("<" + "f" * n, b[: n * 4])
+            out = array("h", (max(-32768, min(32767, int(round(x * 32767.0)))) for x in vals))
+            return out.tobytes()
+        except Exception:
+            return b""
+
+def parse_wav_fmt(data: bytes) -> Optional[dict]:
+    """Minimal WAV fmt chunk parser to detect float/int format."""
+    try:
+        if len(data) < 12 or data[0:4] != b"RIFF" or data[8:12] != b"WAVE":
+            return None
+        p = 12
+        while p + 8 <= len(data):
+            cid = data[p:p+4]
+            sz = int.from_bytes(data[p+4:p+8], "little", signed=False)
+            p += 8
+            if cid == b"fmt ":
+                fmtb = data[p:p+sz]
+                if len(fmtb) < 16:
+                    return None
+                format_tag = int.from_bytes(fmtb[0:2], "little")
+                channels = int.from_bytes(fmtb[2:4], "little")
+                sample_rate = int.from_bytes(fmtb[4:8], "little")
+                bits_per_sample = int.from_bytes(fmtb[14:16], "little")
+                sub_tag = None
+                if format_tag == 65534 and sz >= 40:  # WAVE_FORMAT_EXTENSIBLE
+                    sub_tag = int.from_bytes(fmtb[24:26], "little", signed=False)
+                return {
+                    "format_tag": format_tag,
+                    "channels": channels,
+                    "sample_rate": sample_rate,
+                    "bits_per_sample": bits_per_sample,
+                    "subformat_tag": sub_tag,
+                }
+            p += (sz + 1) & ~1
+        return None
+    except Exception:
+        return None
+
+def to_pcm16_mono(data: bytes, fmt: Optional[str], rate_hint: Optional[int], target_rate: int) -> Tuple[bytes, int]:
+    """
+    Normalize any input audio (RAW/WAV, int/float) to PCM16 mono at target_rate.
+    Returns (pcm16_bytes, target_rate).
+    """
+    if not data:
+        return b"", target_rate
+
+    fmt = (fmt or "").lower().strip()
+    if fmt in ("pcm16", "pcm", "raw"):
+        src_rate = int(rate_hint) if rate_hint else target_rate
+        pcm16 = data
+        if src_rate != target_rate:
+            try:
+                pcm16, _ = audioop.ratecv(pcm16, 2, 1, src_rate, target_rate, None)
+            except Exception:
+                return b"", target_rate
+        return pcm16, target_rate
+
+    # WAV path with float support
+    try:
+        fmt_info = parse_wav_fmt(data)
+        with wave.open(io.BytesIO(data), "rb") as wf:
+            sr = wf.getframerate() or target_rate
+            ch = wf.getnchannels() or 1
+            sw = wf.getsampwidth() or 2
+            frames = wf.readframes(wf.getnframes())
+
+            format_tag = (fmt_info or {}).get("format_tag", 1)
+            bits_per_sample = (fmt_info or {}).get("bits_per_sample", sw * 8)
+
+            # float32 -> int16
+            if format_tag == 3 or ((format_tag == 65534) and (fmt_info or {}).get("subformat_tag") == 3):
+                frames16 = float32_to_int16_bytes(frames)
+            else:
+                if sw != 2:
+                    frames16 = audioop.lin2lin(frames, sw, 2)
+                else:
+                    frames16 = frames
+
+            # mixdown to mono
+            if ch == 2:
+                try:
+                    frames16 = audioop.tomono(frames16, 2, 0.5, 0.5)
+                except Exception:
+                    frames16 = frames16[0::2] + b""
+            elif ch != 1:
+                try:
+                    frames16 = audioop.tomono(frames16, 2, 1.0, 0.0)
+                except Exception:
+                    pass
+
+            # resample
+            if sr != target_rate:
+                try:
+                    frames16, _ = audioop.ratecv(frames16, 2, 1, sr, target_rate, None)
+                except Exception:
+                    return b"", target_rate
+
+            return frames16, target_rate
+    except Exception:
+        return b"", target_rate
+
+def resample_pcm16_mono(pcm: bytes, src_rate: int, dst_rate: int) -> bytes:
+    if src_rate == dst_rate or not pcm:
+        return pcm
+    try:
+        out, _ = audioop.ratecv(pcm, 2, 1, src_rate, dst_rate, None)
+        return out
+    except Exception:
+        return pcm
+
+def iter_pcm_chunks(pcm: bytes, sr: int, ms: int = 50) -> List[bytes]:
+    """Split PCM16 mono stream into ~ms byte chunks."""
+    b_per_ms = int(sr * 2 / 1000)
+    n = max(b_per_ms * ms, 1)
+    return [pcm[i:i + n] for i in range(0, len(pcm), n)]
+
+def dump_wav(path: str, sample_rate: int, pcm16_mono: bytes):
+    try:
+        os.makedirs(os.path.dirname(path), exist_ok=True)
+    except Exception:
+        pass
+    try:
+        with wave.open(path, "wb") as wf:
+            wf.setnchannels(1)
+            wf.setsampwidth(2)
+            wf.setframerate(int(sample_rate))
+            wf.writeframes(pcm16_mono)
+    except Exception:
+        pass
+
+def pcm16_stats(pcm16_mono: bytes, sample_rate: int) -> dict:
+    try:
+        n_samp = len(pcm16_mono) // 2
+        dur = n_samp / float(sample_rate or 1)
+        rms = audioop.rms(pcm16_mono, 2)
+        peak = audioop.max(pcm16_mono, 2) if pcm16_mono else 0
+        try:
+            avg = audioop.avg(pcm16_mono, 2)
+        except Exception:
+            avg = 0
+        dbfs = (-999.0 if rms == 0 else 20.0 * math.log10(rms / 32768.0))
+        return {"duration_s": dur, "samples": n_samp, "rms": rms, "peak": peak, "dc_offset": avg, "dbfs": dbfs}
+    except Exception:
+        return {}
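
A sketch of the typical conversion path through these helpers: normalize raw input to PCM16 mono at the provider rate, then split it into ~50 ms chunks for streaming. The input below is synthetic, and audioop (used internally) is a stdlib module removed in Python 3.13, so this assumes an interpreter where it is still available:

    import math
    import struct

    from pygpt_net.core.realtime.shared.audio import to_pcm16_mono, iter_pcm_chunks, pcm16_stats

    # 200 ms of a 440 Hz tone as raw PCM16 mono @ 16 kHz (synthetic test input)
    src_rate = 16000
    samples = [int(0.3 * 32767 * math.sin(2 * math.pi * 440 * i / src_rate)) for i in range(src_rate // 5)]
    raw = struct.pack("<%dh" % len(samples), *samples)

    pcm, rate = to_pcm16_mono(raw, "pcm16", src_rate, 24000)  # resample to 24 kHz
    chunks = iter_pcm_chunks(pcm, rate, ms=50)                # ~50 ms per chunk
    print(rate, len(chunks), round(pcm16_stats(pcm, rate)["duration_s"], 2))  # 24000 4 0.2
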
pygpt_net/core/realtime/shared/loop.py

@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.31 23:00:00 #
+# ================================================== #
+
+import asyncio
+import threading
+from typing import Optional
+
+class BackgroundLoop:
+    """
+    Dedicated background asyncio loop running in its own thread.
+    Safe cross-thread scheduling and sync wrappers.
+    """
+    def __init__(self, name: str = "RT-Loop"):
+        self._loop: Optional[asyncio.AbstractEventLoop] = None
+        self._thread: Optional[threading.Thread] = None
+        self._name = name
+
+    @property
+    def loop(self) -> Optional[asyncio.AbstractEventLoop]:
+        return self._loop
+
+    def ensure(self):
+        if self._loop and self._loop.is_running():
+            return
+        self._loop = asyncio.new_event_loop()
+
+        def _runner(loop: asyncio.AbstractEventLoop):
+            asyncio.set_event_loop(loop)
+            loop.run_forever()
+
+        self._thread = threading.Thread(target=_runner, args=(self._loop,), name=self._name, daemon=True)
+        self._thread.start()
+
+    async def run(self, coro):
+        if not self._loop:
+            raise RuntimeError("Owner loop is not running")
+        cfut = asyncio.run_coroutine_threadsafe(coro, self._loop)
+        return await asyncio.wrap_future(cfut)
+
+    def run_sync(self, coro, timeout: float = 5.0):
+        if not self._loop or not self._loop.is_running():
+            return None
+        fut = asyncio.run_coroutine_threadsafe(coro, self._loop)
+        try:
+            return fut.result(timeout=timeout)
+        except Exception:
+            return None
+
+    def stop(self, timeout: float = 2.0):
+        loop, thread = self._loop, self._thread
+        if loop and loop.is_running():
+            loop.call_soon_threadsafe(loop.stop)
+        if thread and thread.is_alive():
+            thread.join(timeout=timeout)
+        self._loop = None
+        self._thread = None
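
A usage sketch for the background loop from synchronous code: start it, run a coroutine, shut it down (illustrative only):

    import asyncio
    import time

    from pygpt_net.core.realtime.shared.loop import BackgroundLoop

    async def fetch_value() -> int:
        await asyncio.sleep(0.1)  # stand-in for real async work (e.g. a realtime API call)
        return 42

    bg = BackgroundLoop(name="RT-Loop-demo")
    bg.ensure()                        # spin up the daemon thread + event loop
    time.sleep(0.1)                    # give the loop a moment; run_sync returns None if it is not running yet
    print(bg.run_sync(fetch_value()))  # blocks up to 5 s, prints 42
    bg.stop()                          # stop the loop and join the thread
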
pygpt_net/core/realtime/shared/session.py

@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.31 23:00:00 #
+# ================================================== #
+
+from typing import Optional
+from pygpt_net.item.ctx import CtxItem
+
+def set_ctx_rt_handle(ctx: Optional[CtxItem], handle: Optional[str], window=None):
+    """Persist server session handle into ctx.extra['rt_session_id'] (best effort)."""
+    try:
+        if not ctx:
+            return
+        if not isinstance(ctx.extra, dict):
+            ctx.extra = {}
+        val = (handle or "").strip()
+        if val:
+            ctx.extra["rt_session_id"] = val
+            if window:
+                try:
+                    window.core.ctx.update_item(ctx)
+                except Exception:
+                    pass
+    except Exception:
+        pass
+
+def set_rt_session_expires_at(ctx: Optional[CtxItem], epoch_seconds: Optional[int], window=None):
+    """Persist optional session expiration timestamp into ctx.extra."""
+    if not ctx or epoch_seconds is None:
+        return
+    try:
+        if not isinstance(ctx.extra, dict):
+            ctx.extra = {}
+        ctx.extra["rt_session_expires_at"] = int(epoch_seconds)
+        if window:
+            try:
+                window.core.ctx.update_item(ctx)
+            except Exception:
+                pass
+    except Exception:
+        pass
+
+def extract_last_session_id(items: list[CtxItem]) -> Optional[str]:
+    """Extract last known session ID from a list of CtxItems."""
+    if not items:
+        return None
+    for item in reversed(items):
+        if not item or not isinstance(item.extra, dict):
+            continue
+        val = item.extra.get("rt_session_id")
+        if isinstance(val, str) and val.strip():
+            return val.strip()
+    return None
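
A round-trip sketch for the session helpers (assumes a CtxItem can be constructed with default arguments and passes no window, so nothing is persisted to the database; the handle value is made up):

    from pygpt_net.item.ctx import CtxItem
    from pygpt_net.core.realtime.shared.session import set_ctx_rt_handle, extract_last_session_id

    ctx = CtxItem()                        # assumption: default constructor is sufficient here
    set_ctx_rt_handle(ctx, "sess_abc123")  # stored in ctx.extra["rt_session_id"]
    print(extract_last_session_id([ctx]))  # -> "sess_abc123"
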
pygpt_net/core/realtime/shared/text.py

@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.31 23:00:00 #
+# ================================================== #
+
+import re
+
+def coalesce_text(parts):
+    """Merge text parts, preserving intentional newlines and fixing spaces."""
+    if not parts:
+        return ""
+    out = []
+    for piece in parts:
+        if not piece:
+            continue
+        s = str(piece)
+        s = re.sub(r"[ \t\f\v]+", " ", s)
+        s = re.sub(r"[ \t]*\n[ \t]*", "\n", s)
+        if not out:
+            out.append(s.strip())
+            continue
+        if out[-1].endswith("\n") or s.startswith("\n"):
+            out.append(s.lstrip())
+        else:
+            out.append(" " + s.strip())
+    text = "".join(out)
+    text = re.sub(r"[ \t]+([,.;:!?%])", r"\1", text)
+    text = re.sub(r"[ \t]+([\)\]\}])", r"\1", text)
+    text = re.sub(r"[ \t]+(['\"])", r"\1", text)
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    return text.strip()
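
An example of what coalesce_text does with streamed fragments and stray spaces before punctuation (illustrative input):

    from pygpt_net.core.realtime.shared.text import coalesce_text

    parts = ["Hello", "  world", " ,", "how are you ?", "Fine ."]
    print(coalesce_text(parts))  # -> "Hello world, how are you? Fine."
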