pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +15 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +7 -1
- pygpt_net/app_core.py +3 -1
- pygpt_net/config.py +3 -1
- pygpt_net/controller/__init__.py +9 -2
- pygpt_net/controller/audio/audio.py +38 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +23 -62
- pygpt_net/controller/chat/handler/__init__.py +0 -0
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +3 -1071
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/lang/custom.py +2 -2
- pygpt_net/controller/media/__init__.py +12 -0
- pygpt_net/controller/media/media.py +115 -0
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +293 -0
- pygpt_net/controller/ui/mode.py +23 -2
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +14 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +2 -1
- pygpt_net/core/bridge/worker.py +4 -1
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/image/image.py +56 -5
- pygpt_net/core/realtime/__init__.py +0 -0
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +160 -0
- pygpt_net/core/render/web/body.py +24 -3
- pygpt_net/core/text/utils.py +54 -2
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +54 -0
- pygpt_net/core/video/__init__.py +12 -0
- pygpt_net/core/video/video.py +290 -0
- pygpt_net/data/config/config.json +26 -5
- pygpt_net/data/config/models.json +221 -103
- pygpt_net/data/config/settings.json +244 -6
- pygpt_net/data/css/web-blocks.css +6 -0
- pygpt_net/data/css/web-chatgpt.css +6 -0
- pygpt_net/data/css/web-chatgpt_wide.css +6 -0
- pygpt_net/data/locale/locale.de.ini +35 -7
- pygpt_net/data/locale/locale.en.ini +56 -17
- pygpt_net/data/locale/locale.es.ini +35 -7
- pygpt_net/data/locale/locale.fr.ini +35 -7
- pygpt_net/data/locale/locale.it.ini +35 -7
- pygpt_net/data/locale/locale.pl.ini +38 -7
- pygpt_net/data/locale/locale.uk.ini +35 -7
- pygpt_net/data/locale/locale.zh.ini +31 -3
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
- pygpt_net/item/model.py +22 -1
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/provider/api/google/__init__.py +76 -7
- pygpt_net/provider/api/google/audio.py +8 -1
- pygpt_net/provider/api/google/chat.py +45 -6
- pygpt_net/provider/api/google/image.py +226 -86
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/google/video.py +364 -0
- pygpt_net/provider/api/openai/__init__.py +22 -2
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/google_tts.py +0 -12
- pygpt_net/provider/audio_output/openai_tts.py +8 -5
- pygpt_net/provider/core/config/patch.py +241 -178
- pygpt_net/provider/core/model/patch.py +28 -2
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/provider/web/duckduck_search.py +212 -0
- pygpt_net/ui/layout/toolbox/audio.py +55 -0
- pygpt_net/ui/layout/toolbox/footer.py +14 -42
- pygpt_net/ui/layout/toolbox/image.py +7 -13
- pygpt_net/ui/layout/toolbox/raw.py +52 -0
- pygpt_net/ui/layout/toolbox/split.py +48 -0
- pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
- pygpt_net/ui/layout/toolbox/video.py +49 -0
- pygpt_net/ui/widget/option/combo.py +15 -1
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# ================================================== #
|
|
4
|
+
# This file is a part of PYGPT package #
|
|
5
|
+
# Website: https://pygpt.net #
|
|
6
|
+
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
|
+
# MIT License #
|
|
8
|
+
# Created By : Marcin Szczygliński #
|
|
9
|
+
# Updated Date: 2025.08.31 23:00:00 #
|
|
10
|
+
# ================================================== #
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
from typing import Any, Optional, List, Dict
|
|
14
|
+
|
|
15
|
+
def sanitize_function_tools(tools) -> list:
    """
    OpenAI: Normalize function tools into a flat dict shape:
    {"type":"function","name","description","parameters", ...}
    Accepts legacy {"type":"function","function":{...}} and flattens it.

    Entries that are not dicts, are not of type "function", or lack a name
    are skipped; a missing/invalid "parameters" is replaced with an empty
    object schema.
    """
    normalized = []
    for entry in (tools or []):
        if not isinstance(entry, dict):
            continue
        src = dict(entry)
        kind = (src.get("type") or "function").lower()
        if kind != "function":
            continue
        legacy = src.get("function")
        if isinstance(legacy, dict):
            # Legacy nested shape: lift known fields out of the inner dict.
            flat = {"type": "function"}
            flat.update(
                (key, legacy[key])
                for key in ("name", "description", "parameters", "strict", "strict_schema")
                if key in legacy and legacy[key] is not None
            )
            # Fall back to the outer description when the inner one is absent.
            if "description" not in flat and src.get("description"):
                flat["description"] = src["description"]
        else:
            # Already flat: copy the known fields straight across.
            flat = {
                "type": "function",
                "name": src.get("name"),
                "description": src.get("description"),
                "parameters": src.get("parameters"),
            }
            flat.update(
                (key, src[key]) for key in ("strict", "strict_schema") if key in src
            )
        if not flat.get("name"):
            continue
        if not isinstance(flat.get("parameters"), dict):
            flat["parameters"] = {"type": "object", "properties": {}}
        normalized.append(flat)
    return normalized
|
|
55
|
+
|
|
56
|
+
def sanitize_remote_tools(remote_tools) -> list:
    """
    OpenAI: Pass-through for non-function tools (ensure lowercased 'type').

    Only tool types accepted by the Realtime API are kept. The 'type' value
    is lowercased *before* the allow-list test, so mixed-case input such as
    "MCP" is normalized rather than silently dropped.

    :param remote_tools: iterable of tool dicts (may be None/empty)
    :return: list of sanitized tool dict copies
    """
    allowed = {"function", "mcp"}  # Realtime accepts only these
    out = []
    if not remote_tools:
        return out
    for t in remote_tools:
        if not isinstance(t, dict):
            continue
        tt = dict(t)
        ttype = tt.get("type")
        if not ttype:
            continue
        # Bug fix: normalize case before the membership test; previously the
        # raw value was compared against the lowercase set, so "MCP" was
        # rejected even though types were meant to be lowercased here.
        ttype = str(ttype).lower()
        if ttype not in allowed:
            continue
        tt["type"] = ttype
        out.append(tt)
    return out
|
|
74
|
+
|
|
75
|
+
def tools_signature(tools_list: list) -> str:
    """Order-insensitive stable signature for tools list."""
    def _canonical(value):
        # Recursively rebuild dicts with sorted keys so key order never
        # affects the resulting signature.
        if isinstance(value, dict):
            return {key: _canonical(value[key]) for key in sorted(value)}
        if isinstance(value, list):
            return [_canonical(item) for item in value]
        return value

    try:
        encoded = sorted(
            json.dumps(_canonical(tool), ensure_ascii=False, sort_keys=True, separators=(",", ":"))
            for tool in (tools_list or [])
        )
        return "|".join(encoded)
    except Exception:
        # Unserializable input: fall back to a best-effort repr.
        return str(tools_list)
|
|
90
|
+
|
|
91
|
+
def prepare_tools_for_session(opts) -> list:
    """Compose session.tools from opts.remote_tools + opts.tools."""
    function_tools = sanitize_function_tools(getattr(opts, "tools", None))
    remote = sanitize_remote_tools(getattr(opts, "remote_tools", None))
    # Remote (pass-through) tools first, then flattened function tools.
    return [*(remote or []), *(function_tools or [])]
|
|
96
|
+
|
|
97
|
+
def prepare_tools_for_response(opts) -> tuple[list, Optional[str]]:
    """Compose per-response function tools and tool_choice."""
    return (
        sanitize_function_tools(getattr(opts, "tools", None)),
        getattr(opts, "tool_choice", None),
    )
|
|
102
|
+
|
|
103
|
+
def build_tool_outputs_payload(results, last_tool_calls: List[Dict]) -> List[Dict]:
    """
    Normalize 'results' into:
      [{"call_id": str, "previous_item_id": str|None, "output": str}]
    Matching priority: call_id -> item.id -> function name -> first unused.
    """
    calls = list(last_tool_calls or [])
    by_id = {c["id"]: c for c in calls if c.get("id")}
    by_call = {c["call_id"]: c for c in calls if c.get("call_id")}
    by_name: dict[str, list] = {}
    for c in calls:
        fn_name = ((c.get("function") or {}).get("name") or "").strip()
        if fn_name:
            by_name.setdefault(fn_name, []).append(c)

    used: set[str] = set()

    def _stringify(value) -> str:
        # JSON-encode structured values; everything else via str().
        if value is None:
            return ""
        if isinstance(value, (dict, list)):
            try:
                return json.dumps(value, ensure_ascii=False)
            except Exception:
                return str(value)
        return str(value)

    def _claim(candidates):
        # First candidate whose call_id has not been consumed yet.
        for cand in candidates:
            cid = cand.get("call_id") or ""
            if cid and cid not in used:
                used.add(cid)
                return cand
        return None

    def _entry(call, output) -> dict:
        return {
            "call_id": call.get("call_id"),
            "previous_item_id": call.get("id"),
            "output": _stringify(output),
        }

    def _match_item(item):
        # Resolve one result item to a pending call; None when unmatched.
        if not isinstance(item, dict):
            call = _claim(calls)
            return _entry(call, item) if call else None
        cid = item.get("call_id") or item.get("id") or item.get("tool_call_id") or ""
        fn_name = item.get("name") or ""
        response = item.get("response")
        if response is None:
            response = item.get("result") or item.get("output") or item.get("content")
        call = by_call.get(cid) or by_id.get(cid)
        if call is None:
            call = _claim(by_name.get(fn_name) or []) if fn_name else _claim(calls)
        return _entry(call, response) if call else None

    out: list[dict] = []

    # Shape 1: wrapper dict carrying an explicit list of responses.
    if isinstance(results, dict) and ("function_responses" in results or "tool_outputs" in results):
        for item in (results.get("function_responses") or results.get("tool_outputs") or []):
            payload = _match_item(item)
            if payload:
                out.append(payload)
        return out

    # Shape 2: bare list of result items.
    if isinstance(results, list):
        for item in results:
            payload = _match_item(item)
            if payload:
                out.append(payload)
        return out

    # Shape 3: mapping of call_id / item id / function name -> output value.
    if isinstance(results, dict):
        for key, value in results.items():
            if not isinstance(key, str):
                continue
            call = by_call.get(key) or by_id.get(key) or _claim(by_name.get(key) or [])
            if call:
                out.append(_entry(call, value))
        return out

    # Shape 4: single scalar result -> attribute to the first unused call.
    call = _claim(calls)
    if call:
        out.append(_entry(call, results))
    return out
|
|
196
|
+
|
|
197
|
+
def build_function_responses_payload(results, last_tool_calls: List[Dict]) -> List[Dict]:
    """
    Produce neutral list of dicts for Google:
      [{"id": "...", "name": "...", "response": {...}}]
    Provider converts to gtypes.FunctionResponse downstream.
    """
    calls = list(last_tool_calls or [])
    by_id = {c["id"]: c for c in calls if c.get("id")}
    by_name: dict[str, list] = {}
    for c in calls:
        fn_name = (c.get("function") or {}).get("name") or ""
        if fn_name:
            by_name.setdefault(fn_name, []).append(c)

    used_ids: set[str] = set()

    def _claim_id(name: str) -> str:
        # First not-yet-used call id registered under this function name.
        for cand in by_name.get(name) or []:
            cid = cand.get("id") or ""
            if cid and cid not in used_ids:
                used_ids.add(cid)
                return cid
        return ""

    def _as_response(value):
        # Google expects a dict response; wrap scalars as {"result": str(...)}.
        return value if isinstance(value, dict) else {"result": str(value)}

    out: list = []

    # Shape 1: wrapper dict with an explicit "function_responses" list.
    if isinstance(results, dict) and "function_responses" in results:
        for item in results.get("function_responses") or []:
            response = item.get("response")
            if response is None:
                response = item.get("result") or item.get("output") or item.get("content") or {}
            out.append({
                "id": item.get("id") or "",
                "name": item.get("name") or "",
                "response": _as_response(response),
            })
        return out

    # Shape 2: bare list of result items.
    if isinstance(results, list):
        for item in results:
            if not isinstance(item, dict):
                # Bare value: attribute it to the first known call.
                if calls:
                    ref = calls[0]
                    ref_id = ref.get("id") or ""
                    used_ids.add(ref_id)
                    out.append({
                        "id": ref_id,
                        "name": (ref.get("function") or {}).get("name") or "",
                        "response": _as_response(item),
                    })
                continue
            fid = item.get("id") or item.get("call_id") or item.get("tool_call_id") or ""
            fn_name = item.get("name") or ""
            response = item.get("response")
            if response is None:
                response = item.get("result") or item.get("output") or item.get("content") or {}
            if not fid and fn_name:
                fid = _claim_id(fn_name)
            if fid:
                used_ids.add(fid)
            out.append({"id": fid, "name": fn_name, "response": _as_response(response)})
        return out

    # Shape 3: mapping of call id / function name -> response value.
    if isinstance(results, dict):
        for key, value in results.items():
            if not isinstance(key, str):
                continue
            if key in by_id:
                used_ids.add(key)
                out.append({
                    "id": key,
                    "name": (by_id[key].get("function") or {}).get("name") or "",
                    "response": _as_response(value),
                })
            else:
                out.append({"id": _claim_id(key), "name": key, "response": _as_response(value)})
        return out

    # Anything else: nothing to report.
    return out
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# ================================================== #
|
|
4
|
+
# This file is a part of PYGPT package #
|
|
5
|
+
# Website: https://pygpt.net #
|
|
6
|
+
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
|
+
# MIT License #
|
|
8
|
+
# Created By : Marcin Szczygliński #
|
|
9
|
+
# Updated Date: 2025.08.31 23:00:00 #
|
|
10
|
+
# ================================================== #
|
|
11
|
+
|
|
12
|
+
from enum import Enum
|
|
13
|
+
|
|
14
|
+
class TurnMode(str, Enum):
    """Turn-taking mode for a realtime session."""
    MANUAL = "manual"
    AUTO = "auto"  # future (server VAD / automatic activity detection)


def apply_turn_mode_openai(session_payload: dict, mode: TurnMode):
    """
    Mutate OpenAI session.update payload to reflect turn mode.
    Manual: turn_detection=None (default).
    Auto: enable server VAD if available.
    """
    session = session_payload.setdefault("session", {})
    session["turn_detection"] = {"type": "server_vad"} if mode == TurnMode.AUTO else None


def apply_turn_mode_google(live_cfg: dict, mode: TurnMode):
    """
    Mutate Google Live connect config to reflect turn mode.
    Manual: automatic_activity_detection.disabled=True
    Auto: disabled=False (server handles VAD).
    """
    detection = live_cfg.setdefault("realtime_input_config", {}).setdefault(
        "automatic_activity_detection", {}
    )
    detection["disabled"] = mode != TurnMode.AUTO
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# ================================================== #
|
|
4
|
+
# This file is a part of PYGPT package #
|
|
5
|
+
# Website: https://pygpt.net #
|
|
6
|
+
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
|
+
# MIT License #
|
|
8
|
+
# Created By : Marcin Szczygliński #
|
|
9
|
+
# Updated Date: 2025.08.31 23:00:00 #
|
|
10
|
+
# ================================================== #
|
|
11
|
+
|
|
12
|
+
from typing import Optional, Callable, Awaitable

# Async callback invoked with each text delta chunk.
TextCallback = Callable[[str], Awaitable[None]]
# Async callback invoked with (data, mime, rate, channels, final) for each
# audio chunk; rate/channels may be None when unknown.
AudioCallback = Callable[[bytes, str, Optional[int], Optional[int], bool], Awaitable[None]]
# Synchronous predicate polled to check whether the session should stop.
StopCallback = Callable[[], bool]
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# ================================================== #
|
|
4
|
+
# This file is a part of PYGPT package #
|
|
5
|
+
# Website: https://pygpt.net #
|
|
6
|
+
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
|
+
# MIT License #
|
|
8
|
+
# Created By : Marcin Szczygliński #
|
|
9
|
+
# Updated Date: 2025.08.30 06:00:00 #
|
|
10
|
+
# ================================================== #
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
from typing import Optional
|
|
14
|
+
|
|
15
|
+
from PySide6.QtCore import Slot, QRunnable, QObject, Signal
|
|
16
|
+
|
|
17
|
+
from pygpt_net.core.events import RealtimeEvent
|
|
18
|
+
from pygpt_net.item.ctx import CtxItem
|
|
19
|
+
|
|
20
|
+
from .options import RealtimeOptions
|
|
21
|
+
|
|
22
|
+
class RealtimeSignals(QObject):
    """Qt signal container used to push realtime events from the worker
    thread back to the main thread."""
    response = Signal(object)  # carries a RealtimeEvent
|
|
26
|
+
class RealtimeWorker(QRunnable):
    """
    QRunnable worker that runs a provider-specific realtime session (websocket).

    - RT_OUTPUT_READY is emitted when the audio output is ready (STREAM_BEGIN).
    - RT_OUTPUT_TEXT_DELTA is emitted for text deltas.
    - RT_OUTPUT_AUDIO_DELTA is emitted for audio chunks to be handled by the main-thread AudioDispatcher.
    - RT_OUTPUT_AUDIO_END is emitted when the session ends.
    - RT_OUTPUT_AUDIO_ERROR is emitted on error.
    """
    def __init__(
            self,
            window,
            ctx: CtxItem,
            opts: RealtimeOptions
    ):
        """
        Initialize the worker.

        :param window: Window instance
        :param ctx: CtxItem
        :param opts: RealtimeOptions
        """
        super().__init__()
        self.window = window
        self.ctx = ctx
        self.opts = opts

    def _emit(self, name: str, data: dict):
        """
        Build a RealtimeEvent and emit it via opts.rt_signals.

        Emission is best-effort: missing signals or emit failures are
        swallowed so the session itself is never interrupted by UI errors.
        (Replaces the previous `expr if cond else None` statement idiom.)

        :param name: RealtimeEvent name constant
        :param data: event payload dict
        """
        try:
            if self.opts.rt_signals:
                self.opts.rt_signals.response.emit(RealtimeEvent(name, data))
        except Exception:
            pass

    def get_client(self, provider: str):
        """
        Get the appropriate client based on the provider

        :param provider: Provider name
        :return: Client instance
        """
        provider = (provider or "openai").lower()
        if provider == "google":
            return self.window.core.api.google.realtime.handler
        elif provider == "openai":
            return self.window.core.api.openai.realtime.handler
        else:
            raise RuntimeError(f"Unsupported realtime provider: {provider}")

    @Slot()
    def run(self):
        """Run the realtime session inside a private asyncio event loop."""
        loop = None  # ensure defined for cleanup

        # STREAM_BEGIN -> UI
        self._emit(RealtimeEvent.RT_OUTPUT_READY, {
            "ctx": self.ctx,
        })

        try:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

            async def _amain():
                # Text deltas -> UI
                async def on_text(delta: str):
                    if not delta:
                        return
                    self._emit(RealtimeEvent.RT_OUTPUT_TEXT_DELTA, {
                        "ctx": self.ctx,
                        "chunk": delta,
                    })

                # Audio -> enqueue to main-thread
                async def on_audio(
                        data: bytes,
                        mime: str,
                        rate: Optional[int],
                        channels: Optional[int],
                        final: bool = False
                ):
                    self._emit(RealtimeEvent.RT_OUTPUT_AUDIO_DELTA, {
                        "payload": {
                            "ctx": self.ctx,
                            "data": data or b"",
                            "mime": mime or "audio/pcm",
                            "rate": int(rate) if rate is not None else None,
                            "channels": int(channels) if channels is not None else None,
                            "final": bool(final),
                            "provider": self.opts.provider,
                            "model": self.opts.model,
                        }
                    })

                def _should_stop() -> bool:
                    # Poll the kernel stop flag; never raise from the poll.
                    try:
                        return bool(self.window.controller.kernel.stopped())
                    except Exception:
                        return False

                # run the client
                client = self.get_client(self.opts.provider)
                await client.run(self.ctx, self.opts, on_text, on_audio, _should_stop)

            loop.run_until_complete(_amain())

        except Exception as e:
            # Session failed: report the error to the UI (best-effort).
            self._emit(RealtimeEvent.RT_OUTPUT_AUDIO_ERROR, {"error": e})
        finally:
            # Robust asyncio teardown to avoid hangs on subsequent runs
            if loop is not None:
                try:
                    pending = [t for t in asyncio.all_tasks(loop) if not t.done()]
                    for t in pending:
                        t.cancel()
                    if pending:
                        loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
                except Exception:
                    pass
                try:
                    loop.run_until_complete(loop.shutdown_asyncgens())
                except Exception:
                    pass
                try:
                    loop.close()
                except Exception:
                    pass
                try:
                    asyncio.set_event_loop(None)
                except Exception:
                    pass
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.
|
|
9
|
+
# Updated Date: 2025.09.01 23:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
import os
|
|
@@ -15,6 +15,7 @@ from random import shuffle as _shuffle
|
|
|
15
15
|
|
|
16
16
|
from typing import Optional, List, Dict
|
|
17
17
|
|
|
18
|
+
from pygpt_net.core.text.utils import elide_filename
|
|
18
19
|
from pygpt_net.core.events import Event
|
|
19
20
|
from pygpt_net.item.ctx import CtxItem
|
|
20
21
|
from pygpt_net.utils import trans
|
|
@@ -25,6 +26,7 @@ import pygpt_net.js_rc
|
|
|
25
26
|
import pygpt_net.css_rc
|
|
26
27
|
import pygpt_net.fonts_rc
|
|
27
28
|
|
|
29
|
+
|
|
28
30
|
class Body:
|
|
29
31
|
|
|
30
32
|
NUM_TIPS = 13
|
|
@@ -1066,7 +1068,7 @@ class Body:
|
|
|
1066
1068
|
num_all: Optional[int] = None
|
|
1067
1069
|
) -> str:
|
|
1068
1070
|
"""
|
|
1069
|
-
Get image HTML
|
|
1071
|
+
Get media image/video/audio HTML
|
|
1070
1072
|
|
|
1071
1073
|
:param url: URL to image
|
|
1072
1074
|
:param num: number of image
|
|
@@ -1075,7 +1077,26 @@ class Body:
|
|
|
1075
1077
|
"""
|
|
1076
1078
|
url, path = self.window.core.filesystem.extract_local_url(url)
|
|
1077
1079
|
basename = os.path.basename(path)
|
|
1078
|
-
|
|
1080
|
+
|
|
1081
|
+
# if video file then embed video player
|
|
1082
|
+
ext = os.path.splitext(basename)[1].lower()
|
|
1083
|
+
video_exts = (".mp4", ".webm", ".ogg", ".mov", ".avi", ".mkv")
|
|
1084
|
+
if ext in video_exts:
|
|
1085
|
+
# check if .webm file exists for better compatibility
|
|
1086
|
+
if ext != ".webm":
|
|
1087
|
+
webm_path = os.path.splitext(path)[0] + ".webm"
|
|
1088
|
+
if os.path.exists(webm_path):
|
|
1089
|
+
path = webm_path
|
|
1090
|
+
ext = ".webm"
|
|
1091
|
+
return f'''
|
|
1092
|
+
<div class="extra-src-video-box" title="{url}">
|
|
1093
|
+
<video class="video-player" controls>
|
|
1094
|
+
<source src="{path}" type="video/{ext[1:]}">
|
|
1095
|
+
</video>
|
|
1096
|
+
<p><a href="{url}" class="title">{elide_filename(basename)}</a></p>
|
|
1097
|
+
</div>
|
|
1098
|
+
'''
|
|
1099
|
+
return f'<div class="extra-src-img-box" title="{url}"><div class="img-outer"><div class="img-wrapper"><a href="{url}"><img src="{path}" class="image"></a></div><a href="{url}" class="title">{elide_filename(basename)}</a></div></div><br/>'
|
|
1079
1100
|
|
|
1080
1101
|
def get_url_html(
|
|
1081
1102
|
self,
|
pygpt_net/core/text/utils.py
CHANGED
|
@@ -9,7 +9,6 @@
|
|
|
9
9
|
# Updated Date: 2025.08.15 23:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
|
-
|
|
13
12
|
def output_html2text(html: str) -> str:
|
|
14
13
|
"""
|
|
15
14
|
Convert output HTML to plain text
|
|
@@ -76,4 +75,57 @@ def has_unclosed_code_tag(text: str) -> bool:
|
|
|
76
75
|
"""
|
|
77
76
|
if not text:
|
|
78
77
|
return False
|
|
79
|
-
return (text.count('```') % 2) != 0
|
|
78
|
+
return (text.count('```') % 2) != 0
|
|
79
|
+
|
|
80
|
+
def elide_filename(name_or_path: str, max_len: int = 45, ellipsis: str = "...", keep_dir: bool = False) -> str:
    """
    Elide a long filename by replacing the middle with an ellipsis, preserving the extension.

    Args:
        name_or_path: Filename or full path.
        max_len: Maximum length of the resulting string (including extension and ellipsis).
        ellipsis: Ellipsis text to insert (e.g., "...").
        keep_dir: If True and a path is provided, keep the directory prefix and elide only the basename.
                  If False, operate on the basename only.

    Returns:
        Elided filename (or path if keep_dir=True).
    """
    import os

    if max_len <= 0:
        return name_or_path

    if keep_dir:
        directory, base = os.path.split(name_or_path)
    else:
        directory, base = "", os.path.basename(name_or_path)
    stem, ext = os.path.splitext(base)

    def _finish(result: str) -> str:
        # Re-attach the directory prefix only when requested.
        return os.path.join(directory, result) if keep_dir else result

    # Short enough already: nothing to elide.
    if len(base) <= max_len:
        return _finish(base)

    # Minimum length needed for "<head><ellipsis><tail><ext>" with 1-char head/tail.
    floor = len(ext) + len(ellipsis) + 2
    if max_len < floor:
        # Degrade gracefully: one head char, ellipsis, one tail char,
        # then as much of the extension as still fits.
        head = stem[:1]
        tail = stem[-1:] if len(stem) > 1 else ""
        ext_budget = max(0, max_len - len(head) - len(ellipsis) - len(tail))
        return _finish(f"{head}{ellipsis}{tail}{ext[:ext_budget]}")

    # Budget for visible stem characters, split with a slight bias to the head.
    budget = max_len - len(ext) - len(ellipsis)
    head_len = max(1, (budget + 1) // 2)
    tail_len = max(1, budget - head_len)
    head = stem[:head_len]
    tail = stem[-tail_len:] if tail_len <= len(stem) else stem
    return _finish(f"{head}{ellipsis}{tail}{ext}")
|
pygpt_net/core/types/__init__.py
CHANGED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# ================================================== #
|
|
4
|
+
# This file is a part of PYGPT package #
|
|
5
|
+
# Website: https://pygpt.net #
|
|
6
|
+
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
|
+
# MIT License #
|
|
8
|
+
# Created By : Marcin Szczygliński #
|
|
9
|
+
# Updated Date: 2025.09.01 23:00:00 #
|
|
10
|
+
# ================================================== #
|
|
11
|
+
|
|
12
|
+
VIDEO_AVAILABLE_ASPECT_RATIOS = {
|
|
13
|
+
"16:9": "16:9",
|
|
14
|
+
"9:16": "9:16",
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
IMAGE_AVAILABLE_RESOLUTIONS = {
|
|
19
|
+
"gpt-image": {
|
|
20
|
+
"auto": "auto",
|
|
21
|
+
"1024x1024": "1024x1024",
|
|
22
|
+
"1536x1024": "1536x1024",
|
|
23
|
+
"1024x1536": "1024x1536"
|
|
24
|
+
},
|
|
25
|
+
"dall-e-3": {
|
|
26
|
+
"1792x1024": "1792x1024",
|
|
27
|
+
"1024x1792": "1024x1792",
|
|
28
|
+
"1024x1024": "1024x1024"
|
|
29
|
+
},
|
|
30
|
+
"dall-e-2": {
|
|
31
|
+
"1024x1024": "1024x1024",
|
|
32
|
+
"512x512": "512x512",
|
|
33
|
+
"256x256": "256x256"
|
|
34
|
+
},
|
|
35
|
+
"imagen-3.0": {
|
|
36
|
+
"1024x1024": "1024x1024",
|
|
37
|
+
"896x1280": "896x1280",
|
|
38
|
+
"1280x896": "1280x896",
|
|
39
|
+
"768x1408": "768x1408",
|
|
40
|
+
"1408x768": "1408x768"
|
|
41
|
+
},
|
|
42
|
+
"imagen-4.0": {
|
|
43
|
+
"1024x1024": "1024x1024",
|
|
44
|
+
"896x1280": "896x1280",
|
|
45
|
+
"1280x896": "1280x896",
|
|
46
|
+
"768x1408": "768x1408",
|
|
47
|
+
"1408x768": "1408x768",
|
|
48
|
+
"2048x2048": "2048x2048",
|
|
49
|
+
"1792x2560": "1792x2560",
|
|
50
|
+
"2560x1792": "2560x1792",
|
|
51
|
+
"1536x2816": "1536x2816",
|
|
52
|
+
"2816x1536": "2816x1536"
|
|
53
|
+
}
|
|
54
|
+
}
|