pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/{container.py → app_core.py} +5 -6
  5. pygpt_net/controller/__init__.py +5 -2
  6. pygpt_net/controller/access/control.py +1 -9
  7. pygpt_net/controller/assistant/assistant.py +4 -4
  8. pygpt_net/controller/assistant/batch.py +7 -7
  9. pygpt_net/controller/assistant/files.py +4 -4
  10. pygpt_net/controller/assistant/threads.py +3 -3
  11. pygpt_net/controller/attachment/attachment.py +4 -7
  12. pygpt_net/controller/audio/audio.py +25 -1
  13. pygpt_net/controller/audio/ui.py +2 -2
  14. pygpt_net/controller/chat/audio.py +1 -8
  15. pygpt_net/controller/chat/common.py +30 -4
  16. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  17. pygpt_net/controller/chat/output.py +8 -3
  18. pygpt_net/controller/chat/stream.py +4 -405
  19. pygpt_net/controller/chat/text.py +3 -2
  20. pygpt_net/controller/chat/vision.py +11 -19
  21. pygpt_net/controller/config/placeholder.py +1 -1
  22. pygpt_net/controller/ctx/ctx.py +1 -1
  23. pygpt_net/controller/ctx/summarizer.py +1 -1
  24. pygpt_net/controller/kernel/kernel.py +11 -3
  25. pygpt_net/controller/kernel/reply.py +5 -1
  26. pygpt_net/controller/mode/mode.py +21 -12
  27. pygpt_net/controller/plugins/settings.py +3 -2
  28. pygpt_net/controller/presets/editor.py +112 -99
  29. pygpt_net/controller/realtime/__init__.py +12 -0
  30. pygpt_net/controller/realtime/manager.py +53 -0
  31. pygpt_net/controller/realtime/realtime.py +268 -0
  32. pygpt_net/controller/theme/theme.py +3 -2
  33. pygpt_net/controller/ui/mode.py +7 -0
  34. pygpt_net/controller/ui/ui.py +19 -1
  35. pygpt_net/controller/ui/vision.py +4 -4
  36. pygpt_net/core/agents/legacy.py +2 -2
  37. pygpt_net/core/agents/runners/openai_workflow.py +2 -2
  38. pygpt_net/core/assistants/files.py +5 -5
  39. pygpt_net/core/assistants/store.py +4 -4
  40. pygpt_net/core/audio/audio.py +6 -1
  41. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  42. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  43. pygpt_net/core/audio/backend/native/player.py +139 -0
  44. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  45. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  46. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  47. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  48. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  49. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  50. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  51. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  52. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  53. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  54. pygpt_net/core/audio/backend/shared/player.py +137 -0
  55. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  56. pygpt_net/core/audio/capture.py +5 -0
  57. pygpt_net/core/audio/output.py +13 -2
  58. pygpt_net/core/audio/whisper.py +6 -2
  59. pygpt_net/core/bridge/bridge.py +4 -3
  60. pygpt_net/core/bridge/worker.py +31 -9
  61. pygpt_net/core/debug/console/console.py +2 -2
  62. pygpt_net/core/debug/presets.py +2 -2
  63. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  64. pygpt_net/core/events/__init__.py +2 -1
  65. pygpt_net/core/events/realtime.py +55 -0
  66. pygpt_net/core/experts/experts.py +2 -2
  67. pygpt_net/core/image/image.py +51 -1
  68. pygpt_net/core/modes/modes.py +2 -2
  69. pygpt_net/core/presets/presets.py +3 -3
  70. pygpt_net/core/realtime/options.py +87 -0
  71. pygpt_net/core/realtime/shared/__init__.py +0 -0
  72. pygpt_net/core/realtime/shared/audio.py +213 -0
  73. pygpt_net/core/realtime/shared/loop.py +64 -0
  74. pygpt_net/core/realtime/shared/session.py +59 -0
  75. pygpt_net/core/realtime/shared/text.py +37 -0
  76. pygpt_net/core/realtime/shared/tools.py +276 -0
  77. pygpt_net/core/realtime/shared/turn.py +38 -0
  78. pygpt_net/core/realtime/shared/types.py +16 -0
  79. pygpt_net/core/realtime/worker.py +164 -0
  80. pygpt_net/core/tokens/tokens.py +4 -4
  81. pygpt_net/core/types/__init__.py +1 -0
  82. pygpt_net/core/types/image.py +48 -0
  83. pygpt_net/core/types/mode.py +5 -2
  84. pygpt_net/core/vision/analyzer.py +1 -1
  85. pygpt_net/data/config/config.json +13 -4
  86. pygpt_net/data/config/models.json +219 -101
  87. pygpt_net/data/config/modes.json +3 -9
  88. pygpt_net/data/config/settings.json +135 -27
  89. pygpt_net/data/config/settings_section.json +2 -2
  90. pygpt_net/data/locale/locale.de.ini +7 -7
  91. pygpt_net/data/locale/locale.en.ini +25 -12
  92. pygpt_net/data/locale/locale.es.ini +7 -7
  93. pygpt_net/data/locale/locale.fr.ini +7 -7
  94. pygpt_net/data/locale/locale.it.ini +7 -7
  95. pygpt_net/data/locale/locale.pl.ini +8 -8
  96. pygpt_net/data/locale/locale.uk.ini +7 -7
  97. pygpt_net/data/locale/locale.zh.ini +3 -3
  98. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  99. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  100. pygpt_net/item/model.py +23 -3
  101. pygpt_net/plugin/audio_input/plugin.py +37 -4
  102. pygpt_net/plugin/audio_input/simple.py +57 -8
  103. pygpt_net/plugin/cmd_files/worker.py +3 -0
  104. pygpt_net/plugin/openai_dalle/plugin.py +4 -4
  105. pygpt_net/plugin/openai_vision/plugin.py +12 -13
  106. pygpt_net/provider/agents/openai/agent.py +5 -5
  107. pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
  108. pygpt_net/provider/agents/openai/agent_planner.py +5 -6
  109. pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
  110. pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
  111. pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
  112. pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
  113. pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
  114. pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
  115. pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
  116. pygpt_net/provider/agents/openai/evolve.py +5 -5
  117. pygpt_net/provider/agents/openai/supervisor.py +4 -4
  118. pygpt_net/provider/api/__init__.py +27 -0
  119. pygpt_net/provider/api/anthropic/__init__.py +68 -0
  120. pygpt_net/provider/api/google/__init__.py +295 -0
  121. pygpt_net/provider/api/google/audio.py +121 -0
  122. pygpt_net/provider/api/google/chat.py +591 -0
  123. pygpt_net/provider/api/google/image.py +427 -0
  124. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  125. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  126. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  127. pygpt_net/provider/api/google/tools.py +222 -0
  128. pygpt_net/provider/api/google/vision.py +129 -0
  129. pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
  130. pygpt_net/provider/api/openai/agents/__init__.py +0 -0
  131. pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
  132. pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
  133. pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
  134. pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
  135. pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
  136. pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
  137. pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
  138. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  139. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  140. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  141. pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
  142. pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
  143. pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
  144. pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
  145. pygpt_net/provider/api/openai/worker/__init__.py +0 -0
  146. pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
  147. pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
  148. pygpt_net/provider/audio_input/google_genai.py +103 -0
  149. pygpt_net/provider/audio_input/openai_whisper.py +1 -1
  150. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  151. pygpt_net/provider/audio_output/openai_tts.py +9 -6
  152. pygpt_net/provider/core/config/patch.py +26 -0
  153. pygpt_net/provider/core/model/patch.py +20 -0
  154. pygpt_net/provider/core/preset/json_file.py +2 -4
  155. pygpt_net/provider/llms/anthropic.py +2 -5
  156. pygpt_net/provider/llms/base.py +4 -3
  157. pygpt_net/provider/llms/google.py +8 -9
  158. pygpt_net/provider/llms/openai.py +1 -1
  159. pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
  160. pygpt_net/ui/dialog/preset.py +71 -55
  161. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  162. pygpt_net/ui/layout/toolbox/image.py +5 -0
  163. pygpt_net/ui/main.py +6 -4
  164. pygpt_net/ui/widget/option/combo.py +15 -1
  165. pygpt_net/utils.py +9 -0
  166. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
  167. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
  168. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  169. /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
  170. /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
  171. /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
  172. /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
  173. /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
  174. /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
  175. /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
  176. /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
  177. /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
  178. /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
  179. /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
  180. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  181. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  182. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ from typing import Optional
13
+ from pygpt_net.item.ctx import CtxItem
14
+
15
def set_ctx_rt_handle(ctx: Optional[CtxItem], handle: Optional[str], window=None):
    """
    Store the server session handle in ctx.extra['rt_session_id'] (best effort).

    :param ctx: context item to annotate; a falsy value makes this a no-op
    :param handle: session handle; None or a blank string leaves ctx.extra['rt_session_id'] untouched
    :param window: optional Window instance; when given, the changed item is saved via core.ctx.update_item()
    """
    try:
        if ctx:
            if not isinstance(ctx.extra, dict):
                ctx.extra = {}
            session_id = (handle or "").strip()
            if session_id:
                ctx.extra["rt_session_id"] = session_id
                if window:
                    try:
                        window.core.ctx.update_item(ctx)
                    except Exception:
                        pass  # saving is optional; the in-memory value is already set
    except Exception:
        pass  # best effort: session bookkeeping must never break the caller
32
+
33
def set_rt_session_expires_at(ctx: Optional[CtxItem], epoch_seconds: Optional[int], window=None):
    """
    Store the optional session expiration timestamp in ctx.extra['rt_session_expires_at'].

    :param ctx: context item to annotate; a falsy value makes this a no-op
    :param epoch_seconds: expiration time, converted with int(); None makes this a no-op
    :param window: optional Window instance; when given, the item is saved via core.ctx.update_item()
    """
    if epoch_seconds is None or not ctx:
        return
    try:
        extra = ctx.extra
        if not isinstance(extra, dict):
            extra = {}
            ctx.extra = extra
        extra["rt_session_expires_at"] = int(epoch_seconds)
        if not window:
            return
        try:
            window.core.ctx.update_item(ctx)
        except Exception:
            pass  # saving is optional; the in-memory value is already set
    except Exception:
        pass  # best effort: a bad timestamp or odd ctx must not break the caller
48
+
49
def extract_last_session_id(items: list[CtxItem]) -> Optional[str]:
    """
    Return the most recent non-blank session ID stored in a list of context items.

    Items are scanned from last to first; entries that are falsy or whose
    ``extra`` is not a dict are ignored.

    :param items: context items, oldest first (may be empty or None)
    :return: stripped ``rt_session_id`` of the latest item that has one, else None
    """
    if not items:
        return None
    stored = (
        entry.extra.get("rt_session_id")
        for entry in reversed(items)
        if entry and isinstance(entry.extra, dict)
    )
    return next(
        (sid.strip() for sid in stored if isinstance(sid, str) and sid.strip()),
        None,
    )
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ import re
13
+
14
def coalesce_text(parts):
    """
    Merge text parts into one string, preserving intentional newlines and fixing spaces.

    Each part is converted with str(). Runs of horizontal whitespace collapse to
    one space and spaces/tabs around a newline are removed. Parts are joined
    with a single space unless a newline already separates them. After joining,
    the space before punctuation, closing brackets, apostrophes and closing
    double quotes is removed, and three or more newlines shrink to one blank line.

    :param parts: iterable of text fragments; falsy fragments are skipped (may be None)
    :return: merged text without leading/trailing whitespace
    """
    if not parts:
        return ""
    out = []
    for piece in parts:
        if not piece:
            continue
        s = re.sub(r"[ \t\f\v]+", " ", str(piece))
        s = re.sub(r"[ \t]*\n[ \t]*", "\n", s)
        # Trim edge whitespace but keep edge newlines: a plain strip()/lstrip()
        # here would delete the very newline that is meant to be preserved.
        s = re.sub(r"^[^\S\n]+|[^\S\n]+$", "", s)
        if not s:
            continue  # whitespace-only piece: nothing to add, avoids double spaces
        if out and not (out[-1].endswith("\n") or s.startswith("\n")):
            out.append(" ")
        out.append(s)
    text = "".join(out)
    text = re.sub(r"[ \t]+([,.;:!?%])", r"\1", text)
    text = re.sub(r"[ \t]+([\)\]\}])", r"\1", text)
    text = re.sub(r"[ \t]+(')", r"\1", text)
    # Only a closing double quote (followed by whitespace, punctuation or the
    # end of text) loses its leading space; an opening quote keeps it.
    text = re.sub(r"[ \t]+(\")(?=[\s,.;:!?\)\]\}]|$)", r"\1", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
@@ -0,0 +1,276 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ import json
13
+ from typing import Any, Optional, List, Dict
14
+
15
def sanitize_function_tools(tools) -> list:
    """
    OpenAI: normalize function tools into a flat dict shape:
    {"type": "function", "name", "description", "parameters", ...}

    Accepts the legacy {"type": "function", "function": {...}} form and flattens it.
    Entries that are not dicts, whose type is not "function" (a missing type
    counts as "function"), or that have no name are skipped. A missing or
    non-dict "parameters" is replaced by an empty object schema.

    :param tools: iterable of tool definitions (may be None or empty)
    :return: list of new, flattened tool dicts; input dicts are not modified
    """
    out = []
    if not tools:
        return out
    for t in tools:
        if not isinstance(t, dict):
            continue
        # str() so that a malformed non-string type skips this one entry
        # instead of raising and aborting the whole list.
        if str(t.get("type") or "function").lower() != "function":
            continue
        fn = t.get("function")
        if isinstance(fn, dict):
            # legacy nested form: copy only the keys that carry a value
            nt = {"type": "function"}
            for k in ("name", "description", "parameters", "strict", "strict_schema"):
                if fn.get(k) is not None:
                    nt[k] = fn[k]
            if "description" not in nt and t.get("description"):
                nt["description"] = t["description"]
        else:
            nt = {
                "type": "function",
                "name": t.get("name"),
                "description": t.get("description"),
                "parameters": t.get("parameters"),
            }
            for k in ("strict", "strict_schema"):
                if k in t:
                    nt[k] = t[k]
        if not nt.get("name"):
            continue
        if not isinstance(nt.get("parameters"), dict):
            nt["parameters"] = {"type": "object", "properties": {}}
        out.append(nt)
    return out
55
+
56
def sanitize_remote_tools(remote_tools) -> list:
    """
    OpenAI: pass through remote tools of an allowed type, with 'type' lowercased.

    Only the types "function" and "mcp" are kept. Entries that are not dicts,
    have no type, or have any other type are skipped.

    :param remote_tools: iterable of tool dicts (may be None or empty)
    :return: list of shallow copies with a normalized 'type'
    """
    allowed = {"function", "mcp"}  # Realtime accepts only these
    out = []
    if not remote_tools:
        return out
    for t in remote_tools:
        if not isinstance(t, dict):
            continue
        raw_type = t.get("type")
        if not raw_type:
            continue
        # Normalize before filtering, otherwise "MCP" / "Function" would be
        # rejected and the lowercasing below could never change anything.
        ttype = str(raw_type).lower()
        if ttype not in allowed:
            continue
        out.append({**t, "type": ttype})
    return out
74
+
75
def tools_signature(tools_list: list) -> str:
    """
    Build an order-insensitive, stable signature for a list of tools.

    Each tool is dumped to compact JSON with sorted keys; the dumps are sorted
    and joined with "|". If anything cannot be serialized, str(tools_list) is
    returned instead.

    :param tools_list: list of tool definitions (may be None or empty)
    :return: signature string ("" for no tools)
    """
    try:
        # sort_keys=True orders the keys of nested dicts as well
        dumped = sorted(
            json.dumps(tool, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
            for tool in (tools_list or [])
        )
    except Exception:
        return str(tools_list)
    return "|".join(dumped)
90
+
91
def prepare_tools_for_session(opts) -> list:
    """
    Compose session.tools from opts.remote_tools followed by opts.tools.

    :param opts: options object; missing 'tools' / 'remote_tools' attributes count as None
    :return: sanitized remote tools first, then sanitized function tools
    """
    function_tools = sanitize_function_tools(getattr(opts, "tools", None))
    remote_tools = sanitize_remote_tools(getattr(opts, "remote_tools", None))
    combined = list(remote_tools or [])
    combined.extend(function_tools or [])
    return combined
96
+
97
def prepare_tools_for_response(opts) -> tuple[list, Optional[str]]:
    """
    Compose the per-response function tools and tool_choice.

    :param opts: options object; missing 'tools' / 'tool_choice' attributes count as None
    :return: (sanitized function tools, opts.tool_choice or None)
    """
    return (
        sanitize_function_tools(getattr(opts, "tools", None)),
        getattr(opts, "tool_choice", None),
    )
102
+
103
def build_tool_outputs_payload(results, last_tool_calls: List[Dict]) -> List[Dict]:
    """
    Normalize 'results' into:
    [{"call_id": str, "previous_item_id": str|None, "output": str}]

    Accepted shapes of 'results':

    - {"function_responses": [...]} or {"tool_outputs": [...]} wrapper dict,
    - a list of result items,
    - a mapping of call_id / item id / function name -> result,
    - any other value, which is attributed to the first unused call.

    A result item that is a dict is matched by call_id -> item id -> function
    name; an item without any id or name, or a non-dict item, takes the first
    unused call. A call matched in any way is marked as used, so a later
    fallback never answers the same call twice. Results that match no call
    are dropped. Dict/list outputs are JSON-encoded, None becomes "".

    :param results: tool results in one of the shapes above
    :param last_tool_calls: tool calls of the last response (dicts with
        "id", "call_id" and "function": {"name": ...}); may be None
    :return: list of output dicts, in the order of the results
    """
    calls = [c for c in (last_tool_calls or []) if isinstance(c, dict)]
    by_id = {c["id"]: c for c in calls if c.get("id")}
    by_call = {c["call_id"]: c for c in calls if c.get("call_id")}
    by_name: Dict[str, list] = {}
    for c in calls:
        nm = ((c.get("function") or {}).get("name") or "").strip()
        if nm:
            by_name.setdefault(nm, []).append(c)

    used = set()  # call_ids that already received an output
    out: List[Dict] = []

    def to_str(val) -> str:
        """Serialize an output value to the string form sent to the API."""
        if val is None:
            return ""
        if isinstance(val, (dict, list)):
            try:
                return json.dumps(val, ensure_ascii=False)
            except Exception:
                return str(val)
        return str(val)

    def pick_unused(candidates):
        """Return and claim the first candidate call that has not been answered."""
        for cand in candidates:
            cid = cand.get("call_id") or ""
            if cid and cid not in used:
                used.add(cid)
                return cand
        return None

    def pick_by_key(key):
        """Return the call whose call_id or item id equals key, claiming it."""
        if not key:
            return None
        call = by_call.get(key) or by_id.get(key)
        if call and call.get("call_id"):
            used.add(call["call_id"])
        return call

    def extract_output(item: dict):
        """Read the result value of an item without losing falsy results such as 0."""
        resp = item.get("response")
        if resp is not None:
            return resp
        values = [item.get(k) for k in ("result", "output", "content")]
        for val in values:
            if val:
                return val
        for val in values:
            if val is not None:
                return val
        return None

    def add(call, value):
        if call:
            out.append({
                "call_id": call.get("call_id"),
                "previous_item_id": call.get("id"),
                "output": to_str(value),
            })

    def add_item(item):
        """Match one list item to a call and append its output."""
        if not isinstance(item, dict):
            add(pick_unused(calls), item)
            return
        key = item.get("call_id") or item.get("id") or item.get("tool_call_id") or ""
        call = pick_by_key(key)
        if call is None:
            name = item.get("name") or ""
            call = pick_unused(by_name.get(name) or []) if name else pick_unused(calls)
        add(call, extract_output(item))

    if isinstance(results, dict) and ("function_responses" in results or "tool_outputs" in results):
        for it in results.get("function_responses") or results.get("tool_outputs") or []:
            add_item(it)
        return out

    if isinstance(results, list):
        for it in results:
            add_item(it)
        return out

    if isinstance(results, dict):
        for k, v in results.items():
            if not isinstance(k, str):
                continue
            add(pick_by_key(k) or pick_unused(by_name.get(k) or []), v)
        return out

    add(pick_unused(calls), results)
    return out
196
+
197
def build_function_responses_payload(results, last_tool_calls: List[Dict]) -> List[Dict]:
    """
    Produce a neutral list of dicts for Google:
    [{"id": "...", "name": "...", "response": {...}}]
    The provider converts them to gtypes.FunctionResponse downstream.

    Accepted shapes of 'results':

    - {"function_responses": [...]} wrapper dict,
    - a list of result items,
    - a mapping of call id / function name -> result.

    Any other value yields an empty list. A result item that is a dict is
    identified by its id (id / call_id / tool_call_id) or, without an id, by
    the first unused call with the same function name; a missing name is filled
    in from the matched call. A non-dict item is attributed to the first unused
    call (falling back to the first call when all are used). Non-dict response
    values are wrapped as {"result": str(value)}.

    :param results: tool results in one of the shapes above
    :param last_tool_calls: tool calls of the last response (dicts with "id"
        and "function": {"name": ...}); may be None
    :return: list of response dicts, in the order of the results
    """
    calls = [c for c in (last_tool_calls or []) if isinstance(c, dict)]
    by_id = {c["id"]: c for c in calls if c.get("id")}
    by_name: Dict[str, list] = {}
    for c in calls:
        nm = (c.get("function") or {}).get("name") or ""
        if nm:
            by_name.setdefault(nm, []).append(c)

    used_ids = set()  # call ids that already received a response

    def name_of(call: dict) -> str:
        return (call.get("function") or {}).get("name") or ""

    def pick_id_for_name(name: str) -> str:
        """Return and claim the id of the first unused call named 'name' ("" if none)."""
        for cand in by_name.get(name) or []:
            cid = cand.get("id") or ""
            if cid and cid not in used_ids:
                used_ids.add(cid)
                return cid
        return ""

    def next_call():
        """Return the first unused call, or the first call when none is unused."""
        for cand in calls:
            cid = cand.get("id") or ""
            if cid and cid not in used_ids:
                used_ids.add(cid)
                return cand
        return calls[0] if calls else None

    def to_resp_dict(val):
        if isinstance(val, dict):
            return val
        return {"result": str(val)}

    def extract_response(item: dict):
        """Read the result value of an item without losing falsy results such as 0."""
        resp = item.get("response")
        if resp is not None:
            return resp
        values = [item.get(k) for k in ("result", "output", "content")]
        for val in values:
            if val:
                return val
        for val in values:
            if val is not None:
                return val
        return {}

    def convert(item):
        """Convert one list item into a response dict (None when it cannot be attributed)."""
        if not isinstance(item, dict):
            ref = next_call()
            if ref is None:
                return None
            return {"id": ref.get("id") or "", "name": name_of(ref), "response": to_resp_dict(item)}
        fid = item.get("id") or item.get("call_id") or item.get("tool_call_id") or ""
        nm = item.get("name") or ""
        if not fid and nm:
            fid = pick_id_for_name(nm)
        if fid:
            used_ids.add(fid)
            if not nm and fid in by_id:
                nm = name_of(by_id[fid])
        return {"id": fid, "name": nm, "response": to_resp_dict(extract_response(item))}

    out: List[Dict] = []

    if isinstance(results, dict) and "function_responses" in results:
        items = results.get("function_responses") or []
    elif isinstance(results, list):
        items = results
    else:
        items = None

    if items is not None:
        for it in items:
            entry = convert(it)
            if entry is not None:
                out.append(entry)
        return out

    if isinstance(results, dict):
        for k, v in results.items():
            if not isinstance(k, str):
                continue
            if k in by_id:
                # key is a call id
                used_ids.add(k)
                out.append({"id": k, "name": name_of(by_id[k]), "response": to_resp_dict(v)})
            else:
                # key is a function name
                out.append({"id": pick_id_for_name(k), "name": k, "response": to_resp_dict(v)})
        return out

    return out
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ from enum import Enum
13
+
14
class TurnMode(str, Enum):
    """Turn-taking mode of a realtime session; members compare equal to their string values."""

    MANUAL = "manual"
    # future (server VAD / automatic activity detection)
    AUTO = "auto"
17
+
18
def apply_turn_mode_openai(session_payload: dict, mode: TurnMode):
    """
    Write the turn mode into an OpenAI session.update payload (mutates it in place).

    Manual (and any non-AUTO mode): session.turn_detection = None.
    Auto: session.turn_detection = {"type": "server_vad"}.

    :param session_payload: payload dict; a "session" dict is created when missing
    :param mode: requested TurnMode
    """
    session = session_payload.setdefault("session", {})
    session["turn_detection"] = {"type": "server_vad"} if mode == TurnMode.AUTO else None
29
+
30
def apply_turn_mode_google(live_cfg: dict, mode: TurnMode):
    """
    Write the turn mode into a Google Live connect config (mutates it in place).

    Sets realtime_input_config.automatic_activity_detection.disabled to False
    for AUTO (server handles VAD) and to True for every other mode.

    :param live_cfg: config dict; the nested dicts are created when missing
    :param mode: requested TurnMode
    """
    detection = (
        live_cfg
        .setdefault("realtime_input_config", {})
        .setdefault("automatic_activity_detection", {})
    )
    manual = mode != TurnMode.AUTO
    detection["disabled"] = manual
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ from typing import Optional, Callable, Awaitable
13
+
14
# Async callback that receives one text delta.
TextCallback = Callable[[str], Awaitable[None]]

# Async callback that receives one audio chunk; the five positional arguments
# presumably are (data, mime, rate, channels, final) - confirm against the caller.
AudioCallback = Callable[[bytes, str, Optional[int], Optional[int], bool], Awaitable[None]]

# Synchronous, argument-less predicate returning a bool (by its name, a stop check).
StopCallback = Callable[[], bool]
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.30 06:00:00 #
10
+ # ================================================== #
11
+
12
+ import asyncio
13
+ from typing import Optional
14
+
15
+ from PySide6.QtCore import Slot, QRunnable, QObject, Signal
16
+
17
+ from pygpt_net.core.events import RealtimeEvent
18
+ from pygpt_net.item.ctx import CtxItem
19
+
20
+ from .options import RealtimeOptions
21
+
22
class RealtimeSignals(QObject):
    """Qt signal container for realtime events."""

    # carries a RealtimeEvent instance
    response = Signal(object)
25
+
26
class RealtimeWorker(QRunnable):
    """
    QRunnable worker that runs a provider-specific realtime session (websocket).

    Events are sent through opts.rt_signals.response when rt_signals is set:

    - RT_OUTPUT_READY is emitted once before the session starts (STREAM_BEGIN).
    - RT_OUTPUT_TEXT_DELTA is emitted for every non-empty text delta.
    - RT_OUTPUT_AUDIO_DELTA is emitted for audio chunks to be handled by the main-thread AudioDispatcher.
    - RT_OUTPUT_AUDIO_ERROR, followed by RT_OUTPUT_AUDIO_END, is emitted when the session raises.
    """

    def __init__(self, window, ctx: CtxItem, opts: RealtimeOptions):
        """
        Initialize the worker.

        :param window: Window instance
        :param ctx: CtxItem
        :param opts: RealtimeOptions
        """
        super().__init__()
        self.window = window
        self.ctx = ctx
        self.opts = opts

    def get_client(self, provider: str):
        """
        Get the appropriate client based on the provider

        :param provider: Provider name (case-insensitive; empty means "openai")
        :return: Client instance
        :raises RuntimeError: if the provider is neither "google" nor "openai"
        """
        provider = (provider or "openai").lower()
        if provider == "google":
            return self.window.core.api.google.realtime.handler
        if provider == "openai":
            return self.window.core.api.openai.realtime.handler
        raise RuntimeError(f"Unsupported realtime provider: {provider}")

    def _emit(self, name, data: dict):
        """
        Build a RealtimeEvent and send it if realtime signals are configured.

        :param name: RealtimeEvent type
        :param data: event data
        """
        event = RealtimeEvent(name, data)
        signals = self.opts.rt_signals
        if signals:
            signals.response.emit(event)

    async def _session(self):
        """Run the provider client, forwarding its text and audio output as events."""

        async def on_text(delta: str):
            # Text deltas -> UI
            if delta:
                self._emit(RealtimeEvent.RT_OUTPUT_TEXT_DELTA, {
                    "ctx": self.ctx,
                    "chunk": delta,
                })

        async def on_audio(
                data: bytes,
                mime: str,
                rate: Optional[int],
                channels: Optional[int],
                final: bool = False
        ):
            # Audio -> enqueue to main-thread
            payload = {
                "ctx": self.ctx,
                "data": data or b"",
                "mime": mime or "audio/pcm",
                "rate": None if rate is None else int(rate),
                "channels": None if channels is None else int(channels),
                "final": bool(final),
                "provider": self.opts.provider,
                "model": self.opts.model,
            }
            self._emit(RealtimeEvent.RT_OUTPUT_AUDIO_DELTA, {"payload": payload})

        def should_stop() -> bool:
            # a failing stop check must not end the session
            try:
                return bool(self.window.controller.kernel.stopped())
            except Exception:
                return False

        # run the client
        client = self.get_client(self.opts.provider)
        await client.run(self.ctx, self.opts, on_text, on_audio, should_stop)

    @staticmethod
    def _dispose_loop(loop):
        """
        Tear down the event loop: cancel pending tasks, shut down async
        generators, close the loop and unset it as the current loop.
        Every step is best effort.

        :param loop: event loop created by run()
        """
        try:
            pending = [task for task in asyncio.all_tasks(loop) if not task.done()]
            for task in pending:
                task.cancel()
            if pending:
                loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
        except Exception:
            pass
        try:
            loop.run_until_complete(loop.shutdown_asyncgens())
        except Exception:
            pass
        try:
            loop.close()
        except Exception:
            pass
        try:
            asyncio.set_event_loop(None)
        except Exception:
            pass

    @Slot()
    def run(self):
        """Run the realtime session on a private asyncio event loop."""
        loop = None  # ensure defined for cleanup

        # STREAM_BEGIN -> UI
        try:
            self._emit(RealtimeEvent.RT_OUTPUT_READY, {
                "ctx": self.ctx,
            })
        except Exception:
            pass

        try:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            loop.run_until_complete(self._session())
        except Exception as e:
            try:
                self._emit(RealtimeEvent.RT_OUTPUT_AUDIO_ERROR, {"error": e})
            finally:
                # always follow an error with the end event
                try:
                    self._emit(RealtimeEvent.RT_OUTPUT_AUDIO_END, {"ctx": self.ctx})
                except Exception:
                    pass
        finally:
            # Robust asyncio teardown to avoid hangs on subsequent runs
            if loop is not None:
                self._dispose_loop(loop)
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.15 23:00:00 #
9
+ # Updated Date: 2025.08.28 09:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from typing import Tuple, List
@@ -36,8 +36,8 @@ from pygpt_net.item.ctx import CtxItem
36
36
 
37
37
  CHAT_MODES = [
38
38
  MODE_CHAT,
39
- MODE_VISION,
40
- MODE_LANGCHAIN,
39
+ # MODE_VISION,
40
+ # MODE_LANGCHAIN,
41
41
  MODE_ASSISTANT,
42
42
  MODE_LLAMA_INDEX,
43
43
  MODE_AGENT,
@@ -328,7 +328,7 @@ class Tokens:
328
328
  model_id = self.window.core.models.get_id(model)
329
329
  mode = self.window.core.config.get('mode')
330
330
  tokens = 0
331
- if mode in [MODE_CHAT, MODE_VISION, MODE_AUDIO, MODE_RESEARCH]:
331
+ if mode in [MODE_CHAT, MODE_AUDIO, MODE_RESEARCH]:
332
332
  tokens += self.from_prompt(system_prompt, "", model_id)
333
333
  tokens += self.from_text("system", model_id)
334
334
  tokens += self.from_prompt(input_prompt, "", model_id)
@@ -11,6 +11,7 @@
11
11
 
12
12
  from .agent import *
13
13
  from .base import *
14
+ from .image import *
14
15
  from .mode import *
15
16
  from .model import *
16
17
  from .openai import *
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.07.13 01:00:00 #
10
+ # ================================================== #
11
+
12
# Available image resolutions per model family: each inner dict maps a
# resolution to itself, in the order listed here.
IMAGE_AVAILABLE_RESOLUTIONS = {
    family: {size: size for size in sizes}
    for family, sizes in (
        ("gpt-image", ("auto", "1024x1024", "1536x1024", "1024x1536")),
        ("dall-e-3", ("1792x1024", "1024x1792", "1024x1024")),
        ("dall-e-2", ("1024x1024", "512x512", "256x256")),
        ("imagen-3.0", ("1024x1024", "896x1280", "1280x896", "768x1408", "1408x768")),
        ("imagen-4.0", (
            "1024x1024", "896x1280", "1280x896", "768x1408", "1408x768",
            "2048x2048", "1792x2560", "2560x1792", "1536x2816", "2816x1536",
        )),
    )
}