pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +15 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +4 -0
- pygpt_net/{container.py → app_core.py} +5 -6
- pygpt_net/controller/__init__.py +5 -2
- pygpt_net/controller/access/control.py +1 -9
- pygpt_net/controller/assistant/assistant.py +4 -4
- pygpt_net/controller/assistant/batch.py +7 -7
- pygpt_net/controller/assistant/files.py +4 -4
- pygpt_net/controller/assistant/threads.py +3 -3
- pygpt_net/controller/attachment/attachment.py +4 -7
- pygpt_net/controller/audio/audio.py +25 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +30 -4
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +4 -405
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/chat/vision.py +11 -19
- pygpt_net/controller/config/placeholder.py +1 -1
- pygpt_net/controller/ctx/ctx.py +1 -1
- pygpt_net/controller/ctx/summarizer.py +1 -1
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/mode/mode.py +21 -12
- pygpt_net/controller/plugins/settings.py +3 -2
- pygpt_net/controller/presets/editor.py +112 -99
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +268 -0
- pygpt_net/controller/theme/theme.py +3 -2
- pygpt_net/controller/ui/mode.py +7 -0
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/controller/ui/vision.py +4 -4
- pygpt_net/core/agents/legacy.py +2 -2
- pygpt_net/core/agents/runners/openai_workflow.py +2 -2
- pygpt_net/core/assistants/files.py +5 -5
- pygpt_net/core/assistants/store.py +4 -4
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +13 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +4 -3
- pygpt_net/core/bridge/worker.py +31 -9
- pygpt_net/core/debug/console/console.py +2 -2
- pygpt_net/core/debug/presets.py +2 -2
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/experts/experts.py +2 -2
- pygpt_net/core/image/image.py +51 -1
- pygpt_net/core/modes/modes.py +2 -2
- pygpt_net/core/presets/presets.py +3 -3
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +164 -0
- pygpt_net/core/tokens/tokens.py +4 -4
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +48 -0
- pygpt_net/core/types/mode.py +5 -2
- pygpt_net/core/vision/analyzer.py +1 -1
- pygpt_net/data/config/config.json +13 -4
- pygpt_net/data/config/models.json +219 -101
- pygpt_net/data/config/modes.json +3 -9
- pygpt_net/data/config/settings.json +135 -27
- pygpt_net/data/config/settings_section.json +2 -2
- pygpt_net/data/locale/locale.de.ini +7 -7
- pygpt_net/data/locale/locale.en.ini +25 -12
- pygpt_net/data/locale/locale.es.ini +7 -7
- pygpt_net/data/locale/locale.fr.ini +7 -7
- pygpt_net/data/locale/locale.it.ini +7 -7
- pygpt_net/data/locale/locale.pl.ini +8 -8
- pygpt_net/data/locale/locale.uk.ini +7 -7
- pygpt_net/data/locale/locale.zh.ini +3 -3
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/item/model.py +23 -3
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/plugin/openai_dalle/plugin.py +4 -4
- pygpt_net/plugin/openai_vision/plugin.py +12 -13
- pygpt_net/provider/agents/openai/agent.py +5 -5
- pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
- pygpt_net/provider/agents/openai/agent_planner.py +5 -6
- pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
- pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
- pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
- pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
- pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
- pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
- pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
- pygpt_net/provider/agents/openai/evolve.py +5 -5
- pygpt_net/provider/agents/openai/supervisor.py +4 -4
- pygpt_net/provider/api/__init__.py +27 -0
- pygpt_net/provider/api/anthropic/__init__.py +68 -0
- pygpt_net/provider/api/google/__init__.py +295 -0
- pygpt_net/provider/api/google/audio.py +121 -0
- pygpt_net/provider/api/google/chat.py +591 -0
- pygpt_net/provider/api/google/image.py +427 -0
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/google/tools.py +222 -0
- pygpt_net/provider/api/google/vision.py +129 -0
- pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
- pygpt_net/provider/api/openai/agents/__init__.py +0 -0
- pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
- pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
- pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
- pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
- pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
- pygpt_net/provider/api/openai/worker/__init__.py +0 -0
- pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
- pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_input/openai_whisper.py +1 -1
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/openai_tts.py +9 -6
- pygpt_net/provider/core/config/patch.py +26 -0
- pygpt_net/provider/core/model/patch.py +20 -0
- pygpt_net/provider/core/preset/json_file.py +2 -4
- pygpt_net/provider/llms/anthropic.py +2 -5
- pygpt_net/provider/llms/base.py +4 -3
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/provider/llms/openai.py +1 -1
- pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
- pygpt_net/ui/dialog/preset.py +71 -55
- pygpt_net/ui/layout/toolbox/footer.py +16 -0
- pygpt_net/ui/layout/toolbox/image.py +5 -0
- pygpt_net/ui/main.py +6 -4
- pygpt_net/ui/widget/option/combo.py +15 -1
- pygpt_net/utils.py +9 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
- /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
pygpt_net/provider/api/google/realtime/realtime.py

@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.31 23:00:00 #
+# ================================================== #
+
+import json
+from typing import Optional, Dict, Any
+
+from pygpt_net.core.events import RealtimeEvent
+from pygpt_net.core.realtime.options import RealtimeOptions
+from pygpt_net.core.bridge.context import BridgeContext
+from pygpt_net.core.realtime.shared.session import extract_last_session_id
+from pygpt_net.item.model import ModelItem
+
+from .client import GoogleLiveClient
+
+
+class Realtime:
+
+    PROVIDER = "google"
+
+    def __init__(self, window=None):
+        """
+        Google GenAI API realtime controller
+
+        :param window: Window instance
+        """
+        self.window = window
+        self.handler = GoogleLiveClient(window)
+        self.prev_auto_turn = False
+        self.prev_vad_silence = 2000
+        self.prev_vad_prefix = 300
+
+    def begin(
+            self,
+            context: BridgeContext,
+            model: Optional[ModelItem] = None,
+            extra: Optional[Dict[str, Any]] = None,
+            rt_signals=None
+    ) -> bool:
+        """
+        Begin realtime session if applicable
+
+        :param context: BridgeContext
+        :param model: Optional[ModelItem]
+        :param extra: Optional dict with extra parameters
+        :param rt_signals: Optional RealtimeSignals
+        :return: bool - True if realtime session started, False otherwise
+        """
+        # Build realtime options
+        mm = context.multimodal_ctx
+        audio_bytes = getattr(mm, "audio_data", None) if mm and getattr(mm, "is_audio_input", False) else None
+        audio_format = getattr(mm, "audio_format", None) if mm else None
+        audio_rate = getattr(mm, "audio_rate", None) if mm else None
+        is_debug = self.window.core.config.get("log.realtime", False)
+        auto_turn = self.window.core.config.get("audio.input.auto_turn", True)
+        opt_vad_silence = self.window.core.config.get("audio.input.vad.silence", 2000)
+        opt_vad_prefix = self.window.core.config.get("audio.input.vad.prefix", 300)
+
+        # setup manager
+        self.window.controller.realtime.set_current_active(self.PROVIDER)
+        self.window.controller.realtime.set_busy()
+        self.handler.set_debug(is_debug)
+
+        # handle sub-reply (tool results from tool calls)
+        if context.ctx.internal:
+            if context.ctx.prev_ctx and context.ctx.prev_ctx.extra.get("prev_tool_calls"):
+                tool_calls = context.ctx.prev_ctx.extra.get("prev_tool_calls", [])
+                tool_call_id = None
+                if isinstance(tool_calls, list) and len(tool_calls) > 0:
+                    tool_call_id = tool_calls[0].get("call_id", "")  # get first call_id
+                    if not tool_call_id:
+                        tool_call_id = tool_calls[0].get("id", "")  # fallback to id
+                if tool_call_id:
+                    tool_results = context.ctx.input
+                    try:
+                        tool_results = json.loads(tool_results)
+                    except Exception:
+                        pass
+                    self.handler.send_tool_results_sync({
+                        tool_call_id: tool_results
+                    })
+                    return True  # do not start new session, just send tool results
+
+        # update auto-turn in active session
+        if (self.handler.is_session_active()
+                and (auto_turn != self.prev_auto_turn
+                     or opt_vad_silence != self.prev_vad_silence
+                     or opt_vad_prefix != self.prev_vad_prefix)):
+            self.handler.update_session_autoturn_sync(auto_turn, opt_vad_silence, opt_vad_prefix)
+
+        # Tools
+        tools = self.window.core.api.google.tools.prepare(model, context.external_functions)
+        remote_tools = self.window.core.api.google.build_remote_tools(model)
+        if tools:
+            remote_tools = []  # in Google, remote tools are not allowed if function calling is used
+
+        # if auto-turn is enabled and prompt is empty, update session and context only
+        if auto_turn and self.handler.is_session_active() and (context.prompt.strip() == "" or context.prompt == "..."):
+            self.handler.update_session_tools_sync(tools, remote_tools)
+            self.handler.update_ctx(context.ctx)
+            return True  # do not send new request if session is active
+
+        # Last session ID
+        last_session_id = extract_last_session_id(context.history)
+        if is_debug:
+            print("[realtime session] Last ID", last_session_id)
+
+        # Voice
+        voice_name = "Kore"
+        try:
+            v = self.window.core.plugins.get_option("audio_output", "google_genai_tts_voice")
+            if v:
+                mapping = {"kore": "Kore", "puck": "Puck", "charon": "Charon", "verse": "Verse",
+                           "legend": "Legend"}
+                voice_name = mapping.get(str(v).strip().lower(), str(v))
+        except Exception:
+            pass
+
+        # Options
+        opts = RealtimeOptions(
+            provider=self.PROVIDER,
+            model=model.id,
+            system_prompt=context.system_prompt,
+            prompt=context.prompt,
+            voice=voice_name,
+            audio_data=audio_bytes,
+            audio_format=audio_format,
+            audio_rate=audio_rate,
+            vad=None,
+            extra=extra or {},
+            tools=tools,
+            remote_tools=remote_tools,
+            rt_signals=rt_signals,
+            rt_session_id=last_session_id,
+            auto_turn=auto_turn,
+            vad_end_silence_ms=opt_vad_silence,
+            vad_prefix_padding_ms=opt_vad_prefix,
+        )
+
+        # Start or append to realtime session via manager
+        try:
+            if is_debug:
+                print("[realtime] Starting session with options:", opts.to_dict())
+            rt = self.window.controller.realtime.manager
+            rt.start(context.ctx, opts)
+
+            self.prev_auto_turn = auto_turn
+            self.prev_vad_silence = opt_vad_silence
+            self.prev_vad_prefix = opt_vad_prefix
+            return True
+        except Exception as e:
+            self.window.core.debug.log(e)
+            return False  # fallback to non-live path
+
+    def handle_audio_input(self, event: RealtimeEvent):
+        """
+        Handle Realtime audio input event
+
+        :param event: RealtimeEvent
+        """
+        self.handler.rt_handle_audio_input_sync(event)
+
+    def manual_commit(self):
+        """Manually commit audio input to realtime session"""
+        self.handler.force_response_now_sync()
+
+    def shutdown(self):
+        """Shutdown realtime loops"""
+        if self.handler.is_session_active():
+            self.handler.close_session_sync()
+        try:
+            self.handler.stop_loop_sync()
+        except Exception:
+            pass
+
+    def reset(self):
+        """Close realtime session"""
+        if self.handler.is_session_active():
+            self.handler.close_session_sync()
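In `begin()` above, a sub-reply (a turn that carries tool results back to the model) short-circuits the flow: the controller picks the first `call_id` from `prev_tool_calls` (falling back to `id`), JSON-decodes the ctx input when possible, and forwards it via `send_tool_results_sync()` instead of opening a new session. A minimal standalone sketch of that branch, with a hypothetical payload and helper names that are not part of the package:

```python
import json
from typing import Any, Optional

def first_tool_call_id(prev_tool_calls: Any) -> Optional[str]:
    """Prefer 'call_id' of the first queued tool call, fall back to 'id'."""
    if isinstance(prev_tool_calls, list) and prev_tool_calls:
        first = prev_tool_calls[0]
        return first.get("call_id") or first.get("id") or None
    return None

def decode_tool_results(raw: str) -> Any:
    """Tool results arrive as the ctx input string; decode JSON when possible."""
    try:
        return json.loads(raw)
    except Exception:
        return raw  # keep the raw string if it is not valid JSON

# hypothetical payload shaped like ctx.prev_ctx.extra["prev_tool_calls"]
calls = [{"call_id": "call_123", "id": "fc_456", "name": "get_weather"}]
print(first_tool_call_id(calls))              # -> call_123
print(decode_tool_results('{"temp_c": 21}'))  # -> {'temp_c': 21}
```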
pygpt_net/provider/api/google/tools.py

@@ -0,0 +1,222 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.28 20:00:00 #
+# ================================================== #
+
+import json
+from typing import List, Any, Dict, Optional
+
+from google.genai import types as gtypes
+from pygpt_net.item.model import ModelItem
+
+
+class Tools:
+    def __init__(self, window=None):
+        """
+        Tools mapper for Google GenAI
+
+        :param window: Window instance
+        """
+        self.window = window
+
+    # -------- SANITIZER --------
+    def _sanitize_schema(self, schema: Any) -> Any:
+        """
+        Sanitize JSON Schema dict by removing unsupported keywords and normalizing types.
+
+        1. Remove unsupported keywords like additionalProperties, patternProperties,
+           dependencies, oneOf, anyOf, allOf, $ref, $defs, examples, readOnly, writeOnly.
+        2. Normalize 'type' to a single value (e.g., if it's a list, take the first non-null type).
+        3. Ensure 'enum' is only present for string types.
+        4. Recursively sanitize nested schemas in 'properties' and 'items'.
+        5. Handle arrays by ensuring 'items' is a single schema.
+        6. Handle objects by ensuring 'properties' is a dict and 'required' is a list of strings.
+
+        :param schema: Any JSON Schema as dict or list
+        :return: Sanitized schema dict
+        """
+        if isinstance(schema, list):
+            return self._sanitize_schema(schema[0]) if schema else {}
+
+        if not isinstance(schema, dict):
+            return schema
+
+        banned = {
+            "additionalProperties",
+            "additional_properties",
+            "unevaluatedProperties",
+            "patternProperties",
+            "dependencies",
+            "dependentSchemas",
+            "dependentRequired",
+            "oneOf",
+            "anyOf",
+            "allOf",
+            "$defs",
+            "$ref",
+            "$schema",
+            "$id",
+            "examples",
+            "readOnly",
+            "writeOnly",
+            "nullable",
+        }
+        for k in list(schema.keys()):
+            if k in banned:
+                schema.pop(k, None)
+
+        # Union -> first non-null type
+        t = schema.get("type")
+        if isinstance(t, list):
+            t_no_null = [x for x in t if x != "null"]
+            schema["type"] = t_no_null[0] if t_no_null else "string"
+
+        # enum only for string
+        if "enum" in schema and schema.get("type") not in ("string", "STRING"):
+            schema.pop("enum", None)
+
+        # object
+        if (schema.get("type") or "").lower() == "object":
+            props = schema.get("properties")
+            if not isinstance(props, dict):
+                props = {}
+            clean_props: Dict[str, Any] = {}
+            for pname, pval in props.items():
+                clean_props[pname] = self._sanitize_schema(pval)
+            schema["properties"] = clean_props
+
+            req = schema.get("required")
+            if not isinstance(req, list) or not all(isinstance(x, str) for x in req):
+                schema.pop("required", None)
+            elif len(req) == 0:
+                schema.pop("required", None)
+
+        # array
+        if (schema.get("type") or "").lower() == "array":
+            items = schema.get("items")
+            if isinstance(items, list) and items:
+                items = items[0]
+            if not isinstance(items, dict):
+                items = {"type": "string"}
+            schema["items"] = self._sanitize_schema(items)
+
+        # recursive sanitize
+        for k, v in list(schema.items()):
+            if isinstance(v, dict):
+                schema[k] = self._sanitize_schema(v)
+            elif isinstance(v, list):
+                schema[k] = [self._sanitize_schema(x) for x in v]
+
+        return schema
+
+    # -------- CONVERTER to gtypes.Schema (UPPERCASE) --------
+    def _to_gschema(self, schema: Any) -> gtypes.Schema:
+        """
+        Convert sanitized dict -> google.genai.types.Schema.
+        Enforces UPPERCASE type names (OBJECT, ARRAY, STRING, NUMBER, INTEGER, BOOLEAN).
+
+        :param schema: Sanitized JSON Schema as dict
+        :return: gtypes.Schema
+        """
+        TYPE_MAP = {
+            "enum": "STRING",
+            "ENUM": "STRING",
+            "object": "OBJECT",
+            "dict": "OBJECT",
+            "array": "ARRAY",
+            "list": "ARRAY",
+            "string": "STRING",
+            "number": "NUMBER",
+            "float": "NUMBER",
+            "integer": "INTEGER",
+            "boolean": "BOOLEAN",
+            "int": "INTEGER",
+            "bool": "BOOLEAN",
+            "OBJECT": "OBJECT",
+            "DICT": "OBJECT",
+            "ARRAY": "ARRAY",
+            "LIST": "ARRAY",
+            "STRING": "STRING",
+            "NUMBER": "NUMBER",
+            "FLOAT": "NUMBER",
+            "INTEGER": "INTEGER",
+            "BOOLEAN": "BOOLEAN",
+            "INT": "INTEGER",
+            "BOOL": "BOOLEAN",
+        }
+
+        if isinstance(schema, gtypes.Schema):
+            return schema
+
+        if not isinstance(schema, dict):
+            return gtypes.Schema(type="STRING")
+
+        t = TYPE_MAP.get(str(schema.get("type", "OBJECT")).upper(), "OBJECT")
+        desc = schema.get("description")
+        fmt = schema.get("format")
+        enum = schema.get("enum") if isinstance(schema.get("enum"), list) else None
+        req = schema.get("required") if isinstance(schema.get("required"), list) else None
+
+        gs = gtypes.Schema(
+            type=t,
+            description=desc,
+            format=fmt,
+            enum=enum,
+            required=[x for x in (req or []) if isinstance(x, str)] or None,
+        )
+
+        props = schema.get("properties")
+        if isinstance(props, dict):
+            gs.properties = {k: self._to_gschema(v) for k, v in props.items()}
+
+        items = schema.get("items")
+        if isinstance(items, dict):
+            gs.items = self._to_gschema(items)
+
+        return gs
+
+    def prepare(self, model: ModelItem, functions: list) -> List[gtypes.Tool]:
+        """
+        Prepare Google Function Declarations (types.Tool) for google-genai.
+
+        :param model: ModelItem
+        :param functions: List of function definitions as dicts with 'name', 'desc', 'params' (JSON Schema)
+        :return: List of gtypes.Tool
+        """
+        if not functions or not isinstance(functions, list):
+            return []
+
+        fds: List[gtypes.FunctionDeclaration] = []
+        for function in functions:
+            name = str(function.get("name") or "").strip()
+            if not name:
+                continue
+
+            desc = function.get("desc") or ""
+            params: Optional[dict] = {}
+            if function.get("params"):
+                try:
+                    params = json.loads(function["params"])
+                except Exception:
+                    params = {}
+
+            params = self._sanitize_schema(params or {})
+            if not params.get("type"):
+                params["type"] = "object"
+
+            gschema = self._to_gschema(params or {"type": "object"})
+
+            fd = gtypes.FunctionDeclaration(
+                name=name,
+                description=desc,
+                parameters=gschema,
+            )
+            fds.append(fd)
+
+        return [gtypes.Tool(function_declarations=fds)] if fds else []
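`Tools._sanitize_schema()` strips unsupported JSON Schema keywords, collapses union types to the first non-null entry, keeps `enum` only on string types, and recurses into nested schemas before `_to_gschema()` converts the result to `google.genai.types.Schema`. A trimmed-down restatement of those rules, independent of pygpt_net and google-genai (the function and the sample schema below are illustrative only):

```python
from typing import Any

BANNED = {"additionalProperties", "oneOf", "anyOf", "allOf", "$ref", "$defs", "examples"}

def sanitize(schema: Any) -> Any:
    """Apply the key sanitization rules to a plain JSON Schema dict."""
    if not isinstance(schema, dict):
        return schema
    schema = {k: v for k, v in schema.items() if k not in BANNED}
    t = schema.get("type")
    if isinstance(t, list):  # union type -> first non-null entry
        non_null = [x for x in t if x != "null"]
        schema["type"] = non_null[0] if non_null else "string"
    if "enum" in schema and schema.get("type") != "string":
        schema.pop("enum")   # enum is only kept for string types
    return {
        k: [sanitize(x) for x in v] if isinstance(v, list) else sanitize(v)
        for k, v in schema.items()
    }

raw = {
    "type": "object",
    "additionalProperties": False,                             # dropped
    "properties": {
        "limit": {"type": ["integer", "null"], "enum": [10]},  # union collapsed, enum dropped
        "query": {"type": "string"},
    },
}
print(sanitize(raw))
# {'type': 'object', 'properties': {'limit': {'type': 'integer'}, 'query': {'type': 'string'}}}
```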
pygpt_net/provider/api/google/vision.py

@@ -0,0 +1,129 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.28 20:00:00 #
+# ================================================== #
+
+import os
+from typing import Optional, Dict, List, Union
+
+from google.genai.types import Part
+
+from pygpt_net.item.attachment import AttachmentItem
+from pygpt_net.item.ctx import CtxItem
+
+
+class Vision:
+    def __init__(self, window=None):
+        """
+        Vision helpers for Google GenAI
+
+        :param window: Window instance
+        """
+        self.window = window
+        self.attachments: Dict[str, str] = {}
+        self.urls: List[str] = []
+        self.input_tokens = 0
+
+    def build_parts(
+            self,
+            content: Union[str, list],
+            attachments: Optional[Dict[str, AttachmentItem]] = None,
+    ) -> List[Part]:
+        """
+        Build image parts from local attachments (inline bytes)
+
+        :param content: Message content (str or list)
+        :param attachments: Attachments dict (id -> AttachmentItem)
+        :return: List of Parts
+        """
+        parts: List[Part] = []
+        self.attachments = {}
+        self.urls = []
+
+        if attachments:
+            for id_, attachment in attachments.items():
+                if attachment.path and os.path.exists(attachment.path):
+                    if self.is_image(attachment.path):
+                        mime = self._guess_mime(attachment.path)
+                        with open(attachment.path, "rb") as f:
+                            data = f.read()
+                        parts.append(Part.from_bytes(data=data, mime_type=mime))
+                        self.attachments[id_] = attachment.path
+                        attachment.consumed = True
+
+        return parts
+
+    def is_image(self, path: str) -> bool:
+        """
+        Check if path looks like an image
+
+        :param path: File path
+        :return: True if image, False otherwise
+        """
+        return path.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif', '.webp'))
+
+    def _guess_mime(self, path: str) -> str:
+        """
+        Guess mime type from file extension
+
+        :param path: File path
+        :return: Mime type string
+        """
+        ext = os.path.splitext(path)[1].lower().lstrip(".")
+        if ext in ("jpg", "jpeg"):
+            return "image/jpeg"
+        if ext == "png":
+            return "image/png"
+        if ext == "gif":
+            return "image/gif"
+        if ext == "bmp":
+            return "image/bmp"
+        if ext == "webp":
+            return "image/webp"
+        if ext == "tiff":
+            return "image/tiff"
+        return "image/jpeg"
+
+    def append_images(self, ctx: CtxItem):
+        """
+        Append sent images paths to context for UI/history
+
+        :param ctx: CtxItem
+        """
+        images = self.get_attachments()
+        if len(images) > 0:
+            ctx.images = self.window.core.filesystem.make_local_list(list(images.values()))
+
+    def get_attachments(self) -> Dict[str, str]:
+        """
+        Return attachments dict (id -> path)
+
+        :return: Dict of attachments
+        """
+        return self.attachments
+
+    def get_urls(self) -> List[str]:
+        """
+        Return image urls (unused here)
+
+        :return: List of URLs
+        """
+        return self.urls
+
+    def reset_tokens(self):
+        """Reset input tokens counter"""
+        self.input_tokens = 0
+
+    def get_used_tokens(self) -> int:
+        """
+        Return input tokens counter
+
+        :return: Number of input tokens
+        """
+        return self.input_tokens
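The `Vision` helper decides what to inline purely from the file extension: `is_image()` checks a fixed extension list and `_guess_mime()` maps the extension to a MIME type, defaulting to `image/jpeg`. The same checks, shown standalone for reference (the file names below are made up):

```python
import os

IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".webp")
MIME_BY_EXT = {
    "jpg": "image/jpeg", "jpeg": "image/jpeg", "png": "image/png",
    "gif": "image/gif", "bmp": "image/bmp", "webp": "image/webp", "tiff": "image/tiff",
}

def is_image(path: str) -> bool:
    """Extension-based check, mirroring Vision.is_image()."""
    return path.lower().endswith(IMAGE_EXTS)

def guess_mime(path: str) -> str:
    """Extension-to-MIME mapping with the same image/jpeg fallback."""
    ext = os.path.splitext(path)[1].lower().lstrip(".")
    return MIME_BY_EXT.get(ext, "image/jpeg")

for name in ("diagram.PNG", "photo.webp", "notes.pdf"):
    print(name, is_image(name), guess_mime(name) if is_image(name) else "-")
# diagram.PNG True image/png
# photo.webp True image/webp
# notes.pdf False -
```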
pygpt_net/provider/{gpt → api/openai}/__init__.py

@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.08.
+# Updated Date: 2025.08.30 06:00:00 #
 # ================================================== #

 from openai import OpenAI
@@ -33,13 +33,14 @@ from .container import Container
 from .image import Image
 from .remote_tools import RemoteTools
 from .responses import Responses
+from .realtime import Realtime
 from .store import Store
 from .summarizer import Summarizer
 from .tools import Tools
 from .vision import Vision


-class Gpt:
+class ApiOpenAI:

     def __init__(self, window=None):
         """
@@ -57,6 +58,7 @@ class Gpt:
         self.image = Image(window)
         self.remote_tools = RemoteTools(window)
         self.responses = Responses(window)
+        self.realtime = Realtime(window)
         self.store = Store(window)
         self.summarizer = Summarizer(window)
         self.tools = Tools(window)
@@ -90,12 +92,18 @@ class Gpt:
         self.last_client_args = args
         return self.client

-    def call(
+    def call(
+            self,
+            context: BridgeContext,
+            extra: dict = None,
+            rt_signals = None
+    ) -> bool:
         """
         Call OpenAI API

         :param context: Bridge context
         :param extra: Extra arguments
+        :param rt_signals: Realtime signals for audio streaming
         :return: result
         """
         mode = context.mode
@@ -145,6 +153,18 @@ class Gpt:
             MODE_RESEARCH,
             MODE_COMPUTER,
         ]:
+            if mode == MODE_AUDIO and stream:
+
+                # Realtime API for audio streaming
+                is_realtime = self.realtime.begin(
+                    context=context,
+                    model=model,
+                    extra=extra or {},
+                    rt_signals=rt_signals
+                )
+                if is_realtime:
+                    return True
+
             # responses API
             if use_responses_api:
                 response = self.responses.send(
@@ -281,7 +301,7 @@ class Gpt:
             # additional_kwargs["max_tokens"] = max_tokens

         # tools / functions
-        tools = self.window.core.
+        tools = self.window.core.api.openai.tools.prepare(model, functions)
         if len(tools) > 0 and "disable_tools" not in extra:
             additional_kwargs["tools"] = tools

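The new branch in `call()` routes streamed audio turns through the Realtime client first and only falls through to the regular chat/Responses path when `realtime.begin()` does not take over the turn. A minimal sketch of that dispatch, detached from pygpt_net (`FakeRealtime` and the return strings are illustrative stand-ins, not package APIs):

```python
MODE_AUDIO = "audio"

class FakeRealtime:
    """Stand-in for the realtime controller; begin() returns True when a live session handles the turn."""
    def __init__(self, takes_turn: bool):
        self.takes_turn = takes_turn

    def begin(self, **kwargs) -> bool:
        return self.takes_turn

def call(mode: str, stream: bool, realtime: FakeRealtime) -> str:
    """Mirror the dispatch: realtime first for streamed audio, regular API otherwise."""
    if mode == MODE_AUDIO and stream:
        if realtime.begin():
            return "handled by realtime session"
    return "handled by regular chat/responses API"

print(call(MODE_AUDIO, True, FakeRealtime(takes_turn=True)))   # realtime path
print(call(MODE_AUDIO, True, FakeRealtime(takes_turn=False)))  # falls back
print(call("chat", True, FakeRealtime(takes_turn=True)))       # non-audio modes unaffected
```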
pygpt_net/provider/api/openai/agents/__init__.py

File without changes
pygpt_net/provider/{gpt → api/openai}/agents/computer.py

@@ -48,7 +48,7 @@ class LocalComputer(Computer):

         :return: Environment of the computer, such as "mac", "windows", "ubuntu", or "browser".
         """
-        return self.window.core.
+        return self.window.core.api.openai.computer.get_current_env()

     @property
     def dimensions(self) -> tuple[int, int]:
pygpt_net/provider/{gpt → api/openai}/agents/experts.py

@@ -13,7 +13,7 @@ from agents import (
 from pygpt_net.item.model import ModelItem
 from pygpt_net.item.preset import PresetItem

-from pygpt_net.provider.
+from pygpt_net.provider.api.openai.agents.remote_tools import append_tools


 def get_experts(
pygpt_net/provider/{gpt → api/openai}/agents/response.py

@@ -169,7 +169,7 @@ class StreamHandler:
             self.files_handled = True
             self.window.core.debug.info("[chat] Container files found, downloading...")
             try:
-                self.window.core.
+                self.window.core.api.openai.container.download_files(ctx, self.files)
             except Exception as e:
                 self.window.core.debug.error(f"[chat] Error downloading container files: {e}")
