pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +15 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +4 -0
- pygpt_net/{container.py → app_core.py} +5 -6
- pygpt_net/controller/__init__.py +5 -2
- pygpt_net/controller/access/control.py +1 -9
- pygpt_net/controller/assistant/assistant.py +4 -4
- pygpt_net/controller/assistant/batch.py +7 -7
- pygpt_net/controller/assistant/files.py +4 -4
- pygpt_net/controller/assistant/threads.py +3 -3
- pygpt_net/controller/attachment/attachment.py +4 -7
- pygpt_net/controller/audio/audio.py +25 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +30 -4
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +4 -405
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/chat/vision.py +11 -19
- pygpt_net/controller/config/placeholder.py +1 -1
- pygpt_net/controller/ctx/ctx.py +1 -1
- pygpt_net/controller/ctx/summarizer.py +1 -1
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/mode/mode.py +21 -12
- pygpt_net/controller/plugins/settings.py +3 -2
- pygpt_net/controller/presets/editor.py +112 -99
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +268 -0
- pygpt_net/controller/theme/theme.py +3 -2
- pygpt_net/controller/ui/mode.py +7 -0
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/controller/ui/vision.py +4 -4
- pygpt_net/core/agents/legacy.py +2 -2
- pygpt_net/core/agents/runners/openai_workflow.py +2 -2
- pygpt_net/core/assistants/files.py +5 -5
- pygpt_net/core/assistants/store.py +4 -4
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +13 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +4 -3
- pygpt_net/core/bridge/worker.py +31 -9
- pygpt_net/core/debug/console/console.py +2 -2
- pygpt_net/core/debug/presets.py +2 -2
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/experts/experts.py +2 -2
- pygpt_net/core/image/image.py +51 -1
- pygpt_net/core/modes/modes.py +2 -2
- pygpt_net/core/presets/presets.py +3 -3
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +164 -0
- pygpt_net/core/tokens/tokens.py +4 -4
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +48 -0
- pygpt_net/core/types/mode.py +5 -2
- pygpt_net/core/vision/analyzer.py +1 -1
- pygpt_net/data/config/config.json +13 -4
- pygpt_net/data/config/models.json +219 -101
- pygpt_net/data/config/modes.json +3 -9
- pygpt_net/data/config/settings.json +135 -27
- pygpt_net/data/config/settings_section.json +2 -2
- pygpt_net/data/locale/locale.de.ini +7 -7
- pygpt_net/data/locale/locale.en.ini +25 -12
- pygpt_net/data/locale/locale.es.ini +7 -7
- pygpt_net/data/locale/locale.fr.ini +7 -7
- pygpt_net/data/locale/locale.it.ini +7 -7
- pygpt_net/data/locale/locale.pl.ini +8 -8
- pygpt_net/data/locale/locale.uk.ini +7 -7
- pygpt_net/data/locale/locale.zh.ini +3 -3
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/item/model.py +23 -3
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/plugin/openai_dalle/plugin.py +4 -4
- pygpt_net/plugin/openai_vision/plugin.py +12 -13
- pygpt_net/provider/agents/openai/agent.py +5 -5
- pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
- pygpt_net/provider/agents/openai/agent_planner.py +5 -6
- pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
- pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
- pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
- pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
- pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
- pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
- pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
- pygpt_net/provider/agents/openai/evolve.py +5 -5
- pygpt_net/provider/agents/openai/supervisor.py +4 -4
- pygpt_net/provider/api/__init__.py +27 -0
- pygpt_net/provider/api/anthropic/__init__.py +68 -0
- pygpt_net/provider/api/google/__init__.py +295 -0
- pygpt_net/provider/api/google/audio.py +121 -0
- pygpt_net/provider/api/google/chat.py +591 -0
- pygpt_net/provider/api/google/image.py +427 -0
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/google/tools.py +222 -0
- pygpt_net/provider/api/google/vision.py +129 -0
- pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
- pygpt_net/provider/api/openai/agents/__init__.py +0 -0
- pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
- pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
- pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
- pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
- pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
- pygpt_net/provider/api/openai/worker/__init__.py +0 -0
- pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
- pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_input/openai_whisper.py +1 -1
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/openai_tts.py +9 -6
- pygpt_net/provider/core/config/patch.py +26 -0
- pygpt_net/provider/core/model/patch.py +20 -0
- pygpt_net/provider/core/preset/json_file.py +2 -4
- pygpt_net/provider/llms/anthropic.py +2 -5
- pygpt_net/provider/llms/base.py +4 -3
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/provider/llms/openai.py +1 -1
- pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
- pygpt_net/ui/dialog/preset.py +71 -55
- pygpt_net/ui/layout/toolbox/footer.py +16 -0
- pygpt_net/ui/layout/toolbox/image.py +5 -0
- pygpt_net/ui/main.py +6 -4
- pygpt_net/ui/widget/option/combo.py +15 -1
- pygpt_net/utils.py +9 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
- /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
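The dominant change across these two releases is the relocation of the OpenAI client layer from pygpt_net/provider/gpt to pygpt_net/provider/api/openai, with new pygpt_net/provider/api/google and pygpt_net/provider/api/anthropic packages and a new realtime (live audio) subsystem added alongside. The hunks below show call sites moving to a core.api.<provider> accessor; a hypothetical before/after sketch (the old accessor name is assumed from the directory rename, since the truncated `-` lines below do not show it in full):

    # 2.6.29 (assumed old accessor path, inferred from the provider/{gpt -> api/openai} moves):
    client = self.window.core.gpt.get_client(mode, model)

    # 2.6.31 (as shown in the hunks below):
    client = self.window.core.api.openai.get_client(mode, model)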
pygpt_net/provider/api/openai/realtime/realtime.py (new file)
@@ -0,0 +1,194 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package               #
+# Website: https://pygpt.net                         #
+# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
+# MIT License                                        #
+# Created By  : Marcin Szczygliński                  #
+# Updated Date: 2025.08.31 23:00:00                  #
+# ================================================== #
+
+import json
+from typing import Optional, Dict, Any
+
+from pygpt_net.core.bridge import BridgeContext
+from pygpt_net.core.events import RealtimeEvent
+from pygpt_net.core.realtime.options import RealtimeOptions
+from pygpt_net.core.realtime.shared.session import extract_last_session_id
+from pygpt_net.item.model import ModelItem
+from pygpt_net.utils import trans
+
+from .client import OpenAIRealtimeClient
+
+class Realtime:
+
+    PROVIDER = "openai"
+
+    def __init__(self, window=None):
+        """
+        OpenAI API realtime controller
+
+        :param window: Window instance
+        """
+        self.window = window
+        self.handler = OpenAIRealtimeClient(window)
+        self.prev_auto_turn = False
+        self.prev_vad_silence = 2000
+        self.prev_vad_prefix = 300
+
+    def begin(
+            self,
+            context: BridgeContext,
+            model: Optional[ModelItem] = None,
+            extra: Optional[Dict[str, Any]] = None,
+            rt_signals=None
+    ) -> bool:
+        """
+        Begin realtime session if applicable
+
+        :param context: BridgeContext
+        :param model: Optional[ModelItem]
+        :param extra: Optional dict with extra parameters
+        :param rt_signals: RealtimeSignals
+        :return: True if realtime session started, False otherwise
+        """
+        mm = context.multimodal_ctx
+        audio_bytes = getattr(mm, "audio_data", None) if mm and getattr(mm, "is_audio_input", False) else None
+        audio_format = getattr(mm, "audio_format", None) if mm else None
+        audio_rate = getattr(mm, "audio_rate", None) if mm else None
+        is_debug = self.window.core.config.get("log.realtime", False)
+        auto_turn = self.window.core.config.get("audio.input.auto_turn", True)
+        opt_vad_silence = self.window.core.config.get("audio.input.vad.silence", 2000)
+        opt_vad_prefix = self.window.core.config.get("audio.input.vad.prefix", 300)
+
+        # setup manager
+        self.window.controller.realtime.set_current_active(self.PROVIDER)
+        self.window.controller.realtime.set_busy()
+        self.handler.set_debug(is_debug)
+
+        # tools
+        tools = self.window.core.api.openai.tools.prepare(model, context.external_functions)
+
+        # remote tools
+        remote_tools = []
+        remote_tools = self.window.core.api.openai.remote_tools.append_to_tools(
+            mode=context.mode,
+            model=model,
+            stream=context.stream,
+            is_expert_call=context.is_expert_call,
+            tools=remote_tools,
+            preset=context.preset,
+        )
+
+        # handle sub-reply (tool results from tool calls)
+        if context.ctx.internal:
+            if context.ctx.prev_ctx and context.ctx.prev_ctx.extra.get("prev_tool_calls"):
+                tool_calls = context.ctx.prev_ctx.extra.get("prev_tool_calls", [])
+                tool_call_id = None
+                if isinstance(tool_calls, list) and len(tool_calls) > 0:
+                    tool_call_id = tool_calls[0].get("call_id", "")  # get first call_id
+                    if not tool_call_id:
+                        tool_call_id = tool_calls[0].get("id", "")  # fallback to id
+                if tool_call_id:
+                    tool_results = context.ctx.input
+                    try:
+                        tool_results = json.loads(tool_results)
+                    except Exception:
+                        pass
+                    self.handler.send_tool_results_sync({
+                        tool_call_id: tool_results
+                    })
+                    self.handler.update_ctx(context.ctx)
+                    return True  # do not start new session, just send tool results
+
+        # update auto-turn in active session
+        if (self.handler.is_session_active()
+                and (auto_turn != self.prev_auto_turn
+                     or opt_vad_silence != self.prev_vad_silence
+                     or opt_vad_prefix != self.prev_vad_prefix)):
+            print("updating")
+            self.handler.update_session_autoturn_sync(auto_turn, opt_vad_silence, opt_vad_prefix)
+
+        # if auto-turn is enabled and prompt is empty, update session and context only
+        if auto_turn and self.handler.is_session_active() and (context.prompt.strip() == "" or context.prompt == "..."):
+            self.handler.update_session_tools_sync(tools, remote_tools)
+            self.handler.update_ctx(context.ctx)
+            self.window.update_status(trans("speech.listening"))
+            return True  # do not send new request if session is active
+
+        # Last session ID
+        last_session_id = extract_last_session_id(context.history)
+        if is_debug:
+            print("[realtime session] Last ID", last_session_id)
+
+        # Voice
+        voice = "alloy"
+        try:
+            v = self.window.core.plugins.get_option("audio_output", "openai_voice")
+            if v:
+                voice = str(v)
+        except Exception:
+            pass
+
+        # Options
+        opts = RealtimeOptions(
+            provider=self.PROVIDER,
+            model=context.model.id,
+            system_prompt=context.system_prompt,
+            prompt=context.prompt,
+            voice=voice,
+            audio_data=audio_bytes,
+            audio_format=audio_format,
+            audio_rate=audio_rate,
+            vad="server_vad",
+            extra=extra or {},
+            tools=tools,
+            remote_tools=remote_tools,
+            rt_signals=rt_signals,
+            rt_session_id=last_session_id,
+            auto_turn=auto_turn,
+            vad_end_silence_ms=opt_vad_silence,
+            vad_prefix_padding_ms=opt_vad_prefix,
+        )
+
+        # Start or append to realtime session via manager
+        try:
+            if is_debug:
+                print("[realtime] Starting session with options:", opts.to_dict())
+            rt = self.window.controller.realtime.manager
+            rt.start(context.ctx, opts)
+
+            self.prev_auto_turn = auto_turn
+            self.prev_vad_silence = opt_vad_silence
+            self.prev_vad_prefix = opt_vad_prefix
+            return True
+        except Exception as e:
+            self.window.core.debug.log(e)
+            return False  # fallback to non-live path
+
+    def handle_audio_input(self, event: RealtimeEvent):
+        """
+        Handle Realtime audio input event
+
+        :param event: RealtimeEvent
+        """
+        self.handler.rt_handle_audio_input_sync(event)
+
+    def manual_commit(self):
+        """Manually commit audio input to realtime session"""
+        self.handler.force_response_now_sync()
+
+    def shutdown(self):
+        """Shutdown realtime loops"""
+        if self.handler.is_session_active():
+            self.handler.close_session_sync()
+        try:
+            self.handler.stop_loop_sync()
+        except Exception:
+            pass
+
+    def reset(self):
+        """Close realtime session"""
+        if self.handler.is_session_active():
+            self.handler.close_session_sync()
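Realtime.begin() acts as a gate for the bridge: it returns True when the realtime path has consumed the request (tool results forwarded into an active session, an active auto-turn session updated in place, or a new session started via the manager) and False when the caller should fall back to the regular, non-live request path. A minimal caller-side sketch of that contract, assuming the controller is reachable as window.core.api.openai.realtime (an attribute path inferred from the file location, not shown in this diff):

    # hypothetical caller-side sketch
    rt = window.core.api.openai.realtime  # assumed accessor for the Realtime instance above
    handled = rt.begin(context=bridge_context, model=model_item, rt_signals=signals)
    if not handled:
        run_standard_request(bridge_context)  # placeholder for the non-realtime fallback path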
pygpt_net/provider/api/openai/remote_tools.py
@@ -113,7 +113,7 @@ class RemoteTools:
         # extend local tools with remote tools
         if enabled["computer_use"]:
             if not model.id in OPENAI_REMOTE_TOOL_DISABLE_COMPUTER_USE:
-                tools.append(self.window.core.
+                tools.append(self.window.core.api.openai.computer.get_tool())
         else:
             if not model.id in OPENAI_REMOTE_TOOL_DISABLE_WEB_SEARCH:
                 if enabled["web_search"]:
pygpt_net/provider/api/openai/responses.py
@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.
+# Updated Date: 2025.08.28 09:00:00                  #
 # ================================================== #

 import base64
@@ -92,7 +92,7 @@ class Responses:
         user_name = ctx.input_name  # from ctx
         ai_name = ctx.output_name  # from ctx

-        client = self.window.core.
+        client = self.window.core.api.openai.get_client(mode, model)

         # build chat messages
         messages = self.build(
@@ -122,7 +122,7 @@ class Responses:
         response_kwargs = {}

         # tools / functions
-        tools = self.window.core.
+        tools = self.window.core.api.openai.tools.prepare_responses_api(model, functions)

         # extra arguments, o3 only
         if model.extra and "reasoning_effort" in model.extra:
@@ -130,7 +130,7 @@ class Responses:
             response_kwargs['reasoning']['effort'] = model.extra["reasoning_effort"]

         # append remote tools
-        tools = self.window.core.
+        tools = self.window.core.api.openai.remote_tools.append_to_tools(
             mode=mode,
             model=model,
             stream=stream,
@@ -250,20 +250,31 @@ class Responses:
             used_tokens,
             max_ctx_tokens,
         )
+
+        has_response_id_in_last_item = False
+        if items and len(items) > 0:
+            last_item = items[-1]
+            if last_item and last_item.msg_id:
+                has_response_id_in_last_item = True
+
         for item in items:
             # input
             if item.final_input is not None and item.final_input != "":
-
-
-
-
+                if not has_response_id_in_last_item:
+                    messages.append({
+                        "role": "user",
+                        "content": item.final_input,
+                    })

             # output
             if item.final_output is not None and item.final_output != "":
-
-
-
-
+                if not has_response_id_in_last_item:
+                    msg = {
+                        "role": "assistant",
+                        "content": item.final_output,
+                    }
+                else:
+                    msg = {}
                 # append previous audio ID
                 if MODE_AUDIO in model.mode:
                     if item.audio_id:
@@ -281,7 +292,9 @@ class Responses:
                         msg["audio"] = {
                             "id": self.audio_prev_id
                         }
-
+
+            if msg:
+                messages.append(msg)

             # ---- tool output ----
             is_tool_output = False  # reset tool output flag
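Taken together, the two hunks above change how Responses.build() replays conversation history: when the most recent context item already carries a msg_id (a stored response ID), earlier user/assistant turns are no longer re-appended, and msg is only appended when non-empty. A plausible reading (an assumption, since the chaining mechanism itself lies outside these hunks) is that the stored response ID lets the server resume conversation state, roughly:

    # sketch of the apparent intent, names taken from the hunks above
    if items and items[-1].msg_id:
        # assumed: state is resumed server-side from the stored response ID,
        # so prior turns need not be resent as messages
        response_kwargs["previous_response_id"] = items[-1].msg_id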
@@ -340,7 +353,7 @@ class Responses:

             # computer call output
             elif output_type == "computer_call":
-                base64img = self.window.core.
+                base64img = self.window.core.api.openai.vision.get_attachment(attachments)
                 if base64img and "call_id" in tool_call:
                     if tool_call["call_id"]:
                         # tool output
@@ -382,13 +395,13 @@ class Responses:
         if (model.is_image_input()
                 and mode != MODE_COMPUTER
                 and not model.id.startswith("computer-use")):
-            content = self.window.core.
+            content = self.window.core.api.openai.vision.build_content(
                 content=content,
                 attachments=attachments,
                 responses_api=True,
             )
         if model.is_audio_input():
-            content = self.window.core.
+            content = self.window.core.api.openai.audio.build_content(
                 content=content,
                 multimodal_ctx=multimodal_ctx,
             )
@@ -404,6 +417,7 @@ class Responses:
             messages,
             model.id,
         )
+
         return messages

     def reset_tokens(self):
@@ -431,7 +445,7 @@ class Responses:

         if mode in [
             MODE_CHAT,
-            MODE_VISION,
+            # MODE_VISION,
             MODE_RESEARCH,
             MODE_COMPUTER,
         ]:
@@ -499,7 +513,7 @@ class Responses:
             id = output.id
             call_id = output.call_id
             action = output.action
-            tool_calls, is_call = self.window.core.
+            tool_calls, is_call = self.window.core.api.openai.computer.handle_action(
                 id=id,
                 call_id=call_id,
                 action=action,
@@ -567,7 +581,7 @@ class Responses:
         if files:
             self.window.core.debug.info("[chat] Container files found, downloading...")
             try:
-                self.window.core.
+                self.window.core.api.openai.container.download_files(ctx, files)
             except Exception as e:
                 self.window.core.debug.error(f"[chat] Error downloading container files: {e}")

@@ -631,7 +645,7 @@ class Responses:
         if files:
             self.window.core.debug.info("[chat] Container files found, downloading...")
             try:
-                self.window.core.
+                self.window.core.api.openai.container.download_files(ctx, files)
             except Exception as e:
                 self.window.core.debug.error(f"[chat] Error downloading container files: {e}")

pygpt_net/provider/api/openai/store.py
@@ -30,7 +30,7 @@ class Store:

         :return: OpenAI client
         """
-        return self.window.core.
+        return self.window.core.api.openai.get_client()

     def log(
             self,
@@ -92,7 +92,7 @@ class Store:
         :param file_id: file ID
         :param path: path to save file
         """
-        client = self.window.core.
+        client = self.window.core.api.openai.get_client()
         content = client.files.content(file_id)
         data = content.read()
         with open(path, 'wb', ) as f:
pygpt_net/provider/api/openai/worker/assistants.py
@@ -565,7 +565,7 @@ class Worker(QRunnable):
         """
         try:
             if self.stream:  # stream mode
-                run = self.window.core.
+                run = self.window.core.api.openai.assistants.run_create_stream(
                     self.signals,
                     self.ctx,
                     self.thread_id,
@@ -575,7 +575,7 @@ class Worker(QRunnable):
                 )
             else:
                 # not stream mode
-                run = self.window.core.
+                run = self.window.core.api.openai.assistants.run_create(
                     self.thread_id,
                     self.assistant_id,
                     self.model,
@@ -596,7 +596,7 @@ class Worker(QRunnable):
         :return: result
         """
         try:
-            response = self.window.core.
+            response = self.window.core.api.openai.assistants.msg_send(
                 self.thread_id,
                 self.prompt,
                 self.file_ids,
@@ -615,7 +615,7 @@ class Worker(QRunnable):
         :return: result
         """
         try:
-            run = self.window.core.
+            run = self.window.core.api.openai.assistants.run_submit_tool(self.ctx, self.tools_outputs)
             if run is not None:
                 self.ctx.run_id = run.id  # update run id
                 self.signals.finished.emit(self.ctx, run, False)  # continue status check
pygpt_net/provider/api/openai/worker/importer.py
@@ -237,7 +237,7 @@ class ImportWorker(QRunnable):
             self.log("Importing assistants...")
             self.window.core.assistants.clear()
             items = self.window.core.assistants.get_all()
-            self.window.core.
+            self.window.core.api.openai.assistants.import_all(items, callback=self.callback)
             self.window.core.assistants.items = items
             self.window.core.assistants.save()

@@ -266,7 +266,7 @@ class ImportWorker(QRunnable):
             self.log("Importing vector stores...")
             self.window.core.assistants.store.clear()
             items = {}
-            self.window.core.
+            self.window.core.api.openai.store.import_stores(items, callback=self.callback)
             self.window.core.assistants.store.import_items(items)
             if not silent:
                 self.signals.finished.emit("vector_stores", self.store_id, len(items))
@@ -285,7 +285,7 @@ class ImportWorker(QRunnable):
         """
         try:
             self.log("Truncating stores...")
-            num = self.window.core.
+            num = self.window.core.api.openai.store.remove_all(callback=self.callback)
             self.window.core.assistants.store.items = {}
             self.window.core.assistants.store.save()
             if not silent:
@@ -336,12 +336,12 @@ class ImportWorker(QRunnable):
                 self.log("Truncating all files...")
                 self.window.core.assistants.files.truncate()  # clear all files
                 # remove all files in API
-                num = self.window.core.
+                num = self.window.core.api.openai.store.remove_files(callback=self.callback)
             else:
                 self.log("Truncating files for store: {}".format(self.store_id))
                 self.window.core.assistants.files.truncate(self.store_id)  # clear store files, remove from stores / DB
                 # remove store files in API
-                num = self.window.core.
+                num = self.window.core.api.openai.store.remove_store_files(
                     self.store_id,
                     callback=self.callback,
                 )
@@ -365,14 +365,14 @@ class ImportWorker(QRunnable):
             self.log("Uploading files...")
             for file in self.files:
                 try:
-                    file_id = self.window.core.
+                    file_id = self.window.core.api.openai.store.upload(file)
                     if file_id is not None:
-                        stored_file = self.window.core.
+                        stored_file = self.window.core.api.openai.store.add_file(
                             self.store_id,
                             file_id,
                         )
                         if stored_file is not None:
-                            data = self.window.core.
+                            data = self.window.core.api.openai.store.get_file(file_id)
                             self.window.core.assistants.files.insert(self.store_id, data)  # insert to DB
                             msg = "Uploaded file: {}/{}".format((num + 1), len(self.files))
                             self.signals.status.emit("upload_files", msg)
@@ -403,11 +403,11 @@ class ImportWorker(QRunnable):
             if self.store_id is None:
                 self.log("Importing all files...")
                 self.window.core.assistants.files.truncate_local()  # clear local DB (all)
-                num = self.window.core.
+                num = self.window.core.api.openai.store.import_stores_files(self.callback)  # import all files
             else:
                 self.log("Importing files for store: {}".format(self.store_id))
                 self.window.core.assistants.files.truncate_local(self.store_id)  # clear local DB (all)
-                items = self.window.core.
+                items = self.window.core.api.openai.store.import_store_files(
                     self.store_id,
                     [],
                     callback=self.callback,
pygpt_net/provider/audio_input/google_genai.py (new file)
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package               #
+# Website: https://pygpt.net                         #
+# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
+# MIT License                                        #
+# Created By  : Marcin Szczygliński                  #
+# Updated Date: 2025.08.29 18:00:00                  #
+# ================================================== #
+
+from .base import BaseProvider
+
+
+class GoogleGenAIAudioInput(BaseProvider):
+
+    PROMPT_TRANSCRIBE = (
+        "You are a speech-to-text transcriber. "
+        "Return only the verbatim transcript as plain text. "
+        "Do not add any explanations, timestamps, labels or formatting."
+    )
+
+    def __init__(self, *args, **kwargs):
+        """
+        Google GenAI (Gemini) audio provider for transcription (via API).
+
+        :param args: args
+        :param kwargs: kwargs
+        """
+        super(GoogleGenAIAudioInput, self).__init__(*args, **kwargs)
+        self.plugin = kwargs.get("plugin")
+        self.id = "google_genai"
+        self.name = "Google GenAI"
+
+    def init_options(self):
+        """Initialize options"""
+        # Keep option shape consistent with Whisper provider
+        self.plugin.add_option(
+            "google_genai_audio_model",
+            type="text",
+            value="gemini-2.5-flash",
+            label="Model",
+            tab="google_genai",
+            description="Specify Gemini model supporting audio, e.g., gemini-2.5-flash",
+        )
+        self.plugin.add_option(
+            "google_genai_audio_prompt",
+            type="textarea",
+            value=self.PROMPT_TRANSCRIBE,
+            label="System Prompt",
+            tab="google_genai",
+            description="System prompt to guide the transcription output",
+            tooltip="System prompt for transcription",
+            persist=True,
+        )
+
+    def transcribe(self, path: str) -> str:
+        """
+        Audio to text transcription using Google GenAI (Gemini).
+
+        :param path: path to audio file to transcribe
+        :return: transcribed text
+        """
+        # Get pre-configured GenAI client from the app core
+        client = self.plugin.window.core.api.google.get_client()
+
+        # Upload the audio file via the Files API
+        uploaded_file = client.files.upload(file=path)
+
+        # Ask the model to produce a plain text transcript only
+        # Using system_instruction keeps the public API surface simple (no extra options needed)
+        config = {
+            "system_instruction": self.plugin.get_option_value("google_genai_audio_prompt") or self.PROMPT_TRANSCRIBE,
+            "temperature": 0.0,
+        }
+
+        # Generate content (transcription) with the selected model
+        model_name = self.plugin.get_option_value("google_genai_audio_model")
+        response = client.models.generate_content(
+            model=model_name,
+            contents=[uploaded_file],
+            config=config,
+        )
+
+        # The SDK exposes the unified .text property for convenience
+        return response.text or ""
+
+    def is_configured(self) -> bool:
+        """
+        Check if provider is configured
+
+        :return: True if configured, False otherwise
+        """
+        api_key = self.plugin.window.core.config.get("api_key_google")
+        return api_key is not None and api_key != ""
+
+    def get_config_message(self) -> str:
+        """
+        Return message to display when provider is not configured
+
+        :return: message
+        """
+        return "Google GenAI API key is not set yet. Please configure it in settings."
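The provider delegates client construction to core.api.google.get_client() and otherwise uses the plain google-genai SDK surface (files.upload plus models.generate_content). A standalone sketch of the same transcribe() flow, with client construction and the file name as illustrative assumptions:

    # hypothetical standalone equivalent of transcribe() above
    from google import genai

    client = genai.Client(api_key="YOUR_API_KEY")  # the plugin gets this via core.api.google.get_client()
    uploaded = client.files.upload(file="speech.wav")
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[uploaded],
        config={
            "system_instruction": "Return only the verbatim transcript as plain text.",
            "temperature": 0.0,
        },
    )
    print(response.text or "")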
pygpt_net/provider/audio_input/openai_whisper.py
@@ -43,7 +43,7 @@ class OpenAIWhisper(BaseProvider):
         :param path: path to audio file to transcribe
         :return: transcribed text
         """
-        client = self.plugin.window.core.
+        client = self.plugin.window.core.api.openai.get_client()
         with open(path, "rb") as audio_file:
             return client.audio.transcriptions.create(
                 model=self.plugin.get_option_value('whisper_model'),