pygpt-net 2.7.5__py3-none-any.whl → 2.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +14 -0
- pygpt_net/__init__.py +4 -4
- pygpt_net/controller/chat/remote_tools.py +3 -9
- pygpt_net/controller/chat/stream.py +2 -2
- pygpt_net/controller/chat/{handler/worker.py → stream_worker.py} +20 -64
- pygpt_net/controller/debug/fixtures.py +3 -2
- pygpt_net/controller/files/files.py +65 -4
- pygpt_net/core/debug/models.py +2 -2
- pygpt_net/core/filesystem/url.py +4 -1
- pygpt_net/core/render/web/body.py +3 -2
- pygpt_net/core/types/chunk.py +27 -0
- pygpt_net/data/config/config.json +14 -4
- pygpt_net/data/config/models.json +192 -4
- pygpt_net/data/config/settings.json +126 -36
- pygpt_net/data/js/app/template.js +1 -1
- pygpt_net/data/js/app.min.js +2 -2
- pygpt_net/data/locale/locale.de.ini +5 -0
- pygpt_net/data/locale/locale.en.ini +35 -8
- pygpt_net/data/locale/locale.es.ini +5 -0
- pygpt_net/data/locale/locale.fr.ini +5 -0
- pygpt_net/data/locale/locale.it.ini +5 -0
- pygpt_net/data/locale/locale.pl.ini +5 -0
- pygpt_net/data/locale/locale.uk.ini +5 -0
- pygpt_net/data/locale/locale.zh.ini +5 -0
- pygpt_net/data/locale/plugin.cmd_mouse_control.en.ini +2 -2
- pygpt_net/item/ctx.py +3 -5
- pygpt_net/js_rc.py +2449 -2447
- pygpt_net/plugin/cmd_mouse_control/config.py +8 -7
- pygpt_net/plugin/cmd_mouse_control/plugin.py +3 -4
- pygpt_net/plugin/cmd_mouse_control/worker.py +2 -1
- pygpt_net/plugin/cmd_mouse_control/worker_sandbox.py +2 -1
- pygpt_net/provider/api/anthropic/__init__.py +16 -9
- pygpt_net/provider/api/anthropic/chat.py +259 -11
- pygpt_net/provider/api/anthropic/computer.py +844 -0
- pygpt_net/provider/api/anthropic/remote_tools.py +172 -0
- pygpt_net/{controller/chat/handler/anthropic_stream.py → provider/api/anthropic/stream.py} +24 -10
- pygpt_net/provider/api/anthropic/tools.py +32 -77
- pygpt_net/provider/api/anthropic/utils.py +30 -0
- pygpt_net/provider/api/google/__init__.py +6 -5
- pygpt_net/provider/api/google/chat.py +3 -8
- pygpt_net/{controller/chat/handler/google_stream.py → provider/api/google/stream.py} +1 -1
- pygpt_net/provider/api/google/utils.py +185 -0
- pygpt_net/{controller/chat/handler → provider/api/langchain}/__init__.py +0 -0
- pygpt_net/{controller/chat/handler/langchain_stream.py → provider/api/langchain/stream.py} +1 -1
- pygpt_net/provider/api/llama_index/__init__.py +0 -0
- pygpt_net/{controller/chat/handler/llamaindex_stream.py → provider/api/llama_index/stream.py} +1 -1
- pygpt_net/provider/api/openai/__init__.py +7 -3
- pygpt_net/provider/api/openai/image.py +2 -2
- pygpt_net/provider/api/openai/responses.py +0 -0
- pygpt_net/{controller/chat/handler/openai_stream.py → provider/api/openai/stream.py} +1 -1
- pygpt_net/provider/api/openai/utils.py +69 -3
- pygpt_net/provider/api/x_ai/__init__.py +117 -17
- pygpt_net/provider/api/x_ai/chat.py +272 -102
- pygpt_net/provider/api/x_ai/image.py +149 -47
- pygpt_net/provider/api/x_ai/{remote.py → remote_tools.py} +165 -70
- pygpt_net/provider/api/x_ai/responses.py +507 -0
- pygpt_net/provider/api/x_ai/stream.py +715 -0
- pygpt_net/provider/api/x_ai/tools.py +59 -8
- pygpt_net/{controller/chat/handler → provider/api/x_ai}/utils.py +1 -2
- pygpt_net/provider/api/x_ai/vision.py +1 -4
- pygpt_net/provider/core/config/patch.py +22 -1
- pygpt_net/provider/core/model/patch.py +26 -1
- pygpt_net/tools/image_viewer/ui/dialogs.py +300 -13
- pygpt_net/tools/text_editor/ui/dialogs.py +3 -2
- pygpt_net/tools/text_editor/ui/widgets.py +5 -1
- pygpt_net/ui/base/context_menu.py +44 -1
- pygpt_net/ui/layout/toolbox/indexes.py +22 -19
- pygpt_net/ui/layout/toolbox/model.py +28 -5
- pygpt_net/ui/widget/dialog/base.py +16 -5
- pygpt_net/ui/widget/image/display.py +25 -8
- pygpt_net/ui/widget/tabs/output.py +9 -1
- pygpt_net/ui/widget/textarea/editor.py +14 -1
- pygpt_net/ui/widget/textarea/input.py +20 -7
- pygpt_net/ui/widget/textarea/notepad.py +24 -1
- pygpt_net/ui/widget/textarea/output.py +23 -1
- pygpt_net/ui/widget/textarea/web.py +16 -1
- {pygpt_net-2.7.5.dist-info → pygpt_net-2.7.7.dist-info}/METADATA +16 -2
- {pygpt_net-2.7.5.dist-info → pygpt_net-2.7.7.dist-info}/RECORD +80 -73
- pygpt_net/controller/chat/handler/xai_stream.py +0 -135
- {pygpt_net-2.7.5.dist-info → pygpt_net-2.7.7.dist-info}/LICENSE +0 -0
- {pygpt_net-2.7.5.dist-info → pygpt_net-2.7.7.dist-info}/WHEEL +0 -0
- {pygpt_net-2.7.5.dist-info → pygpt_net-2.7.7.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,844 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# ================================================== #
|
|
4
|
+
# This file is a part of PYGPT package #
|
|
5
|
+
# Website: https://pygpt.net #
|
|
6
|
+
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
|
+
# MIT License #
|
|
8
|
+
# Created By : Marcin Szczygliński #
|
|
9
|
+
# Updated Date: 2026.01.05 20:00:00 #
|
|
10
|
+
# ================================================== #
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import time
|
|
14
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
15
|
+
|
|
16
|
+
from pygpt_net.item.ctx import CtxItem
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Computer:
|
|
20
|
+
"""
|
|
21
|
+
Anthropic Computer Use adapter.
|
|
22
|
+
|
|
23
|
+
Responsibilities:
|
|
24
|
+
- Provide Anthropic Computer Use tool spec (messages.create tools[]).
|
|
25
|
+
- Rewrite Anthropic computer tool_use payloads into app-compatible tool calls
|
|
26
|
+
for the Mouse & Keyboard plugin (same final shape as OpenAI/Google adapters).
|
|
27
|
+
- Handle both content_block_delta.input_json_delta and top-level input_json_delta
|
|
28
|
+
variants produced by Anthropic Beta streaming.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
COMPUTER_TOOL_NAMES = {
|
|
32
|
+
"computer",
|
|
33
|
+
"computer.use",
|
|
34
|
+
"anthropic/computer",
|
|
35
|
+
"computer_use",
|
|
36
|
+
"computer-use",
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
# Known plugin command names (Worker supports these).
|
|
40
|
+
# Used to normalize arguments when upstream sends Anthropic-shaped payloads as direct function calls.
|
|
41
|
+
PLUGIN_COMMAND_NAMES = {
|
|
42
|
+
"open_web_browser",
|
|
43
|
+
"get_mouse_position",
|
|
44
|
+
"mouse_move",
|
|
45
|
+
"mouse_drag",
|
|
46
|
+
"mouse_click",
|
|
47
|
+
"mouse_scroll",
|
|
48
|
+
"get_screenshot",
|
|
49
|
+
"keyboard_key",
|
|
50
|
+
"keyboard_keys",
|
|
51
|
+
"keyboard_type",
|
|
52
|
+
"wait",
|
|
53
|
+
# host-native extras
|
|
54
|
+
"wait_5_seconds",
|
|
55
|
+
"go_back",
|
|
56
|
+
"go_forward",
|
|
57
|
+
"search",
|
|
58
|
+
"navigate",
|
|
59
|
+
"click_at",
|
|
60
|
+
"hover_at",
|
|
61
|
+
"type_text_at",
|
|
62
|
+
"key_combination",
|
|
63
|
+
"scroll_document",
|
|
64
|
+
"scroll_at",
|
|
65
|
+
"drag_and_drop",
|
|
66
|
+
# action-style
|
|
67
|
+
"click",
|
|
68
|
+
"double_click",
|
|
69
|
+
"move",
|
|
70
|
+
"type",
|
|
71
|
+
"keypress",
|
|
72
|
+
"scroll",
|
|
73
|
+
"drag",
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
# Action name synonyms that may appear in Anthropic payloads -> plugin command names
|
|
77
|
+
ACTION_SYNONYMS = {
|
|
78
|
+
"hover": "mouse_move",
|
|
79
|
+
"move": "mouse_move",
|
|
80
|
+
"mouse_move": "mouse_move",
|
|
81
|
+
|
|
82
|
+
"click": "mouse_click",
|
|
83
|
+
"left_click": "mouse_click",
|
|
84
|
+
"right_click": "mouse_click",
|
|
85
|
+
"double_click": "mouse_click",
|
|
86
|
+
|
|
87
|
+
"scroll": "mouse_scroll",
|
|
88
|
+
"mouse_scroll": "mouse_scroll",
|
|
89
|
+
|
|
90
|
+
"drag": "mouse_drag",
|
|
91
|
+
"drag_and_drop": "mouse_drag",
|
|
92
|
+
"mouse_drag": "mouse_drag",
|
|
93
|
+
|
|
94
|
+
"type": "keyboard_type",
|
|
95
|
+
"input": "keyboard_type",
|
|
96
|
+
"keyboard_type": "keyboard_type",
|
|
97
|
+
|
|
98
|
+
"keypress": "keyboard_keys",
|
|
99
|
+
"key": "keyboard_keys",
|
|
100
|
+
"keys": "keyboard_keys",
|
|
101
|
+
"key_combination": "key_combination",
|
|
102
|
+
|
|
103
|
+
"screenshot": "get_screenshot",
|
|
104
|
+
"get_screenshot": "get_screenshot",
|
|
105
|
+
|
|
106
|
+
"wait": "wait",
|
|
107
|
+
"sleep": "wait",
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
KEY_MODIFIERS = {"ctrl", "control", "alt", "shift", "cmd", "super", "start"}
|
|
111
|
+
|
|
112
|
+
def __init__(self, window=None):
    """
    Create the Anthropic Computer Use adapter.

    :param window: Window instance; kept on ``self.window`` and used by the other
                   methods of this class to reach config, plugins, UI nodes and
                   context storage
    """
    self.window = window
|
|
117
|
+
|
|
118
|
+
# --------------- Tool spec --------------- #
|
|
119
|
+
|
|
120
|
+
def get_current_env(self) -> Dict[str, Any]:
    """
    Return the data attached to the currently selected item of the
    "computer_env" UI node.

    :return: value of ``itemData()`` for the node's current index
    """
    env_node = self.window.ui.nodes["computer_env"]
    selected = env_node.currentIndex()
    return env_node.itemData(selected)
|
|
123
|
+
|
|
124
|
+
def get_tool(self) -> dict:
    """
    Build the Anthropic Computer Use tool spec (an entry for ``tools[]``).

    The display size comes from ``_resolve_display_size()``. The tool type is
    read from the config key ``remote_tools.anthropic.computer.type`` and falls
    back to "computer_20250124" when the value is missing, ``None`` or blank.

    :return: dict with name, type, display_width_px and display_height_px
    """
    default_type = "computer_20250124"
    config = self.window.core.config

    is_sandbox = bool(config.get("remote_tools.computer_use.sandbox", False))
    screen_w, screen_h = self._resolve_display_size(is_sandbox=is_sandbox)

    raw_type = config.get("remote_tools.anthropic.computer.type", default_type)
    # A stored None must not be stringified into the literal tool type "None".
    tool_type = "" if raw_type is None else str(raw_type).strip()

    return {
        "name": "computer",
        "type": tool_type or default_type,
        "display_width_px": int(screen_w),
        "display_height_px": int(screen_h),
    }
|
|
134
|
+
|
|
135
|
+
def _resolve_display_size(self, is_sandbox: bool) -> Tuple[int, int]:
|
|
136
|
+
screen_w = screen_h = 0
|
|
137
|
+
try:
|
|
138
|
+
screen = self.window.app.primaryScreen()
|
|
139
|
+
size = screen.size()
|
|
140
|
+
screen_w = int(size.width())
|
|
141
|
+
screen_h = int(size.height())
|
|
142
|
+
except Exception:
|
|
143
|
+
screen_w, screen_h = 1440, 900
|
|
144
|
+
|
|
145
|
+
if is_sandbox:
|
|
146
|
+
try:
|
|
147
|
+
vw = int(self.window.core.plugins.get_option("cmd_mouse_control", "sandbox_viewport_w"))
|
|
148
|
+
vh = int(self.window.core.plugins.get_option("cmd_mouse_control", "sandbox_viewport_h"))
|
|
149
|
+
if vw > 0 and vh > 0:
|
|
150
|
+
screen_w, screen_h = vw, vh
|
|
151
|
+
except Exception:
|
|
152
|
+
pass
|
|
153
|
+
|
|
154
|
+
return screen_w, screen_h
|
|
155
|
+
|
|
156
|
+
# --------------- Streaming handling --------------- #
|
|
157
|
+
|
|
158
|
+
def handle_stream_chunk(self, ctx: CtxItem, chunk, tool_calls: list) -> Tuple[List, bool]:
    """
    Convert Computer Use 'tool_use' streaming events into plugin tool calls.

    Handled event types (``chunk.type``):
    - content_block_start: registers a computer tool_use block (index -> id, empty buffer)
    - input_json_delta (top-level): appends partial JSON to the most recent active block
    - content_block_delta with an input_json_delta: appends partial JSON by block index
    - content_block_stop: finalizes the block matching the index (or the latest active one)
    - message_stop: finalizes every block still active

    Finalizing parses the buffered JSON, records the tool use under
    ``ctx.extra["anthropic_tool_uses"]`` (best-effort) and appends the mapped
    plugin calls to ``tool_calls``.

    :param ctx: context item (streaming state is obtained via ``_ensure_ctx_memory``)
    :param chunk: streaming event object
    :param tool_calls: list extended in place with the mapped calls
    :return: (tool_calls, True if this chunk produced at least one call)
    """
    event = str(getattr(chunk, "type", "") or "")

    state = self._ensure_ctx_memory(ctx)
    index_map = state["index_to_id"]
    buffers = state["buffers"]
    active = state["active_ids"]

    def block_index() -> str:
        # block indexes are kept as strings in the index -> id map
        return str(getattr(chunk, "index", 0) or 0)

    def finalize(tool_id) -> bool:
        """Parse the buffered input of one tool_use and emit its plugin calls."""
        payload = self._safe_json_loads(buffers.pop(tool_id, ""))
        if payload is None:
            return False
        try:
            if not isinstance(ctx.extra, dict):
                ctx.extra = {}
            recorded = ctx.extra.get("anthropic_tool_uses")
            if not isinstance(recorded, list):
                recorded = []
            recorded.append({"id": tool_id, "name": "computer", "input": payload})
            ctx.extra["anthropic_tool_uses"] = recorded
            self.window.core.ctx.update_item(ctx)
        except Exception:
            # recording is best-effort; the mapped calls are still emitted
            pass
        mapped = self._payload_to_tool_calls(tool_id, tool_id, payload)
        if not mapped:
            return False
        tool_calls.extend(mapped)
        return True

    has_calls = False

    if event == "content_block_start":
        block = getattr(chunk, "content_block", None)
        if block and getattr(block, "type", "") == "tool_use":
            tool_name = str(getattr(block, "name", "") or "")
            if tool_name in self.COMPUTER_TOOL_NAMES:
                idx = block_index()
                tid = str(getattr(block, "id", "") or self._gen_id(prefix="ac"))
                index_map[idx] = tid
                buffers.setdefault(tid, "")
                active.append(tid)

    elif event == "input_json_delta":
        fragment = getattr(chunk, "partial_json", "") or ""
        if active:
            tid = active[-1]
            buffers[tid] = buffers.get(tid, "") + fragment

    elif event == "content_block_delta":
        delta = getattr(chunk, "delta", None)
        if delta and getattr(delta, "type", "") == "input_json_delta":
            tid = index_map.get(block_index())
            if tid:
                fragment = getattr(delta, "partial_json", "") or ""
                buffers[tid] = buffers.get(tid, "") + fragment

    elif event == "content_block_stop":
        tid = index_map.pop(block_index(), None)
        if active:
            if not tid:
                # no index mapping: fall back to the most recent active block
                tid = active.pop()
            elif active[-1] == tid:
                active.pop()
            elif tid in active:
                active.remove(tid)
        if tid:
            has_calls = finalize(tid)

    elif event == "message_stop":
        # flush anything that never received its content_block_stop
        while active:
            if finalize(active.pop()):
                has_calls = True

    return tool_calls, has_calls
|
|
253
|
+
|
|
254
|
+
# --------------- Public normalization for function tools --------------- #
|
|
255
|
+
|
|
256
|
+
def normalize_function_args_json(self, name: str, args_json: Optional[str]) -> Optional[str]:
    """
    Normalize function-call arguments (client tools) to the plugin shape.

    The JSON string is decoded, the function name is retargeted, the arguments
    are normalized and filtered for the target command, and the result is
    encoded back to JSON. Decoded values that are not a dict are treated as
    an empty dict.

    :param name: function name as received
    :param args_json: arguments as a JSON string, or None
    :return: normalized arguments as a JSON string; None when the input is None
             or decoding/encoding fails
    """
    if args_json is None:
        return None

    try:
        decoded = json.loads(args_json)
    except Exception:
        return None

    source_args = decoded if isinstance(decoded, dict) else {}
    target_name, coerced = self._retarget_function_name_and_args(name, source_args)
    normalized = self._filter_args_for_plugin(
        target_name,
        self._normalize_params_for_plugin(target_name, coerced),
    )

    try:
        return json.dumps(normalized, ensure_ascii=False)
    except Exception:
        return None
|
|
275
|
+
|
|
276
|
+
# --------------- Non-stream helpers --------------- #
|
|
277
|
+
|
|
278
|
+
def rewrite_tool_calls(self, tool_calls: List[dict]) -> List[dict]:
    """
    Rewrite tool calls into plugin-ready calls for the Mouse & Keyboard plugin.

    Rewrites:
    - tool_use(computer) payloads into a sequence of plugin tool calls
    - direct function calls whose arguments decode to a dict: the name is
      retargeted and the arguments are normalized, filtered and pruned

    Direct function calls are mutated IN-PLACE (both the item and its slot in
    the incoming list), so a caller that keeps using the original list still
    sees the rewritten names and arguments. Items that cannot be processed are
    passed through unchanged.

    When a computer tool call carries no "id", one id is generated and reused
    as the "call_id" fallback, so both fields refer to the same call.

    :param tool_calls: list of tool call dicts (may be None or empty)
    :return: new list with the rewritten calls
    """
    out: List[dict] = []
    for i, tc in enumerate(tool_calls or []):
        try:
            f = tc.get("function") or {}
            name = str(f.get("name", "") or "")
            args_raw = f.get("arguments", {})
            args = self._safe_json_loads(args_raw) if isinstance(args_raw, str) else args_raw

            # Case 1: Anthropic "computer" tool_use payload -> expand to sequence of plugin calls
            if name in self.COMPUTER_TOOL_NAMES and isinstance(args, (dict, list)):
                tool_id = tc.get("id") or self._gen_id()
                # fall back to the (possibly generated) id instead of generating a second one
                call_id = tc.get("call_id") or tool_id
                calls = self._payload_to_tool_calls(tool_id, call_id, args)
                if calls:
                    out.extend(calls)
                else:
                    out.append(tc)
                continue

            # Case 2: Direct function calls -> normalize and FILTER, then mutate in place
            if isinstance(args, dict):
                target_name, coerced = self._retarget_function_name_and_args(name, args)
                norm = self._normalize_params_for_plugin(target_name, coerced)
                norm = self._filter_args_for_plugin(target_name, norm)
                pruned = self._prune_none(norm)

                f["name"] = target_name
                f["arguments"] = json.dumps(pruned, ensure_ascii=False)
                tc["function"] = f

                # Mutate the original reference inside the incoming list as well
                tool_calls[i] = tc

                out.append(tc)
                continue

            # Fallback: leave unchanged
            out.append(tc)

        except Exception:
            # best-effort: an item that cannot be rewritten is passed through as-is
            out.append(tc)
    return out
|
|
333
|
+
|
|
334
|
+
# --------------- Parsers / mappers --------------- #
|
|
335
|
+
|
|
336
|
+
def _payload_to_tool_calls(self, id_: str, call_id: str, payload: Any) -> List[dict]:
|
|
337
|
+
actions = self._extract_actions(payload)
|
|
338
|
+
out: List[dict] = []
|
|
339
|
+
for action in actions:
|
|
340
|
+
mapped = self._map_single_action(action, id_, call_id)
|
|
341
|
+
if mapped:
|
|
342
|
+
out.append(mapped)
|
|
343
|
+
return out
|
|
344
|
+
|
|
345
|
+
def _extract_actions(self, payload: Any) -> List[dict]:
|
|
346
|
+
if payload is None:
|
|
347
|
+
return []
|
|
348
|
+
|
|
349
|
+
def _coerce_action(obj: Any) -> Optional[dict]:
|
|
350
|
+
if obj is None:
|
|
351
|
+
return None
|
|
352
|
+
if isinstance(obj, dict):
|
|
353
|
+
# If Anthropic sends {"type": "...", ...}
|
|
354
|
+
if "type" in obj:
|
|
355
|
+
return obj
|
|
356
|
+
# If Anthropic sends {"action": "left_click", "coordinate": [...]}
|
|
357
|
+
if "action" in obj and isinstance(obj["action"], str) and obj["action"]:
|
|
358
|
+
act = {"type": str(obj["action"]).strip().lower()}
|
|
359
|
+
for k in ("x", "y", "button", "dx", "dy", "scroll_x", "scroll_y", "keys", "key",
|
|
360
|
+
"text", "value", "path", "from", "to", "coordinate", "destination", "offset", "delta",
|
|
361
|
+
"seconds", "sec", "count", "num_clicks", "unit"):
|
|
362
|
+
if k in obj:
|
|
363
|
+
act[k] = obj[k]
|
|
364
|
+
return act
|
|
365
|
+
# If Anthropic sends {"action": {...}}
|
|
366
|
+
if "action" in obj and isinstance(obj["action"], dict):
|
|
367
|
+
return obj["action"]
|
|
368
|
+
return None
|
|
369
|
+
if isinstance(obj, str):
|
|
370
|
+
s = obj.strip().lower()
|
|
371
|
+
if s:
|
|
372
|
+
return {"type": s}
|
|
373
|
+
return None
|
|
374
|
+
|
|
375
|
+
if isinstance(payload, dict):
|
|
376
|
+
if "actions" in payload and isinstance(payload["actions"], list):
|
|
377
|
+
out = []
|
|
378
|
+
for it in payload["actions"]:
|
|
379
|
+
coerced = _coerce_action(it)
|
|
380
|
+
if coerced:
|
|
381
|
+
out.append(coerced)
|
|
382
|
+
return out
|
|
383
|
+
coerced = _coerce_action(payload)
|
|
384
|
+
return [coerced] if coerced else []
|
|
385
|
+
|
|
386
|
+
if isinstance(payload, list):
|
|
387
|
+
out = []
|
|
388
|
+
for it in payload:
|
|
389
|
+
coerced = _coerce_action(it)
|
|
390
|
+
if coerced:
|
|
391
|
+
out.append(coerced)
|
|
392
|
+
return out
|
|
393
|
+
|
|
394
|
+
return []
|
|
395
|
+
|
|
396
|
+
def _extract_xy(self, action: dict) -> Tuple[Optional[int], Optional[int]]:
|
|
397
|
+
try:
|
|
398
|
+
if "x" in action and "y" in action:
|
|
399
|
+
return int(action["x"]), int(action["y"])
|
|
400
|
+
except Exception:
|
|
401
|
+
pass
|
|
402
|
+
for key in ("coordinate", "coordinates", "position", "point", "center", "location", "loc", "pos"):
|
|
403
|
+
val = action.get(key)
|
|
404
|
+
if isinstance(val, (list, tuple)) and len(val) >= 2:
|
|
405
|
+
try:
|
|
406
|
+
return int(val[0]), int(val[1])
|
|
407
|
+
except Exception:
|
|
408
|
+
pass
|
|
409
|
+
if isinstance(val, dict) and "x" in val and "y" in val:
|
|
410
|
+
try:
|
|
411
|
+
return int(val["x"]), int(val["y"])
|
|
412
|
+
except Exception:
|
|
413
|
+
pass
|
|
414
|
+
return None, None
|
|
415
|
+
|
|
416
|
+
def _extract_dxdy(self, action: dict) -> Tuple[int, int]:
|
|
417
|
+
def as_int(v, default=0):
|
|
418
|
+
try:
|
|
419
|
+
return int(v)
|
|
420
|
+
except Exception:
|
|
421
|
+
return default
|
|
422
|
+
if "dx" in action or "dy" in action:
|
|
423
|
+
return as_int(action.get("dx", 0)), as_int(action.get("dy", 0))
|
|
424
|
+
if "scroll_x" in action or "scroll_y" in action:
|
|
425
|
+
return as_int(action.get("scroll_x", 0)), as_int(action.get("scroll_y", 0))
|
|
426
|
+
for key in ("offset", "delta", "scroll", "wheel"):
|
|
427
|
+
val = action.get(key)
|
|
428
|
+
if isinstance(val, (list, tuple)) and len(val) >= 2:
|
|
429
|
+
return as_int(val[0]), as_int(val[1])
|
|
430
|
+
return 0, 0
|
|
431
|
+
|
|
432
|
+
def _parse_keys_list(self, keys_val: Any) -> List[str]:
|
|
433
|
+
"""
|
|
434
|
+
Parse keys that can be string like 'ctrl+shift+p' or list of tokens.
|
|
435
|
+
"""
|
|
436
|
+
if isinstance(keys_val, str):
|
|
437
|
+
return [p.strip() for p in keys_val.replace("+", " ").split() if p.strip()]
|
|
438
|
+
if isinstance(keys_val, list):
|
|
439
|
+
out = []
|
|
440
|
+
for k in keys_val:
|
|
441
|
+
if isinstance(k, str):
|
|
442
|
+
out.extend([p.strip() for p in k.replace("+", " ").split() if p.strip()])
|
|
443
|
+
else:
|
|
444
|
+
out.append(k)
|
|
445
|
+
return out
|
|
446
|
+
if keys_val is None:
|
|
447
|
+
return []
|
|
448
|
+
return [keys_val]
|
|
449
|
+
|
|
450
|
+
def _map_single_action(self, action: dict, id_: str, call_id: str) -> Optional[dict]:
|
|
451
|
+
atype = str(action.get("type", "") or action.get("action", "") or "").lower().strip()
|
|
452
|
+
|
|
453
|
+
# Clicks
|
|
454
|
+
if atype in {"click", "double_click", "dblclick", "dbl_click", "left_click", "right_click"}:
|
|
455
|
+
x, y = self._extract_xy(action)
|
|
456
|
+
button = action.get("button", "left")
|
|
457
|
+
num_clicks = int(action.get("count", action.get("num_clicks", 2 if "double" in atype or "dbl" in atype else 1)))
|
|
458
|
+
if atype == "right_click":
|
|
459
|
+
button = "right"
|
|
460
|
+
if atype == "left_click":
|
|
461
|
+
button = "left"
|
|
462
|
+
num_clicks = 1 if "num_clicks" not in action else num_clicks
|
|
463
|
+
args = {"button": button, "num_clicks": num_clicks}
|
|
464
|
+
if x is not None and y is not None:
|
|
465
|
+
args["x"] = x
|
|
466
|
+
args["y"] = y
|
|
467
|
+
return self._build_call(id_, call_id, "mouse_click", args)
|
|
468
|
+
|
|
469
|
+
# Move / Hover
|
|
470
|
+
if atype in {"move", "mouse_move", "hover"}:
|
|
471
|
+
x, y = self._extract_xy(action)
|
|
472
|
+
args = {}
|
|
473
|
+
if x is not None and y is not None:
|
|
474
|
+
args["x"] = x
|
|
475
|
+
args["y"] = y
|
|
476
|
+
return self._build_call(id_, call_id, "mouse_move", args)
|
|
477
|
+
|
|
478
|
+
# Scroll
|
|
479
|
+
if atype in {"scroll", "mouse_scroll"}:
|
|
480
|
+
x, y = self._extract_xy(action)
|
|
481
|
+
dx, dy = self._extract_dxdy(action)
|
|
482
|
+
args = {"dx": dx, "dy": dy, "unit": "px"}
|
|
483
|
+
if x is not None and y is not None:
|
|
484
|
+
args["x"] = x
|
|
485
|
+
args["y"] = y
|
|
486
|
+
return self._build_call(id_, call_id, "mouse_scroll", args)
|
|
487
|
+
|
|
488
|
+
# Type text
|
|
489
|
+
if atype in {"type", "keyboard_type", "input"}:
|
|
490
|
+
text = str(action.get("text", "") or action.get("value", "") or "")
|
|
491
|
+
return self._build_call(id_, call_id, "keyboard_type", {"text": text})
|
|
492
|
+
|
|
493
|
+
# Keys / key-combos
|
|
494
|
+
if atype in {"keypress", "key", "keys", "key_combination"}:
|
|
495
|
+
keys = self._parse_keys_list(action.get("keys", action.get("key")))
|
|
496
|
+
mods = [k for k in keys if isinstance(k, str) and k.lower() in self.KEY_MODIFIERS]
|
|
497
|
+
if atype == "key_combination" or len(mods) > 1:
|
|
498
|
+
return self._build_call(id_, call_id, "key_combination", {"keys": keys or []})
|
|
499
|
+
return self._build_call(id_, call_id, "keyboard_keys", {"keys": keys or []})
|
|
500
|
+
|
|
501
|
+
# Drag and drop
|
|
502
|
+
if atype in {"drag", "drag_and_drop", "mouse_drag"}:
|
|
503
|
+
path = action.get("path")
|
|
504
|
+
if isinstance(path, list) and len(path) >= 2 and isinstance(path[0], dict) and isinstance(path[1], dict):
|
|
505
|
+
try:
|
|
506
|
+
x0 = int(path[0].get("x")); y0 = int(path[0].get("y"))
|
|
507
|
+
x1 = int(path[1].get("x")); y1 = int(path[1].get("y"))
|
|
508
|
+
args = {"x": x0, "y": y0, "dx": x1, "dy": y1}
|
|
509
|
+
except Exception:
|
|
510
|
+
x0, y0 = self._extract_xy(path[0])
|
|
511
|
+
x1, y1 = self._extract_xy(path[1])
|
|
512
|
+
args = {"x": int(x0 or 0), "y": int(y0 or 0), "dx": int(x1 or 0), "dy": int(y1 or 0)}
|
|
513
|
+
return self._build_call(id_, call_id, "mouse_drag", args)
|
|
514
|
+
fx, fy = None, None
|
|
515
|
+
tx, ty = None, None
|
|
516
|
+
f = action.get("from")
|
|
517
|
+
t = action.get("to")
|
|
518
|
+
if isinstance(f, dict) or isinstance(f, (list, tuple)):
|
|
519
|
+
fx, fy = self._extract_xy({"coordinate": f} if not isinstance(f, dict) else f)
|
|
520
|
+
if isinstance(t, dict) or isinstance(t, (list, tuple)):
|
|
521
|
+
tx, ty = self._extract_xy({"coordinate": t} if not isinstance(t, dict) else t)
|
|
522
|
+
if fx is None or fy is None:
|
|
523
|
+
fx, fy = self._extract_xy(action)
|
|
524
|
+
if tx is None or ty is None:
|
|
525
|
+
for key in ("destination", "target", "end"):
|
|
526
|
+
val = action.get(key)
|
|
527
|
+
if val is not None:
|
|
528
|
+
tx, ty = self._extract_xy({"coordinate": val} if not isinstance(val, dict) else val)
|
|
529
|
+
break
|
|
530
|
+
if tx is None or ty is None:
|
|
531
|
+
tx, ty = self._extract_dxdy(action)
|
|
532
|
+
args = {"x": int(fx or 0), "y": int(fy or 0), "dx": int(tx or 0), "dy": int(ty or 0)}
|
|
533
|
+
return self._build_call(id_, call_id, "mouse_drag", args)
|
|
534
|
+
|
|
535
|
+
# Screenshot
|
|
536
|
+
if atype in {"screenshot", "get_screenshot"}:
|
|
537
|
+
return self._build_call(id_, call_id, "get_screenshot", {})
|
|
538
|
+
|
|
539
|
+
# Wait
|
|
540
|
+
if atype in {"wait", "sleep"}:
|
|
541
|
+
secs = int(action.get("seconds", action.get("sec", 2)))
|
|
542
|
+
return self._build_call(id_, call_id, "wait", {"seconds": secs})
|
|
543
|
+
|
|
544
|
+
# Fallback: short wait to avoid breaking flow
|
|
545
|
+
return self._build_call(id_, call_id, "wait", {"seconds": 1})
|
|
546
|
+
|
|
547
|
+
# --------------- Build / normalize calls --------------- #
|
|
548
|
+
|
|
549
|
+
def _normalize_params_for_plugin(self, name: str, args: dict) -> dict:
|
|
550
|
+
"""
|
|
551
|
+
Normalize arguments dict to match plugin Worker expectations.
|
|
552
|
+
Conservative and side-effect-free; only maps/renames, does not invent values.
|
|
553
|
+
"""
|
|
554
|
+
if not isinstance(args, dict):
|
|
555
|
+
return {}
|
|
556
|
+
out = dict(args)
|
|
557
|
+
|
|
558
|
+
# coordinate -> x,y
|
|
559
|
+
if "coordinate" in out and ("x" not in out or "y" not in out):
|
|
560
|
+
coord = out.get("coordinate")
|
|
561
|
+
if isinstance(coord, (list, tuple)) and len(coord) >= 2:
|
|
562
|
+
try:
|
|
563
|
+
out["x"] = int(coord[0])
|
|
564
|
+
out["y"] = int(coord[1])
|
|
565
|
+
except Exception:
|
|
566
|
+
pass
|
|
567
|
+
elif isinstance(coord, dict) and "x" in coord and "y" in coord:
|
|
568
|
+
out["x"] = coord.get("x")
|
|
569
|
+
out["y"] = coord.get("y")
|
|
570
|
+
out.pop("coordinate", None)
|
|
571
|
+
|
|
572
|
+
# scroll_x/scroll_y -> dx,dy for scroll commands
|
|
573
|
+
if name in ("mouse_scroll", "scroll"):
|
|
574
|
+
if "scroll_x" in out and "dx" not in out:
|
|
575
|
+
try:
|
|
576
|
+
out["dx"] = int(out.get("scroll_x", 0))
|
|
577
|
+
except Exception:
|
|
578
|
+
pass
|
|
579
|
+
if "scroll_y" in out and "dy" not in out:
|
|
580
|
+
try:
|
|
581
|
+
out["dy"] = int(out.get("scroll_y", 0))
|
|
582
|
+
except Exception:
|
|
583
|
+
pass
|
|
584
|
+
if "unit" in out:
|
|
585
|
+
unit = str(out.get("unit")).lower().strip()
|
|
586
|
+
if unit in ("px", "pixel", "pixels"):
|
|
587
|
+
out["unit"] = "px"
|
|
588
|
+
elif unit in ("step", "steps", "notch", "notches", "line", "lines"):
|
|
589
|
+
out["unit"] = "step"
|
|
590
|
+
|
|
591
|
+
# offset / delta -> dx,dy
|
|
592
|
+
if "dx" not in out or "dy" not in out:
|
|
593
|
+
for k in ("offset", "delta"):
|
|
594
|
+
val = out.get(k)
|
|
595
|
+
if isinstance(val, (list, tuple)) and len(val) >= 2:
|
|
596
|
+
try:
|
|
597
|
+
out.setdefault("dx", int(val[0]))
|
|
598
|
+
out.setdefault("dy", int(val[1]))
|
|
599
|
+
except Exception:
|
|
600
|
+
pass
|
|
601
|
+
|
|
602
|
+
# action -> button/num_clicks mapping
|
|
603
|
+
act = str(out.get("action", "") or "").lower()
|
|
604
|
+
if act:
|
|
605
|
+
is_double = ("double" in act) or ("dbl" in act)
|
|
606
|
+
if "right" in act:
|
|
607
|
+
out["button"] = "right"
|
|
608
|
+
else:
|
|
609
|
+
out["button"] = out.get("button", "left")
|
|
610
|
+
out["num_clicks"] = 2 if is_double else int(out.get("num_clicks", 1))
|
|
611
|
+
out.pop("action", None)
|
|
612
|
+
|
|
613
|
+
# click -> button (defensive)
|
|
614
|
+
if name in ("mouse_click", "click") and "click" in out and "button" not in out:
|
|
615
|
+
out["button"] = out.pop("click")
|
|
616
|
+
|
|
617
|
+
# destination -> dx,dy for drag
|
|
618
|
+
if name in ("mouse_drag", "drag"):
|
|
619
|
+
dest = out.get("destination") or out.get("target") or out.get("end")
|
|
620
|
+
if dest is not None and ("dx" not in out or "dy" not in out):
|
|
621
|
+
if isinstance(dest, (list, tuple)) and len(dest) >= 2:
|
|
622
|
+
try:
|
|
623
|
+
out["dx"] = int(dest[0])
|
|
624
|
+
out["dy"] = int(dest[1])
|
|
625
|
+
except Exception:
|
|
626
|
+
pass
|
|
627
|
+
elif isinstance(dest, dict) and "x" in dest and "y" in dest:
|
|
628
|
+
out["dx"] = dest.get("x")
|
|
629
|
+
out["dy"] = dest.get("y")
|
|
630
|
+
out.pop("destination", None)
|
|
631
|
+
out.pop("target", None)
|
|
632
|
+
out.pop("end", None)
|
|
633
|
+
|
|
634
|
+
# keys normalization
|
|
635
|
+
if name in ("keyboard_keys", "key_combination"):
|
|
636
|
+
if "keys" not in out and "key" in out:
|
|
637
|
+
out["keys"] = [out.get("key")]
|
|
638
|
+
out.pop("key", None)
|
|
639
|
+
if isinstance(out.get("keys"), str):
|
|
640
|
+
out["keys"] = [p.strip() for p in out["keys"].replace("+", " ").split() if p.strip()]
|
|
641
|
+
|
|
642
|
+
# text normalization for type
|
|
643
|
+
if name == "keyboard_type":
|
|
644
|
+
if "text" not in out and "value" in out:
|
|
645
|
+
out["text"] = out.get("value")
|
|
646
|
+
out.pop("value", None)
|
|
647
|
+
|
|
648
|
+
# ensure unit for scroll
|
|
649
|
+
if name in ("mouse_scroll", "scroll"):
|
|
650
|
+
if "unit" not in out:
|
|
651
|
+
out["unit"] = "px"
|
|
652
|
+
|
|
653
|
+
return out
|
|
654
|
+
|
|
655
|
+
def _append_call(self, tool_calls: list, id_: str, call_id: str, name: str, args: dict) -> None:
|
|
656
|
+
tool_calls.append(self._build_call(id_, call_id, name, args))
|
|
657
|
+
|
|
658
|
+
def _build_call(self, id_: str, call_id: str, name: str, args: dict) -> dict:
|
|
659
|
+
norm = self._normalize_params_for_plugin(name, args or {})
|
|
660
|
+
norm = self._filter_args_for_plugin(name, norm)
|
|
661
|
+
norm = self._prune_none(norm)
|
|
662
|
+
if name != "get_screenshot":
|
|
663
|
+
norm["no_screenshot"] = True
|
|
664
|
+
return {
|
|
665
|
+
"id": id_,
|
|
666
|
+
"call_id": call_id,
|
|
667
|
+
"type": "computer_call",
|
|
668
|
+
"function": {
|
|
669
|
+
"name": name,
|
|
670
|
+
"arguments": json.dumps(norm, ensure_ascii=False),
|
|
671
|
+
}
|
|
672
|
+
}
|
|
673
|
+
|
|
674
|
+
# --------------- Utils --------------- #
|
|
675
|
+
|
|
676
|
+
def _retarget_function_name_and_args(self, name: str, args: dict) -> Tuple[str, dict]:
|
|
677
|
+
"""
|
|
678
|
+
Convert action-style/synonym function names to canonical plugin command names and
|
|
679
|
+
adjust defaults (e.g., left_click/right_click/double_click).
|
|
680
|
+
"""
|
|
681
|
+
src = (name or "").strip().lower()
|
|
682
|
+
target = self.ACTION_SYNONYMS.get(src, src)
|
|
683
|
+
out = dict(args or {})
|
|
684
|
+
|
|
685
|
+
# Click synonyms defaulting button/click count
|
|
686
|
+
if src in ("left_click", "right_click"):
|
|
687
|
+
out.setdefault("button", "left" if src == "left_click" else "right")
|
|
688
|
+
out.setdefault("num_clicks", 1)
|
|
689
|
+
elif src == "double_click":
|
|
690
|
+
out.setdefault("button", "left")
|
|
691
|
+
out.setdefault("num_clicks", 2)
|
|
692
|
+
|
|
693
|
+
# coordinate -> x,y (defensive retarget)
|
|
694
|
+
if "coordinate" in out and ("x" not in out or "y" not in out):
|
|
695
|
+
coord = out.get("coordinate")
|
|
696
|
+
if isinstance(coord, (list, tuple)) and len(coord) >= 2:
|
|
697
|
+
try:
|
|
698
|
+
out["x"] = int(coord[0])
|
|
699
|
+
out["y"] = int(coord[1])
|
|
700
|
+
except Exception:
|
|
701
|
+
pass
|
|
702
|
+
elif isinstance(coord, dict) and "x" in coord and "y" in coord:
|
|
703
|
+
out["x"] = coord.get("x")
|
|
704
|
+
out["y"] = coord.get("y")
|
|
705
|
+
|
|
706
|
+
return target, out
|
|
707
|
+
|
|
708
|
+
def _filter_args_for_plugin(self, name: str, args: Dict[str, Any]) -> Dict[str, Any]:
|
|
709
|
+
"""
|
|
710
|
+
Strict allow-list per command to ensure no unsupported keys like "action" or "coordinate"
|
|
711
|
+
are passed to Worker. Also performs a few defensive conversions (click->button, etc.).
|
|
712
|
+
"""
|
|
713
|
+
allow: Dict[str, set] = {
|
|
714
|
+
"mouse_move": {"x", "y", "click", "num_clicks"},
|
|
715
|
+
"mouse_click": {"x", "y", "button", "num_clicks"},
|
|
716
|
+
"mouse_scroll": {"x", "y", "dx", "dy", "unit"},
|
|
717
|
+
"mouse_drag": {"x", "y", "dx", "dy"},
|
|
718
|
+
"keyboard_key": {"key", "modifier"},
|
|
719
|
+
"keyboard_keys": {"keys"},
|
|
720
|
+
"keyboard_type": {"text", "modifier"},
|
|
721
|
+
"open_web_browser": {"url", "no_screenshot"},
|
|
722
|
+
"get_mouse_position": {"no_screenshot"},
|
|
723
|
+
"get_screenshot": {"no_screenshot"},
|
|
724
|
+
"wait": {"seconds", "no_screenshot"},
|
|
725
|
+
# native extras
|
|
726
|
+
"wait_5_seconds": set(),
|
|
727
|
+
"go_back": set(),
|
|
728
|
+
"go_forward": set(),
|
|
729
|
+
"search": set(),
|
|
730
|
+
"navigate": {"url"},
|
|
731
|
+
"click_at": {"x", "y"},
|
|
732
|
+
"hover_at": {"x", "y"},
|
|
733
|
+
"type_text_at": {"x", "y", "text", "press_enter", "clear_before_typing"},
|
|
734
|
+
"key_combination": {"keys"},
|
|
735
|
+
"scroll_document": {"direction", "magnitude"},
|
|
736
|
+
"scroll_at": {"direction", "magnitude", "x", "y"},
|
|
737
|
+
"drag_and_drop": {"x", "y", "destination_x", "destination_y"},
|
|
738
|
+
# action-style
|
|
739
|
+
"click": {"x", "y", "button", "num_clicks"},
|
|
740
|
+
"double_click": {"x", "y", "button", "num_clicks"},
|
|
741
|
+
"move": {"x", "y"},
|
|
742
|
+
"type": {"text"},
|
|
743
|
+
"keypress": {"keys"},
|
|
744
|
+
"scroll": {"x", "y", "dx", "dy", "unit"},
|
|
745
|
+
"drag": {"x", "y", "dx", "dy", "path"},
|
|
746
|
+
}
|
|
747
|
+
res: Dict[str, Any] = {}
|
|
748
|
+
|
|
749
|
+
# coordinate -> x,y (final defensive conversion)
|
|
750
|
+
if "coordinate" in args and ("x" not in args or "y" not in args):
|
|
751
|
+
coord = args.get("coordinate")
|
|
752
|
+
if isinstance(coord, (list, tuple)) and len(coord) >= 2:
|
|
753
|
+
try:
|
|
754
|
+
args["x"] = int(coord[0])
|
|
755
|
+
args["y"] = int(coord[1])
|
|
756
|
+
except Exception:
|
|
757
|
+
pass
|
|
758
|
+
elif isinstance(coord, dict) and "x" in coord and "y" in coord:
|
|
759
|
+
args["x"] = coord.get("x")
|
|
760
|
+
args["y"] = coord.get("y")
|
|
761
|
+
|
|
762
|
+
# action -> button/num_clicks (final defensive conversion)
|
|
763
|
+
if "action" in args and (name in ("mouse_click", "click", "mouse_move")):
|
|
764
|
+
act = str(args.get("action") or "").lower()
|
|
765
|
+
if "right" in act:
|
|
766
|
+
args["button"] = "right"
|
|
767
|
+
else:
|
|
768
|
+
args["button"] = args.get("button", "left")
|
|
769
|
+
args["num_clicks"] = 2 if ("double" in act or "dbl" in act) else int(args.get("num_clicks", 1))
|
|
770
|
+
|
|
771
|
+
# click -> button for mouse_click
|
|
772
|
+
if name in ("mouse_click", "click") and "button" not in args and "click" in args:
|
|
773
|
+
args["button"] = args.get("click")
|
|
774
|
+
|
|
775
|
+
allowed = allow.get(name)
|
|
776
|
+
if allowed is None:
|
|
777
|
+
tmp = dict(args)
|
|
778
|
+
tmp.pop("action", None)
|
|
779
|
+
tmp.pop("coordinate", None)
|
|
780
|
+
return tmp
|
|
781
|
+
|
|
782
|
+
for k in allowed:
|
|
783
|
+
if k in args and args[k] is not None:
|
|
784
|
+
res[k] = args[k]
|
|
785
|
+
|
|
786
|
+
# Normalize unit for scrolling
|
|
787
|
+
if name in ("mouse_scroll", "scroll"):
|
|
788
|
+
unit = str(res.get("unit", "px")).lower()
|
|
789
|
+
res["unit"] = "px" if unit in ("px", "pixel", "pixels") else "step"
|
|
790
|
+
|
|
791
|
+
return res
|
|
792
|
+
|
|
793
|
+
def _ensure_ctx_memory(self, ctx: CtxItem) -> Dict[str, Dict[str, str]]:
    """
    Return the "anthropic_computer" dict stored in `ctx.extra`, creating or
    completing it as needed.

    - `ctx.extra` is replaced by an empty dict when it is not a dict.
    - A missing or non-dict "anthropic_computer" entry is replaced by a fresh
      structure with "buffers" ({}), "index_to_id" ({}) and "active_ids" ([]).
    - An existing entry gets any missing key added; "active_ids" is also reset
      when it is present but not a list.

    :param ctx: context item whose `extra` attribute holds the state
    :return: the (possibly just created) `ctx.extra["anthropic_computer"]` dict
    """
    if not isinstance(ctx.extra, dict):
        ctx.extra = {}
    store = ctx.extra

    state = store.get("anthropic_computer")
    if not isinstance(state, dict):
        store["anthropic_computer"] = {
            "buffers": {},
            "index_to_id": {},
            "active_ids": [],
        }
        return store["anthropic_computer"]

    # Existing entry: only fill the gaps, keep whatever is already stored.
    for key in ("buffers", "index_to_id"):
        if key not in state:
            state[key] = {}
    if not isinstance(state.get("active_ids"), list):
        state["active_ids"] = []
    return store["anthropic_computer"]
|
|
811
|
+
|
|
812
|
+
@staticmethod
|
|
813
|
+
def _safe_json_loads(s: str) -> Optional[Any]:
|
|
814
|
+
if not isinstance(s, str):
|
|
815
|
+
return None
|
|
816
|
+
s = s.strip()
|
|
817
|
+
if not s:
|
|
818
|
+
return None
|
|
819
|
+
try:
|
|
820
|
+
return json.loads(s)
|
|
821
|
+
except Exception:
|
|
822
|
+
try:
|
|
823
|
+
fixed = s
|
|
824
|
+
if fixed.count("{") > fixed.count("}"):
|
|
825
|
+
fixed += "}" * (fixed.count("{") - fixed.count("}"))
|
|
826
|
+
if fixed.count("[") > fixed.count("]"):
|
|
827
|
+
fixed += "]" * (fixed.count("[") - fixed.count("]"))
|
|
828
|
+
return json.loads(fixed)
|
|
829
|
+
except Exception:
|
|
830
|
+
return None
|
|
831
|
+
|
|
832
|
+
@staticmethod
|
|
833
|
+
def _gen_id(prefix: str = "ac") -> str:
|
|
834
|
+
return f"{prefix}-{int(time.time() * 1000)}"
|
|
835
|
+
|
|
836
|
+
@staticmethod
|
|
837
|
+
def _prune_none(d: Dict[str, Any]) -> Dict[str, Any]:
|
|
838
|
+
"""
|
|
839
|
+
Remove keys with None values to avoid passing None to Worker.
|
|
840
|
+
"""
|
|
841
|
+
try:
|
|
842
|
+
return {k: v for k, v in d.items() if v is not None}
|
|
843
|
+
except Exception:
|
|
844
|
+
return d
|