lm-deluge 0.0.82__py3-none-any.whl → 0.0.84__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lm_deluge/api_requests/anthropic.py +9 -0
- lm_deluge/api_requests/gemini.py +34 -2
- lm_deluge/api_requests/openai.py +1 -1
- lm_deluge/client.py +100 -0
- lm_deluge/models/__init__.py +3 -1
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +14 -0
- lm_deluge/models/kimi.py +2 -0
- lm_deluge/models/openrouter.py +10 -0
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +1 -0
- lm_deluge/prompt.py +39 -11
- lm_deluge/tool/__init__.py +11 -4
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +8 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +905 -15
- lm_deluge/tool/prefab/memory.py +269 -1
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/web_search.py +195 -0
- lm_deluge/warnings.py +1 -0
- {lm_deluge-0.0.82.dist-info → lm_deluge-0.0.84.dist-info}/METADATA +1 -1
- {lm_deluge-0.0.82.dist-info → lm_deluge-0.0.84.dist-info}/RECORD +42 -28
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/llm_tools/__init__.py +0 -25
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/bash.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/computer_use.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/openai.py +0 -0
- {lm_deluge-0.0.82.dist-info → lm_deluge-0.0.84.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.82.dist-info → lm_deluge-0.0.84.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.82.dist-info → lm_deluge-0.0.84.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
# Versioned Anthropic tool sets, keyed by their release date:
#   2024-10-22 -> Claude 3.5/3.6 (the original computer-use release)
#   2025-01-24 -> Claude Sonnet 3.7 and the Claude 4 family
#   2025-11-24 -> Claude Opus 4.5 (adds the zoom action)
ToolVersion = Literal["2024-10-22", "2025-01-24", "2025-11-24"]
# Tool categories a caller can refer to (e.g. when excluding tools).
ToolType = Literal["bash", "computer", "editor"]


def model_to_version(model: str) -> ToolVersion:
    """
    Determine the appropriate tool version for a given model.

    Matching is done on case-insensitive substrings of the model name, so
    both short aliases ("claude-4-sonnet") and full dated API names
    ("claude-sonnet-4-20250514") are recognised.

    Model compatibility:
    - Claude Opus 4.5 (claude-opus-4-5-*, claude-4.5-opus, claude-4-5-opus):
      2025-11-24 tools with zoom support
    - Claude 4 models (claude-4-*, claude-sonnet-4[-*], claude-opus-4[-*],
      claude-haiku-4-5-*, ...): 2025-01-24 tools
    - Claude Sonnet 3.7 (deprecated): 2025-01-24 tools
    - Claude 3.5/3.6: 2024-10-22 tools

    Args:
        model: Model name or alias.

    Returns:
        The tool version string for the model.

    Raises:
        ValueError: If the name does not contain "opus", "sonnet" or "haiku",
            or if it matches none of the supported model generations.
    """
    model_lower = model.lower()

    # Anything that is not an Opus/Sonnet/Haiku model cannot use these tools.
    if not any(family in model_lower for family in ("opus", "sonnet", "haiku")):
        raise ValueError(
            f"Cannot use computer tools with model '{model}'. "
            "Computer use requires Claude Opus, Sonnet, or Haiku models."
        )

    # Opus 4.5 must be tested first: its names also match the generic
    # Claude 4 patterns below. Both dotted and dashed spellings are accepted,
    # mirroring how 3.7 / 3-7 are treated further down.
    opus_4_5_patterns = ("opus-4-5", "opus-4.5", "4.5-opus", "4-5-opus")
    if any(p in model_lower for p in opus_4_5_patterns):
        return "2025-11-24"

    claude_4_patterns = (
        "claude-4",  # alias prefix: claude-4-sonnet, claude-4-opus
        "4.5-sonnet",  # alias: claude-4.5-sonnet
        "4.5-haiku",  # alias: claude-4.5-haiku
        "sonnet-4-5",  # full name: claude-sonnet-4-5-*
        "sonnet-4-",  # full name: claude-sonnet-4-* (trailing dash avoids 3-5 names)
        "opus-4-",  # full name: claude-opus-4-* (opus-4-5 already handled above)
        "haiku-4-5",  # full name: claude-haiku-4-5-*
    )
    # The trailing-dash patterns miss bare names such as "claude-sonnet-4",
    # so those are accepted explicitly when the name ends with the family.
    bare_claude_4_suffixes = ("sonnet-4", "opus-4")
    if any(p in model_lower for p in claude_4_patterns) or model_lower.endswith(
        bare_claude_4_suffixes
    ):
        return "2025-01-24"

    # Claude Sonnet 3.7 (deprecated but still supported)
    if "3.7" in model_lower or "3-7" in model_lower:
        return "2025-01-24"

    # Claude 3.5/3.6 (older models)
    if any(tag in model_lower for tag in ("3.5", "3-5", "3.6", "3-6")):
        return "2024-10-22"

    raise ValueError(
        f"Unsupported model '{model}' for Anthropic computer use. "
        "Supported: Claude Opus 4.5, Claude 4 models, Sonnet 3.7, or 3.5/3.6."
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def get_beta_header(model: str) -> str:
    """
    Return the computer-use beta header that matches the given model.

    The model is first mapped to a tool version via model_to_version (which
    raises ValueError for unsupported models); the header is then looked up
    from that version.

    Returns:
        Beta header string to use in the API request.
    """
    newer_headers = {
        "2025-11-24": "computer-use-2025-11-24",
        "2025-01-24": "computer-use-2025-01-24",
    }
    # Any other version falls back to the original 2024-10-22 header.
    return newer_headers.get(model_to_version(model), "computer-use-2024-10-22")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_anthropic_cu_tools(
    model: str,
    display_width: int = 1024,
    display_height: int = 768,
    exclude_tools: list[ToolType] | None = None,
    enable_zoom: bool = False,
) -> list[dict]:
    """
    Get the computer use tools for the given model.

    The returned list is ordered computer, editor, bash (minus any excluded
    tools). Tool types are chosen from the model's tool version; within the
    2025-01-24 version, Claude Sonnet 3.7 gets text_editor_20250124
    ("str_replace_editor") while Claude 4 models get text_editor_20250728
    ("str_replace_based_edit_tool").

    Args:
        model: The model name (e.g., "claude-opus-4-5-20251124", "claude-4-sonnet")
        display_width: Display width in pixels (recommended <= 1280)
        display_height: Display height in pixels (recommended <= 800)
        exclude_tools: List of tool types to exclude ("bash", "computer", "editor")
        enable_zoom: Enable zoom action for Opus 4.5 (computer_20251124 only);
            ignored for every other tool version.

    Returns:
        List of tool definitions for the Anthropic API.

    Raises:
        ValueError: If the model is unsupported (raised by model_to_version)
            or maps to an unknown tool version.

    Note:
        Keep display resolution at or below 1280x800 (WXGA) for best performance.
        Higher resolutions may cause accuracy issues due to image resizing.
    """
    version = model_to_version(model)
    model_lower = model.lower()

    # Editor definition shared by Claude 4 models and Opus 4.5.
    modern_editor = {
        "name": "str_replace_based_edit_tool",
        "type": "text_editor_20250728",
    }

    if version == "2024-10-22":
        # Claude 3.5/3.6 - original computer use
        computer_type = "computer_20241022"
        editor = {"name": "str_replace_editor", "type": "text_editor_20241022"}
        bash = {"name": "bash", "type": "bash_20241022"}
    elif version == "2025-01-24":
        # Claude 4 models and Sonnet 3.7 share the computer and bash tools,
        # but Sonnet 3.7 uses the older editor revision.
        computer_type = "computer_20250124"
        if "3.7" in model_lower or "3-7" in model_lower:
            editor = {"name": "str_replace_editor", "type": "text_editor_20250124"}
        else:
            editor = modern_editor
        bash = {"name": "bash", "type": "bash_20250124"}
    elif version == "2025-11-24":
        # Claude Opus 4.5 - newest with zoom support
        computer_type = "computer_20251124"
        editor = modern_editor
        bash = {"name": "bash", "type": "bash_20250124"}
    else:
        raise ValueError(f"Invalid tool version: {version}")

    computer: dict = {
        "name": "computer",
        "type": computer_type,
        "display_width_px": display_width,
        "display_height_px": display_height,
        "display_number": None,
    }
    # Zoom lets Claude inspect screen regions; only computer_20251124 has it.
    if enable_zoom and version == "2025-11-24":
        computer["enable_zoom"] = True

    # Insertion order fixes the output order: computer, editor, bash.
    tools_by_kind = {"computer": computer, "editor": editor, "bash": bash}
    excluded = set(exclude_tools or ())
    return [tool for kind, tool in tools_by_kind.items() if kind not in excluded]
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def bash_tool(model: str = "claude-4-sonnet") -> dict:
    """
    Build the bash tool definition for the given model.

    The bash tool lets Claude execute shell commands.

    Note: Claude 3.5 requires the computer-use-2024-10-22 beta header.
    The bash tool is generally available in Claude 4 and Sonnet 3.7.

    Returns:
        A dict with "type" and "name" keys; the type is bash_20250124 for the
        2025-01-24 and 2025-11-24 tool versions and bash_20241022 otherwise.
    """
    uses_current_bash = model_to_version(model) in ("2025-11-24", "2025-01-24")
    tool_type = "bash_20250124" if uses_current_bash else "bash_20241022"
    return {"type": tool_type, "name": "bash"}
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def text_editor_tool(model: str = "claude-4-sonnet") -> dict:
    """
    Get the text editor tool definition for the given model.

    The text editor tool allows Claude to view, create, and edit files.

    Note:
    - Claude 4 and Opus 4.5 use text_editor_20250728 with name "str_replace_based_edit_tool"
      (no undo_edit command, has optional max_characters parameter)
    - Claude Sonnet 3.7 uses text_editor_20250124 with name "str_replace_editor"
      (includes undo_edit command)
    - Claude 3.5/3.6 uses text_editor_20241022 with name "str_replace_editor"

    Returns:
        A dict with "type" and "name" keys for the Anthropic API.

    Raises:
        ValueError: If the model is unsupported (raised by model_to_version).
    """
    version = model_to_version(model)
    model_lower = model.lower()

    if version == "2024-10-22":
        return {"type": "text_editor_20241022", "name": "str_replace_editor"}

    # Sonnet 3.7 shares the 2025-01-24 tool version with Claude 4 but needs the
    # older editor revision, so it is singled out by name here.
    if version == "2025-01-24" and ("3.7" in model_lower or "3-7" in model_lower):
        return {"type": "text_editor_20250124", "name": "str_replace_editor"}

    return {"type": "text_editor_20250728", "name": "str_replace_based_edit_tool"}
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def computer_tool(
    model: str = "claude-4-sonnet",
    display_width: int = 1024,
    display_height: int = 768,
    enable_zoom: bool = False,
) -> dict:
    """
    Build the computer use tool definition for the given model.

    The computer tool lets Claude see and control desktop environments
    through screenshots and mouse/keyboard actions.

    Args:
        model: The model name
        display_width: Display width in pixels (recommended <= 1280)
        display_height: Display height in pixels (recommended <= 800)
        enable_zoom: Enable zoom action (Opus 4.5 only). When enabled, Claude can
            use the zoom action to view specific screen regions at full resolution.
            Has no effect for other tool versions.

    Returns:
        The tool definition dict; "enable_zoom" is present only when requested
        for a computer_20251124 tool.

    Available actions by version:
    - All versions: screenshot, left_click, type, key, mouse_move
    - computer_20250124+: scroll, left_click_drag, right_click, middle_click,
      double_click, triple_click, left_mouse_down, left_mouse_up, hold_key, wait
    - computer_20251124 (Opus 4.5): All above + zoom (requires enable_zoom=True)
    """
    version = model_to_version(model)
    supports_zoom = version == "2025-11-24"

    if supports_zoom:
        tool_type = "computer_20251124"
    elif version == "2025-01-24":
        tool_type = "computer_20250124"
    else:  # 2024-10-22
        tool_type = "computer_20241022"

    definition: dict = {
        "name": "computer",
        "type": tool_type,
        "display_width_px": display_width,
        "display_height_px": display_height,
        "display_number": None,
    }
    if supports_zoom and enable_zoom:
        definition["enable_zoom"] = True
    return definition
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def web_search_tool(
|
|
267
|
+
max_uses: int = 5,
|
|
268
|
+
allowed_domains: list[str] | None = None,
|
|
269
|
+
blocked_domains: list[str] | None = None,
|
|
270
|
+
) -> dict:
|
|
271
|
+
"""
|
|
272
|
+
Get the web search tool definition.
|
|
273
|
+
|
|
274
|
+
Args:
|
|
275
|
+
max_uses: Maximum number of searches per request (default: 5)
|
|
276
|
+
allowed_domains: Only include results from these domains
|
|
277
|
+
blocked_domains: Never include results from these domains
|
|
278
|
+
|
|
279
|
+
Note: You can use either allowed_domains or blocked_domains, but not both.
|
|
280
|
+
"""
|
|
281
|
+
res: dict = {
|
|
282
|
+
"type": "web_search_20250305",
|
|
283
|
+
"name": "web_search",
|
|
284
|
+
"max_uses": max_uses,
|
|
285
|
+
}
|
|
286
|
+
if allowed_domains:
|
|
287
|
+
res["allowed_domains"] = allowed_domains
|
|
288
|
+
if blocked_domains:
|
|
289
|
+
res["blocked_domains"] = blocked_domains
|
|
290
|
+
return res
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def code_execution_tool() -> dict:
    """
    Build the code execution tool definition.

    The code execution tool is currently in beta and requires the beta
    header: "anthropic-beta": "code-execution-2025-05-22"

    Returns:
        A dict with the tool's "type" and "name".
    """
    definition = {}
    definition["type"] = "code_execution_20250522"
    definition["name"] = "code_execution"
    return definition
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gemini built-in tools including computer use.
|
|
3
|
+
|
|
4
|
+
Gemini computer use works differently from OpenAI/Anthropic:
|
|
5
|
+
- Uses a special ComputerUse tool type in the API request
|
|
6
|
+
- Returns actions as regular function_call objects
|
|
7
|
+
- Uses normalized coordinates (0-999) that must be denormalized
|
|
8
|
+
- Function responses include screenshots as FunctionResponsePart
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import Literal
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def computer_use_gemini(
|
|
15
|
+
environment: Literal["browser", "android"] = "browser",
|
|
16
|
+
excluded_functions: list[str] | None = None,
|
|
17
|
+
) -> dict:
|
|
18
|
+
"""
|
|
19
|
+
Create a Gemini computer use tool configuration.
|
|
20
|
+
|
|
21
|
+
This returns a dict that will be specially handled when building
|
|
22
|
+
the Gemini API request.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
environment: The environment type - "browser" or "android"
|
|
26
|
+
excluded_functions: List of predefined function names to exclude.
|
|
27
|
+
Available functions:
|
|
28
|
+
- open_web_browser, wait_5_seconds, go_back, go_forward
|
|
29
|
+
- search, navigate, click_at, hover_at, type_text_at
|
|
30
|
+
- key_combination, scroll_document, scroll_at, drag_and_drop
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
A dict that will be converted to ComputerUse tool config
|
|
34
|
+
"""
|
|
35
|
+
result: dict[str, str | list[str]] = {
|
|
36
|
+
"type": "gemini_computer_use",
|
|
37
|
+
"environment": environment,
|
|
38
|
+
}
|
|
39
|
+
if excluded_functions:
|
|
40
|
+
result["excluded_predefined_functions"] = excluded_functions
|
|
41
|
+
return result
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Constants for Gemini computer use action names
|
|
45
|
+
GEMINI_CU_ACTIONS = [
    # Browser lifecycle / history
    "open_web_browser",
    "wait_5_seconds",
    "go_back",
    "go_forward",
    # Navigation
    "search",
    "navigate",
    # Pointer and keyboard input
    "click_at",
    "hover_at",
    "type_text_at",
    "key_combination",
    # Scrolling and dragging
    "scroll_document",
    "scroll_at",
    "drag_and_drop",
]
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Computer Use Actions (CUA) module.
|
|
3
|
+
|
|
4
|
+
This module provides a provider-agnostic abstraction for computer use actions
|
|
5
|
+
and executors that can run them on various backends.
|
|
6
|
+
|
|
7
|
+
Key components:
|
|
8
|
+
- CUAction: Union type of all possible computer use actions
|
|
9
|
+
- ComputerExecutor: Abstract base class for action executors
|
|
10
|
+
- KernelExecutor: Execute actions on Kernel's browser-as-a-service
|
|
11
|
+
- TryCUAExecutor: Execute actions on TryCUA's computer-server (desktop control)
|
|
12
|
+
|
|
13
|
+
Usage with Kernel (browser):
|
|
14
|
+
from lm_deluge.tool.cua import (
|
|
15
|
+
KernelBrowser,
|
|
16
|
+
KernelExecutor,
|
|
17
|
+
anthropic_tool_call_to_action,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Create a browser and executor
|
|
21
|
+
with KernelBrowser() as browser:
|
|
22
|
+
executor = KernelExecutor(browser.session_id)
|
|
23
|
+
|
|
24
|
+
# Convert Anthropic tool call to action
|
|
25
|
+
action = anthropic_tool_call_to_action(tool_call.arguments)
|
|
26
|
+
|
|
27
|
+
# Execute and get result
|
|
28
|
+
result = executor.execute(action)
|
|
29
|
+
|
|
30
|
+
Usage with TryCUA (desktop):
|
|
31
|
+
from lm_deluge.tool.cua import (
|
|
32
|
+
TryCUAConnection,
|
|
33
|
+
TryCUAExecutor,
|
|
34
|
+
Screenshot,
|
|
35
|
+
Click,
|
|
36
|
+
Type,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# Connect to a TryCUA computer-server
|
|
40
|
+
with TryCUAConnection("ws://localhost:8000/ws") as conn:
|
|
41
|
+
executor = TryCUAExecutor(conn)
|
|
42
|
+
|
|
43
|
+
# Execute actions
|
|
44
|
+
result = executor.execute(Screenshot(kind="screenshot"))
|
|
45
|
+
executor.execute(Click(kind="click", x=100, y=200, button="left"))
|
|
46
|
+
executor.execute(Type(kind="type", text="Hello!"))
|
|
47
|
+
|
|
48
|
+
# Async version
|
|
49
|
+
async with AsyncTryCUAConnection("ws://localhost:8000/ws") as conn:
|
|
50
|
+
executor = AsyncTryCUAExecutor(conn)
|
|
51
|
+
result = await executor.execute(Screenshot(kind="screenshot"))
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
from .actions import (
|
|
55
|
+
Bash,
|
|
56
|
+
Click,
|
|
57
|
+
CUAction,
|
|
58
|
+
CursorPos,
|
|
59
|
+
DoubleClick,
|
|
60
|
+
Drag,
|
|
61
|
+
Edit,
|
|
62
|
+
GoBack,
|
|
63
|
+
GoForward,
|
|
64
|
+
HoldKey,
|
|
65
|
+
Keypress,
|
|
66
|
+
MouseDown,
|
|
67
|
+
MouseUp,
|
|
68
|
+
Move,
|
|
69
|
+
Navigate,
|
|
70
|
+
Scroll,
|
|
71
|
+
Screenshot,
|
|
72
|
+
Search,
|
|
73
|
+
TripleClick,
|
|
74
|
+
Type,
|
|
75
|
+
Wait,
|
|
76
|
+
)
|
|
77
|
+
from .base import ComputerExecutor, CUActionResult
|
|
78
|
+
from .base import Screenshot as ScreenshotResult
|
|
79
|
+
from .converters import (
|
|
80
|
+
anthropic_tool_call_to_action,
|
|
81
|
+
openai_computer_call_to_action,
|
|
82
|
+
gemini_function_call_to_action,
|
|
83
|
+
)
|
|
84
|
+
from .batch import create_computer_batch_tool
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# Lazy imports for optional dependencies
|
|
88
|
+
def __getattr__(name: str):
    """
    Resolve the lazily loaded executor classes on first attribute access.

    The Kernel and TryCUA backends are imported only when one of their public
    names is requested, so importing this package does not require their
    optional dependencies.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    kernel_exports = (
        "KernelBrowser",
        "KernelExecutor",
        "AsyncKernelBrowser",
        "AsyncKernelExecutor",
    )
    trycua_exports = (
        "TryCUAConnection",
        "TryCUAExecutor",
        "AsyncTryCUAConnection",
        "AsyncTryCUAExecutor",
    )

    if name in kernel_exports:
        from . import kernel as backend
    elif name in trycua_exports:
        from . import trycua as backend
    else:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    return getattr(backend, name)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
__all__ = [
|
|
131
|
+
# Actions
|
|
132
|
+
"CUAction",
|
|
133
|
+
"Click",
|
|
134
|
+
"DoubleClick",
|
|
135
|
+
"TripleClick",
|
|
136
|
+
"Move",
|
|
137
|
+
"Drag",
|
|
138
|
+
"Scroll",
|
|
139
|
+
"Keypress",
|
|
140
|
+
"Type",
|
|
141
|
+
"Wait",
|
|
142
|
+
"Screenshot",
|
|
143
|
+
"MouseDown",
|
|
144
|
+
"MouseUp",
|
|
145
|
+
"CursorPos",
|
|
146
|
+
"HoldKey",
|
|
147
|
+
"Navigate",
|
|
148
|
+
"GoBack",
|
|
149
|
+
"GoForward",
|
|
150
|
+
"Search",
|
|
151
|
+
"Bash",
|
|
152
|
+
"Edit",
|
|
153
|
+
# Base classes
|
|
154
|
+
"ComputerExecutor",
|
|
155
|
+
"CUActionResult",
|
|
156
|
+
"ScreenshotResult",
|
|
157
|
+
# Converters
|
|
158
|
+
"anthropic_tool_call_to_action",
|
|
159
|
+
"openai_computer_call_to_action",
|
|
160
|
+
"gemini_function_call_to_action",
|
|
161
|
+
# Batch tool
|
|
162
|
+
"create_computer_batch_tool",
|
|
163
|
+
# Kernel executor (lazy loaded)
|
|
164
|
+
"KernelBrowser", # pyright: ignore[reportUnsupportedDunderAll]
|
|
165
|
+
"KernelExecutor", # pyright: ignore[reportUnsupportedDunderAll]
|
|
166
|
+
"AsyncKernelBrowser", # pyright: ignore[reportUnsupportedDunderAll]
|
|
167
|
+
"AsyncKernelExecutor", # pyright: ignore[reportUnsupportedDunderAll]
|
|
168
|
+
# TryCUA executor (lazy loaded)
|
|
169
|
+
"TryCUAConnection", # pyright: ignore[reportUnsupportedDunderAll]
|
|
170
|
+
"TryCUAExecutor", # pyright: ignore[reportUnsupportedDunderAll]
|
|
171
|
+
"AsyncTryCUAConnection", # pyright: ignore[reportUnsupportedDunderAll]
|
|
172
|
+
"AsyncTryCUAExecutor", # pyright: ignore[reportUnsupportedDunderAll]
|
|
173
|
+
]
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
from typing import Any, List, Literal, TypedDict, Union
|
|
2
|
+
|
|
3
|
+
Coord = tuple[int, int]
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CUActionBase(TypedDict):
|
|
7
|
+
kind: str | Any # discriminator
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Click(CUActionBase):
    """Single mouse click with the given button."""

    kind: Literal["click"]
    # Target position; None means "use the current cursor position".
    x: int | None
    y: int | None
    button: Literal["left", "right", "middle", "back", "forward"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class DoubleClick(CUActionBase):
    """Double click at a position."""

    kind: Literal["double_click"]
    # Target position; None means "use the current cursor position".
    x: int | None
    y: int | None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Move(CUActionBase):
    """Move the cursor to an explicit position (both coordinates required)."""

    kind: Literal["move"]
    x: int
    y: int
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Drag(CUActionBase):
    """Press the mouse at a start point and drag it along a path."""

    kind: Literal["drag"]
    # Start point; None means "begin at the current cursor position".
    start_x: int | None
    start_y: int | None
    # Points to drag through after the mouse button goes down.
    # Builtin generic, consistent with the module's ``tuple[int, int]`` alias.
    path: list[Coord]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Scroll(CUActionBase):
    """Scroll by (dx, dy) at a position."""

    kind: Literal["scroll"]
    # Where to scroll; None means "at the current cursor position".
    x: int | None
    y: int | None
    # Scroll amounts; these correspond to scroll_x / scroll_y in OpenAI's API.
    dx: int
    dy: int
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Keypress(CUActionBase):
    """Press one or more keys."""

    kind: Literal["keypress"]
    # Key names to press. NOTE(review): presumably pressed together as a
    # combination - confirm against the executors.
    # Builtin generic, consistent with the PEP 604 unions used in this module.
    keys: list[str]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Type(CUActionBase):
    """Type a string of text."""

    kind: Literal["type"]
    text: str
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class Wait(CUActionBase):
    """Pause before the next action."""

    kind: Literal["wait"]
    # Duration of the pause (presumably milliseconds, going by the name).
    ms: int
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class Screenshot(CUActionBase):
    """Request a screenshot; carries no fields beyond the discriminator."""

    kind: Literal["screenshot"]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class MouseDown(CUActionBase):
    """Press (and keep holding) a mouse button."""

    kind: Literal["mouse_down"]
    button: Literal["left", "right", "middle", "back", "forward"]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class MouseUp(CUActionBase):
    """Release a mouse button."""

    kind: Literal["mouse_up"]
    button: Literal["left", "right", "middle", "back", "forward"]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class CursorPos(CUActionBase):
    """Query the cursor position; carries no fields beyond the discriminator."""

    kind: Literal["cursor_position"]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class HoldKey(CUActionBase):
    """Hold a single key down for a duration."""

    kind: Literal["hold_key"]
    key: str
    # How long to hold the key (presumably milliseconds, going by the name).
    ms: int
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class TripleClick(CUActionBase):
    """Triple click at a position."""

    kind: Literal["triple_click"]
    # Target position; None means "use the current cursor position".
    x: int | None
    y: int | None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ── Browser‑level actions ────────────────────────────────────────────
|
|
90
|
+
class Navigate(CUActionBase):
    """Browser-level action: go to a URL."""

    kind: Literal["navigate"]
    url: str
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class GoBack(CUActionBase):
    """Browser-level action: navigate back; no extra fields."""

    kind: Literal["go_back"]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class GoForward(CUActionBase):
    """Browser-level action: navigate forward; no extra fields."""

    kind: Literal["go_forward"]
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class Search(CUActionBase):
    """Browser-level action: run a search for a query string."""

    kind: Literal["search"]
    query: str
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# ── Bash / Editor (provider‑independent) ────────────────────────────
|
|
109
|
+
class Bash(CUActionBase):
    """Provider-independent bash tool action."""

    kind: Literal["bash"]
    # Both keys are required but may be None.
    # NOTE(review): presumably a call sets either a command to run or the
    # restart flag - confirm against the converters.
    command: str | None
    restart: bool | None
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class Edit(CUActionBase):
    """Provider-independent text editor action."""

    kind: Literal["edit"]
    command: Literal["view", "create", "str_replace", "insert", "undo_edit"]
    path: str
    # Command-specific arguments. Names are kept identical to the Anthropic
    # spec; each is None when it does not apply to the chosen command.
    file_text: str | None
    # Builtin generic, consistent with the PEP 604 unions used in this module.
    view_range: list[int] | None
    old_str: str | None
    new_str: str | None
    insert_line: int | None
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# Union of every action type an executor may be asked to run.
CUAction = Union[
    # Mouse buttons and clicks
    Click,
    DoubleClick,
    TripleClick,
    MouseDown,
    MouseUp,
    # Pointer movement
    Drag,
    Move,
    Scroll,
    # Keyboard
    Keypress,
    Type,
    HoldKey,
    # Timing and observation
    Wait,
    Screenshot,
    CursorPos,
    # Browser-level
    Navigate,
    GoBack,
    GoForward,
    Search,
    # Bash / editor
    Bash,
    Edit,
]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from typing import TypedDict
|
|
3
|
+
|
|
4
|
+
from .actions import CUAction
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Screenshot(TypedDict):
    """Screenshot payload returned by an executor."""

    # Media type of the image (presumably a MIME type such as "image/png";
    # confirm against the executors).
    media_type: str
    # Raw image bytes.
    content: bytes
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CUActionResult(TypedDict):
    """Result of executing a single computer-use action."""

    # Screenshot attached to the result, or None when there is none.
    screenshot: Screenshot | None
    # Structured metadata about the action.
    data: dict
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ComputerExecutor(abc.ABC):
    """
    Base class for anything that can run a CUAction (see actions.py).

    Executors decouple API providers (OpenAI, Anthropic) from computer-use
    backends (BrowserBase, Kernel, Modal sandbox):
    - each provider's tool calls are mapped onto some (sub)set of CUActions
    - each backend defines how to run those CUActions
    """

    def execute(self, action: CUAction) -> CUActionResult:
        """
        Run ``action`` and return its result.

        This base implementation always raises NotImplementedError. The
        method is not marked abstract, so subclasses are expected (but not
        forced at instantiation time) to override it.
        """
        raise NotImplementedError("Subclasses must implement execute method")
|