lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic. Click here for more details.
- lm_deluge/__init__.py +1 -2
- lm_deluge/api_requests/anthropic.py +117 -22
- lm_deluge/api_requests/base.py +84 -11
- lm_deluge/api_requests/bedrock.py +30 -6
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +166 -20
- lm_deluge/api_requests/openai.py +145 -25
- lm_deluge/batches.py +15 -45
- lm_deluge/client.py +309 -50
- lm_deluge/config.py +15 -3
- lm_deluge/models/__init__.py +14 -1
- lm_deluge/models/anthropic.py +29 -14
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +42 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +18 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +133 -7
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +50 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +705 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +537 -88
- lm_deluge/request_context.py +7 -2
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/tool/__init__.py +1130 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/anthropic/bash.py +0 -0
- lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
- lm_deluge-0.0.90.dist-info/RECORD +132 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
- /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
# Tool version identifiers corresponding to Anthropic's versioned tools
# - 2024-10-22: Claude 3.5/3.6 (original computer use)
# - 2025-01-24: Claude Sonnet 3.7 and Claude 4 models
# - 2025-11-24: Claude Opus 4.5 (adds zoom action)
# These are the only values model_to_version() returns.
ToolVersion = Literal["2024-10-22", "2025-01-24", "2025-11-24"]
# Tool families a caller may list in `exclude_tools` of get_anthropic_cu_tools().
ToolType = Literal["bash", "computer", "editor"]
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def model_to_version(model: str) -> ToolVersion:
    """
    Map a model name onto the Anthropic tool version it should use.

    Matching is case-insensitive and substring based. Rules are tried in
    order and the first hit wins:

    1. Claude Opus 4.5 -> "2025-11-24" (tools with zoom support)
    2. Other Claude 4 models (aliases and full names) -> "2025-01-24"
    3. Claude Sonnet 3.7 (deprecated) -> "2025-01-24"
    4. Claude 3.5/3.6 -> "2024-10-22"

    Raises:
        ValueError: if the name mentions none of opus/sonnet/haiku, or if it
            mentions one of them but matches none of the rules above.
    """
    name = model.lower()

    def mentions(fragments: tuple[str, ...]) -> bool:
        # True when at least one fragment occurs anywhere in the lowered name.
        return any(fragment in name for fragment in fragments)

    # Reject anything that is not from a known Claude family up front.
    if not mentions(("opus", "sonnet", "haiku")):
        raise ValueError(
            f"Cannot use computer tools with model '{model}'. "
            "Computer use requires Claude Opus, Sonnet, or Haiku models."
        )

    # Order matters: the Opus 4.5 fragments must be tested before the generic
    # Claude 4 fragments, because "opus-4-" would also match "opus-4-5".
    rules: tuple[tuple[tuple[str, ...], ToolVersion], ...] = (
        # Claude Opus 4.5: claude-opus-4-5-*, claude-4.5-opus, etc.
        (("opus-4-5", "opus-4.5", "4.5-opus"), "2025-11-24"),
        # Claude 4 models (Sonnet 4.5, Opus 4, Sonnet 4, Haiku 4.5, etc.)
        (
            (
                "claude-4",  # alias prefix: claude-4-sonnet, claude-4-opus
                "4.5-sonnet",  # alias: claude-4.5-sonnet
                "4.5-haiku",  # alias: claude-4.5-haiku
                "sonnet-4-5",  # full name: claude-sonnet-4-5-*
                "sonnet-4-",  # full name: claude-sonnet-4-* (trailing dash avoids 3-5)
                "opus-4-",  # full name: claude-opus-4-* (opus-4-5 already handled)
                "haiku-4-5",  # full name: claude-haiku-4-5-*
            ),
            "2025-01-24",
        ),
        # Claude Sonnet 3.7 (deprecated but still supported)
        (("3.7", "3-7"), "2025-01-24"),
        # Claude 3.5/3.6 (older models)
        (("3.5", "3-5", "3.6", "3-6"), "2024-10-22"),
    )
    for fragments, version in rules:
        if mentions(fragments):
            return version

    raise ValueError(
        f"Unsupported model '{model}' for Anthropic computer use. "
        "Supported: Claude Opus 4.5, Claude 4 models, Sonnet 3.7, or 3.5/3.6."
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def get_beta_header(model: str) -> str:
    """
    Return the computer-use beta header value for the given model.

    The model is first resolved to a tool version via model_to_version(),
    so any ValueError raised there propagates to the caller.

    Returns:
        Beta header string to use in the API request.
    """
    newer_headers = {
        "2025-11-24": "computer-use-2025-11-24",
        "2025-01-24": "computer-use-2025-01-24",
    }
    # Any version not listed above falls back to the original 2024-10-22 header.
    return newer_headers.get(model_to_version(model), "computer-use-2024-10-22")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_anthropic_cu_tools(
    model: str,
    display_width: int = 1024,
    display_height: int = 768,
    exclude_tools: list[ToolType] | None = None,
    enable_zoom: bool = False,
) -> list[dict]:
    """
    Get the computer use tools for the given model.

    Args:
        model: The model name (e.g., "claude-opus-4-5-20251124", "claude-4-sonnet")
        display_width: Display width in pixels (recommended <= 1280)
        display_height: Display height in pixels (recommended <= 800)
        exclude_tools: List of tool types to exclude ("bash", "computer", "editor")
        enable_zoom: Enable zoom action for Opus 4.5 (computer_20251124 only).
            Ignored for every other tool version.

    Returns:
        List of tool definitions for the Anthropic API, in the order
        computer, text editor, bash (minus any excluded tools).

    Raises:
        ValueError: if model_to_version() rejects the model.

    Note:
        Keep display resolution at or below 1280x800 (WXGA) for best performance.
        Higher resolutions may cause accuracy issues due to image resizing.
    """
    version = model_to_version(model)

    if version == "2024-10-22":
        # Claude 3.5/3.6 - original computer use
        computer_type = "computer_20241022"
        editor = {"name": "str_replace_editor", "type": "text_editor_20241022"}
        bash = {"name": "bash", "type": "bash_20241022"}
    elif version == "2025-01-24":
        # Claude 4 models and Sonnet 3.7 share computer_20250124 / bash_20250124
        computer_type = "computer_20250124"
        model_lower = model.lower()
        # Sonnet 3.7 predates text_editor_20250728: it uses text_editor_20250124
        # under the older "str_replace_editor" name (with undo_edit), matching
        # what text_editor_tool() documents. The substring test mirrors the
        # 3.7 check in model_to_version().
        if "3.7" in model_lower or "3-7" in model_lower:
            editor = {"name": "str_replace_editor", "type": "text_editor_20250124"}
        else:
            editor = {
                "name": "str_replace_based_edit_tool",
                "type": "text_editor_20250728",
            }
        bash = {"name": "bash", "type": "bash_20250124"}
    elif version == "2025-11-24":
        # Claude Opus 4.5 - newest with zoom support
        computer_type = "computer_20251124"
        editor = {"name": "str_replace_based_edit_tool", "type": "text_editor_20250728"}
        bash = {"name": "bash", "type": "bash_20250124"}
    else:
        raise ValueError(f"Invalid tool version: {version}")

    computer: dict = {
        "name": "computer",
        "type": computer_type,
        "display_width_px": display_width,
        "display_height_px": display_height,
        "display_number": None,
    }
    # Zoom lets Claude inspect screen regions; only computer_20251124 supports it.
    if enable_zoom and version == "2025-11-24":
        computer["enable_zoom"] = True

    result = [computer, editor, bash]

    if exclude_tools is None:
        return result

    if "bash" in exclude_tools:
        result = [x for x in result if x["name"] != "bash"]
    if "editor" in exclude_tools:
        # Both editor names ("str_replace_editor", "str_replace_based_edit_tool")
        # contain "edit", so one substring test covers every version.
        result = [x for x in result if "edit" not in x["name"]]
    if "computer" in exclude_tools:
        result = [x for x in result if x["name"] != "computer"]

    return result
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def bash_tool(model: str = "claude-4-sonnet") -> dict:
    """
    Build the bash tool definition matching the given model.

    The bash tool allows Claude to execute shell commands.

    Models whose tool version is 2025-11-24 or 2025-01-24 get "bash_20250124";
    everything else gets "bash_20241022".

    Note: Claude 3.5 requires the computer-use-2024-10-22 beta header.
    The bash tool is generally available in Claude 4 and Sonnet 3.7.
    """
    is_current = model_to_version(model) in ("2025-11-24", "2025-01-24")
    tool_type = "bash_20250124" if is_current else "bash_20241022"
    return {"type": tool_type, "name": "bash"}
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def text_editor_tool(model: str = "claude-4-sonnet") -> dict:
    """
    Get the text editor tool definition for the given model.

    The text editor tool allows Claude to view, create, and edit files.

    Note:
        - Claude 4 and Opus 4.5 use text_editor_20250728 with name "str_replace_based_edit_tool"
          (no undo_edit command, has optional max_characters parameter)
        - Claude Sonnet 3.7 uses text_editor_20250124 with name "str_replace_editor"
          (includes undo_edit command)
        - Claude 3.5/3.6 uses text_editor_20241022 with name "str_replace_editor"

    Raises:
        ValueError: if model_to_version() rejects the model.
    """
    version = model_to_version(model)

    if version in ("2025-11-24", "2025-01-24"):
        model_lower = model.lower()
        # Sonnet 3.7 shares the 2025-01-24 tool version with Claude 4 but needs
        # the older editor type. Previously it was handed text_editor_20250728,
        # contradicting the note above. The substring test mirrors the 3.7
        # check in model_to_version().
        if version == "2025-01-24" and ("3.7" in model_lower or "3-7" in model_lower):
            return {"type": "text_editor_20250124", "name": "str_replace_editor"}
        return {"type": "text_editor_20250728", "name": "str_replace_based_edit_tool"}
    else:  # 2024-10-22
        return {"type": "text_editor_20241022", "name": "str_replace_editor"}
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def computer_tool(
    model: str = "claude-4-sonnet",
    display_width: int = 1024,
    display_height: int = 768,
    enable_zoom: bool = False,
) -> dict:
    """
    Build the computer use tool definition matching the given model.

    The computer tool allows Claude to see and control desktop environments
    through screenshots and mouse/keyboard actions.

    Args:
        model: The model name
        display_width: Display width in pixels (recommended <= 1280)
        display_height: Display height in pixels (recommended <= 800)
        enable_zoom: Enable zoom action (Opus 4.5 only). When enabled, Claude can
            use the zoom action to view specific screen regions at full resolution.
            Has no effect for other tool versions.

    Available actions by version:
        - All versions: screenshot, left_click, type, key, mouse_move
        - computer_20250124+: scroll, left_click_drag, right_click, middle_click,
          double_click, triple_click, left_mouse_down, left_mouse_up, hold_key, wait
        - computer_20251124 (Opus 4.5): All above + zoom (requires enable_zoom=True)
    """
    version = model_to_version(model)

    # Anything that is not one of the two newer versions uses the 2024-10-22 type.
    newer_types = {
        "2025-11-24": "computer_20251124",
        "2025-01-24": "computer_20250124",
    }
    definition: dict = {
        "name": "computer",
        "type": newer_types.get(version, "computer_20241022"),
        "display_width_px": display_width,
        "display_height_px": display_height,
        "display_number": None,
    }
    # The zoom flag is only emitted for the Opus 4.5 tool version.
    if version == "2025-11-24" and enable_zoom:
        definition["enable_zoom"] = True
    return definition
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def web_search_tool(
|
|
267
|
+
max_uses: int = 5,
|
|
268
|
+
allowed_domains: list[str] | None = None,
|
|
269
|
+
blocked_domains: list[str] | None = None,
|
|
270
|
+
) -> dict:
|
|
271
|
+
"""
|
|
272
|
+
Get the web search tool definition.
|
|
273
|
+
|
|
274
|
+
Args:
|
|
275
|
+
max_uses: Maximum number of searches per request (default: 5)
|
|
276
|
+
allowed_domains: Only include results from these domains
|
|
277
|
+
blocked_domains: Never include results from these domains
|
|
278
|
+
|
|
279
|
+
Note: You can use either allowed_domains or blocked_domains, but not both.
|
|
280
|
+
"""
|
|
281
|
+
res: dict = {
|
|
282
|
+
"type": "web_search_20250305",
|
|
283
|
+
"name": "web_search",
|
|
284
|
+
"max_uses": max_uses,
|
|
285
|
+
}
|
|
286
|
+
if allowed_domains:
|
|
287
|
+
res["allowed_domains"] = allowed_domains
|
|
288
|
+
if blocked_domains:
|
|
289
|
+
res["blocked_domains"] = blocked_domains
|
|
290
|
+
return res
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def code_execution_tool() -> dict:
    """
    Build the Anthropic code execution tool definition.

    The code execution tool is currently in beta.
    This feature requires the beta header: "anthropic-beta": "code-execution-2025-05-22"
    """
    definition = {"type": "code_execution_20250522", "name": "code_execution"}
    return definition
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gemini built-in tools including computer use.
|
|
3
|
+
|
|
4
|
+
Gemini computer use works differently from OpenAI/Anthropic:
|
|
5
|
+
- Uses a special ComputerUse tool type in the API request
|
|
6
|
+
- Returns actions as regular function_call objects
|
|
7
|
+
- Uses normalized coordinates (0-999) that must be denormalized
|
|
8
|
+
- Function responses include screenshots as FunctionResponsePart
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import Literal
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def computer_use_gemini(
|
|
15
|
+
environment: Literal["browser", "android"] = "browser",
|
|
16
|
+
excluded_functions: list[str] | None = None,
|
|
17
|
+
) -> dict:
|
|
18
|
+
"""
|
|
19
|
+
Create a Gemini computer use tool configuration.
|
|
20
|
+
|
|
21
|
+
This returns a dict that will be specially handled when building
|
|
22
|
+
the Gemini API request.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
environment: The environment type - "browser" or "android"
|
|
26
|
+
excluded_functions: List of predefined function names to exclude.
|
|
27
|
+
Available functions:
|
|
28
|
+
- open_web_browser, wait_5_seconds, go_back, go_forward
|
|
29
|
+
- search, navigate, click_at, hover_at, type_text_at
|
|
30
|
+
- key_combination, scroll_document, scroll_at, drag_and_drop
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
A dict that will be converted to ComputerUse tool config
|
|
34
|
+
"""
|
|
35
|
+
result: dict[str, str | list[str]] = {
|
|
36
|
+
"type": "gemini_computer_use",
|
|
37
|
+
"environment": environment,
|
|
38
|
+
}
|
|
39
|
+
if excluded_functions:
|
|
40
|
+
result["excluded_predefined_functions"] = excluded_functions
|
|
41
|
+
return result
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Constants for Gemini computer use action names
# Same names as the predefined functions listed in computer_use_gemini()'s
# docstring, i.e. the values accepted in its `excluded_functions` argument.
GEMINI_CU_ACTIONS = [
    "open_web_browser",
    "wait_5_seconds",
    "go_back",
    "go_forward",
    "search",
    "navigate",
    "click_at",
    "hover_at",
    "type_text_at",
    "key_combination",
    "scroll_document",
    "scroll_at",
    "drag_and_drop",
]
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
def image_generation_openai():
    """Return the tool entry that enables OpenAI's built-in image generation."""
    # TODO: handle result properly
    return dict(type="image_generation")
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def code_interpreter_openai(container: dict | None = None):
|
|
7
|
+
if container is None:
|
|
8
|
+
container = {"type": "auto"}
|
|
9
|
+
return {"type": "code_interpreter", "container": container}
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def local_shell_openai():
    """Return the tool entry that enables OpenAI's built-in local shell tool."""
    return dict(type="local_shell")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def web_search_openai(
|
|
17
|
+
preview: bool = False,
|
|
18
|
+
user_location: dict | None = None,
|
|
19
|
+
allowed_domains: list[str] | None = None,
|
|
20
|
+
search_context_size: str | None = None,
|
|
21
|
+
):
|
|
22
|
+
"""OpenAI's built-in web search tool for the Responses API.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
preview: If True, use web_search_preview. If False (default), use
|
|
26
|
+
the GA web_search tool.
|
|
27
|
+
user_location: Optional approximate user location to refine search results.
|
|
28
|
+
Should be a dict with "type": "approximate" and an "approximate" key
|
|
29
|
+
containing any of: country (ISO code), city, region, timezone.
|
|
30
|
+
Note: Not supported for deep research models.
|
|
31
|
+
allowed_domains: Optional list of domains to restrict search results to.
|
|
32
|
+
Up to 100 URLs, without http/https prefix (e.g. "openai.com").
|
|
33
|
+
Only available with web_search (not preview).
|
|
34
|
+
search_context_size: Controls how much context from web search results
|
|
35
|
+
is provided to the model. Options: "low", "medium" (default), "high".
|
|
36
|
+
Higher values use more tokens but may improve response quality.
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
A dict representing the web search tool configuration.
|
|
40
|
+
"""
|
|
41
|
+
tool: dict = {}
|
|
42
|
+
if preview:
|
|
43
|
+
tool["type"] = "web_search_preview"
|
|
44
|
+
if user_location:
|
|
45
|
+
tool["user_location"] = user_location
|
|
46
|
+
if search_context_size:
|
|
47
|
+
tool["search_context_size"] = search_context_size
|
|
48
|
+
return tool
|
|
49
|
+
|
|
50
|
+
# GA web_search tool
|
|
51
|
+
tool["type"] = "web_search"
|
|
52
|
+
|
|
53
|
+
if user_location:
|
|
54
|
+
tool["user_location"] = user_location
|
|
55
|
+
|
|
56
|
+
if search_context_size:
|
|
57
|
+
tool["search_context_size"] = search_context_size
|
|
58
|
+
|
|
59
|
+
# Domain filtering uses a nested filters structure
|
|
60
|
+
if allowed_domains:
|
|
61
|
+
tool["filters"] = {"allowed_domains": allowed_domains}
|
|
62
|
+
|
|
63
|
+
return tool
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def computer_use_openai(
    display_width: int = 1024, display_height: int = 768, environment: str = "browser"
):
    """Return the OpenAI computer use (preview) tool entry.

    Args:
        display_width: Value emitted under the "display_width" key.
        display_height: Value emitted under the "display_height" key.
        environment: Value emitted under the "environment" key
            (defaults to "browser").
    """
    return dict(
        type="computer_use_preview",
        display_width=display_width,
        display_height=display_height,
        environment=environment,
    )
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Computer Use Actions (CUA) module.
|
|
3
|
+
|
|
4
|
+
This module provides a provider-agnostic abstraction for computer use actions
|
|
5
|
+
and executors that can run them on various backends.
|
|
6
|
+
|
|
7
|
+
Key components:
|
|
8
|
+
- CUAction: Union type of all possible computer use actions
|
|
9
|
+
- ComputerExecutor: Abstract base class for action executors
|
|
10
|
+
- KernelExecutor: Execute actions on Kernel's browser-as-a-service
|
|
11
|
+
- TryCUAExecutor: Execute actions on TryCUA's computer-server (desktop control)
|
|
12
|
+
|
|
13
|
+
Usage with Kernel (browser):
|
|
14
|
+
from lm_deluge.tool.cua import (
|
|
15
|
+
KernelBrowser,
|
|
16
|
+
KernelExecutor,
|
|
17
|
+
anthropic_tool_call_to_action,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Create a browser and executor
|
|
21
|
+
with KernelBrowser() as browser:
|
|
22
|
+
executor = KernelExecutor(browser.session_id)
|
|
23
|
+
|
|
24
|
+
# Convert Anthropic tool call to action
|
|
25
|
+
action = anthropic_tool_call_to_action(tool_call.arguments)
|
|
26
|
+
|
|
27
|
+
# Execute and get result
|
|
28
|
+
result = executor.execute(action)
|
|
29
|
+
|
|
30
|
+
Usage with TryCUA (desktop):
|
|
31
|
+
from lm_deluge.tool.cua import (
|
|
32
|
+
TryCUAConnection,
|
|
33
|
+
TryCUAExecutor,
|
|
34
|
+
Screenshot,
|
|
35
|
+
Click,
|
|
36
|
+
Type,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# Connect to a TryCUA computer-server
|
|
40
|
+
with TryCUAConnection("ws://localhost:8000/ws") as conn:
|
|
41
|
+
executor = TryCUAExecutor(conn)
|
|
42
|
+
|
|
43
|
+
# Execute actions
|
|
44
|
+
result = executor.execute(Screenshot(kind="screenshot"))
|
|
45
|
+
executor.execute(Click(kind="click", x=100, y=200, button="left"))
|
|
46
|
+
executor.execute(Type(kind="type", text="Hello!"))
|
|
47
|
+
|
|
48
|
+
# Async version
|
|
49
|
+
async with AsyncTryCUAConnection("ws://localhost:8000/ws") as conn:
|
|
50
|
+
executor = AsyncTryCUAExecutor(conn)
|
|
51
|
+
result = await executor.execute(Screenshot(kind="screenshot"))
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
from .actions import (
|
|
55
|
+
Bash,
|
|
56
|
+
Click,
|
|
57
|
+
CUAction,
|
|
58
|
+
CursorPos,
|
|
59
|
+
DoubleClick,
|
|
60
|
+
Drag,
|
|
61
|
+
Edit,
|
|
62
|
+
GoBack,
|
|
63
|
+
GoForward,
|
|
64
|
+
HoldKey,
|
|
65
|
+
Keypress,
|
|
66
|
+
MouseDown,
|
|
67
|
+
MouseUp,
|
|
68
|
+
Move,
|
|
69
|
+
Navigate,
|
|
70
|
+
Scroll,
|
|
71
|
+
Screenshot,
|
|
72
|
+
Search,
|
|
73
|
+
TripleClick,
|
|
74
|
+
Type,
|
|
75
|
+
Wait,
|
|
76
|
+
)
|
|
77
|
+
from .base import ComputerExecutor, CUActionResult
|
|
78
|
+
from .base import Screenshot as ScreenshotResult
|
|
79
|
+
from .converters import (
|
|
80
|
+
anthropic_tool_call_to_action,
|
|
81
|
+
openai_computer_call_to_action,
|
|
82
|
+
gemini_function_call_to_action,
|
|
83
|
+
)
|
|
84
|
+
from .batch import create_computer_batch_tool
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# Lazy imports for optional dependencies
|
|
88
|
+
def __getattr__(name: str):
    """Resolve the lazily loaded executor names on first attribute access.

    The Kernel and TryCUA classes live in submodules that are imported only
    when one of their names is requested, so their optional dependencies are
    not needed just to import this package.

    Raises:
        AttributeError: for any name that is not one of the lazy exports.
    """
    kernel_names = (
        "KernelBrowser",
        "KernelExecutor",
        "AsyncKernelBrowser",
        "AsyncKernelExecutor",
    )
    trycua_names = (
        "TryCUAConnection",
        "TryCUAExecutor",
        "AsyncTryCUAConnection",
        "AsyncTryCUAExecutor",
    )

    # Unknown names fail fast without importing either submodule.
    if name not in kernel_names and name not in trycua_names:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    if name in kernel_names:
        from .kernel import (
            KernelBrowser,
            KernelExecutor,
            AsyncKernelBrowser,
            AsyncKernelExecutor,
        )

        kernel_exports = {
            "KernelBrowser": KernelBrowser,
            "KernelExecutor": KernelExecutor,
            "AsyncKernelBrowser": AsyncKernelBrowser,
            "AsyncKernelExecutor": AsyncKernelExecutor,
        }
        return kernel_exports[name]

    from .trycua import (
        TryCUAConnection,
        TryCUAExecutor,
        AsyncTryCUAConnection,
        AsyncTryCUAExecutor,
    )

    trycua_exports = {
        "TryCUAConnection": TryCUAConnection,
        "TryCUAExecutor": TryCUAExecutor,
        "AsyncTryCUAConnection": AsyncTryCUAConnection,
        "AsyncTryCUAExecutor": AsyncTryCUAExecutor,
    }
    return trycua_exports[name]
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# Public names of this package. The entries marked "lazy loaded" are not bound
# at import time; they are resolved on first access by the module-level
# __getattr__ defined above, hence the pyright suppressions.
__all__ = [
    # Actions
    "CUAction",
    "Click",
    "DoubleClick",
    "TripleClick",
    "Move",
    "Drag",
    "Scroll",
    "Keypress",
    "Type",
    "Wait",
    "Screenshot",
    "MouseDown",
    "MouseUp",
    "CursorPos",
    "HoldKey",
    "Navigate",
    "GoBack",
    "GoForward",
    "Search",
    "Bash",
    "Edit",
    # Base classes
    "ComputerExecutor",
    "CUActionResult",
    "ScreenshotResult",
    # Converters
    "anthropic_tool_call_to_action",
    "openai_computer_call_to_action",
    "gemini_function_call_to_action",
    # Batch tool
    "create_computer_batch_tool",
    # Kernel executor (lazy loaded)
    "KernelBrowser",  # pyright: ignore[reportUnsupportedDunderAll]
    "KernelExecutor",  # pyright: ignore[reportUnsupportedDunderAll]
    "AsyncKernelBrowser",  # pyright: ignore[reportUnsupportedDunderAll]
    "AsyncKernelExecutor",  # pyright: ignore[reportUnsupportedDunderAll]
    # TryCUA executor (lazy loaded)
    "TryCUAConnection",  # pyright: ignore[reportUnsupportedDunderAll]
    "TryCUAExecutor",  # pyright: ignore[reportUnsupportedDunderAll]
    "AsyncTryCUAConnection",  # pyright: ignore[reportUnsupportedDunderAll]
    "AsyncTryCUAExecutor",  # pyright: ignore[reportUnsupportedDunderAll]
]
|