lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic. Click here for more details.
- lm_deluge/__init__.py +1 -2
- lm_deluge/api_requests/anthropic.py +117 -22
- lm_deluge/api_requests/base.py +84 -11
- lm_deluge/api_requests/bedrock.py +30 -6
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +166 -20
- lm_deluge/api_requests/openai.py +145 -25
- lm_deluge/batches.py +15 -45
- lm_deluge/client.py +309 -50
- lm_deluge/config.py +15 -3
- lm_deluge/models/__init__.py +14 -1
- lm_deluge/models/anthropic.py +29 -14
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +42 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +18 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +133 -7
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +50 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +705 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +537 -88
- lm_deluge/request_context.py +7 -2
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/tool/__init__.py +1130 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/anthropic/bash.py +0 -0
- lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
- lm_deluge-0.0.90.dist-info/RECORD +132 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
- /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,466 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Converters from provider-specific computer use formats to CUAction.
|
|
3
|
+
|
|
4
|
+
This module handles the mapping between:
|
|
5
|
+
- Anthropic's computer tool call arguments
|
|
6
|
+
- OpenAI's computer_call action format
- Gemini's computer use function calls
|
|
7
|
+
- The provider-agnostic CUAction format
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from .actions import (
|
|
13
|
+
Click,
|
|
14
|
+
CUAction,
|
|
15
|
+
CursorPos,
|
|
16
|
+
DoubleClick,
|
|
17
|
+
Drag,
|
|
18
|
+
GoBack,
|
|
19
|
+
GoForward,
|
|
20
|
+
HoldKey,
|
|
21
|
+
Keypress,
|
|
22
|
+
MouseDown,
|
|
23
|
+
MouseUp,
|
|
24
|
+
Move,
|
|
25
|
+
Navigate,
|
|
26
|
+
Scroll,
|
|
27
|
+
Screenshot,
|
|
28
|
+
Search,
|
|
29
|
+
TripleClick,
|
|
30
|
+
Type,
|
|
31
|
+
Wait,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Maps Anthropic's single-click action names to the CUAction button name.
_ANTHROPIC_CLICK_BUTTONS = {
    "left_click": "left",
    "right_click": "right",
    "middle_click": "middle",
}


def _anthropic_optional_point(arguments: dict[str, Any]) -> tuple[Any, Any]:
    """Return the optional ``coordinate`` argument as an ``(x, y)`` pair.

    Yields ``(None, None)`` when the coordinate is missing, null or empty, so
    that the resulting action carries no explicit position.
    """
    coord = arguments.get("coordinate")
    if not coord:
        return None, None
    return coord[0], coord[1]


def _anthropic_scroll_deltas(direction: Any, amount: Any) -> tuple[Any, Any]:
    """Translate a scroll direction and click count into ``(dx, dy)``.

    Positive ``dy`` scrolls down and positive ``dx`` scrolls right. An
    unrecognised direction produces ``(0, 0)``.
    """
    pixels_per_click = 120  # Standard scroll amount
    step = amount * pixels_per_click

    dx, dy = 0, 0
    if direction == "down":
        dy = step
    elif direction == "up":
        dy = -step
    elif direction == "right":
        dx = step
    elif direction == "left":
        dx = -step
    return dx, dy


def anthropic_tool_call_to_action(arguments: dict[str, Any]) -> CUAction:
    """
    Convert Anthropic computer tool call arguments to a CUAction.

    Anthropic's computer tool uses an "action" field to specify the action type,
    with additional fields depending on the action.

    Supported actions:
    - screenshot: Take a screenshot
    - left_click, right_click, middle_click: Click at "coordinate"; when the
      coordinate is absent the action carries x=None, y=None
    - double_click, triple_click: Multi-click, same coordinate handling
    - mouse_move: Move cursor to "coordinate" (defaults to [0, 0] if missing)
    - left_click_drag: Drag from "start_coordinate" (optional) to "coordinate"
    - scroll: Scroll by "scroll_amount" clicks (default 3) in
      "scroll_direction" (default "down"); each click is 120 pixels
    - type: Type "text"
    - key: Press the key or key combination given in "text"
    - wait: Wait for "duration" seconds (default 1.0), converted to milliseconds
    - left_mouse_down, left_mouse_up: Fine-grained mouse control
    - hold_key: Hold the key given in "text" for "duration" seconds
      (default 0.5); a legacy "key" argument is accepted as a fallback
    - cursor_position: Get current cursor position

    Args:
        arguments: The "input" dict from Anthropic's tool_use block

    Returns:
        A CUAction that can be passed to a ComputerExecutor

    Raises:
        ValueError: If the "action" field is missing or not a supported action
    """
    action = arguments.get("action")

    if action == "screenshot":
        return Screenshot(kind="screenshot")

    # isinstance guard keeps unhashable junk values on the ValueError path below.
    if isinstance(action, str) and action in _ANTHROPIC_CLICK_BUTTONS:
        x, y = _anthropic_optional_point(arguments)
        return Click(
            kind="click",
            x=x,
            y=y,
            button=_ANTHROPIC_CLICK_BUTTONS[action],
        )

    if action == "double_click":
        x, y = _anthropic_optional_point(arguments)
        return DoubleClick(kind="double_click", x=x, y=y)

    if action == "triple_click":
        x, y = _anthropic_optional_point(arguments)
        return TripleClick(kind="triple_click", x=x, y=y)

    if action == "mouse_move":
        # A move needs a concrete target, so a missing coordinate keeps the
        # original fallback of the screen origin.
        coord = arguments.get("coordinate", [0, 0])
        return Move(
            kind="move",
            x=coord[0],
            y=coord[1],
        )

    if action == "left_click_drag":
        coord = arguments.get("coordinate", [0, 0])
        start_coord = arguments.get("start_coordinate")
        return Drag(
            kind="drag",
            start_x=start_coord[0] if start_coord else None,
            start_y=start_coord[1] if start_coord else None,
            path=[(coord[0], coord[1])],  # End point as the path
        )

    if action == "scroll":
        x, y = _anthropic_optional_point(arguments)
        dx, dy = _anthropic_scroll_deltas(
            arguments.get("scroll_direction", "down"),
            arguments.get("scroll_amount", 3),
        )
        return Scroll(
            kind="scroll",
            x=x,
            y=y,
            dx=dx,
            dy=dy,
        )

    if action == "type":
        return Type(
            kind="type",
            text=arguments.get("text", ""),
        )

    if action == "key":
        # Anthropic's computer tool uses "text" parameter for key presses
        # e.g., {"action": "key", "text": "Return"} or {"action": "key", "text": "ctrl+a"}
        key = arguments.get("text", "")
        # The combination is forwarded as a single list entry, not split on "+".
        return Keypress(
            kind="keypress",
            keys=[key] if key else [],
        )

    if action == "wait":
        # Anthropic sends duration in seconds (float)
        duration = arguments.get("duration", 1.0)
        return Wait(
            kind="wait",
            ms=int(duration * 1000),
        )

    if action == "left_mouse_down":
        return MouseDown(
            kind="mouse_down",
            button="left",
        )

    if action == "left_mouse_up":
        return MouseUp(
            kind="mouse_up",
            button="left",
        )

    if action == "hold_key":
        # Anthropic passes the held key in "text", like the "key" action;
        # "key" is still honoured for callers that relied on the old name.
        key = arguments.get("text") or arguments.get("key", "")
        duration = arguments.get("duration", 0.5)
        return HoldKey(
            kind="hold_key",
            key=key,
            ms=int(duration * 1000),
        )

    if action == "cursor_position":
        return CursorPos(kind="cursor_position")

    raise ValueError(f"Unknown Anthropic computer action: {action}")
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def openai_computer_call_to_action(action_data: dict[str, Any]) -> CUAction:
    """
    Translate an OpenAI Responses API computer_call action into a CUAction.

    The action object carries a "type" discriminator; the remaining fields
    are read according to that type.

    Handled types and the fields read for each:
    - screenshot: no fields
    - click: x, y, button (default "left")
    - double_click: x, y
    - scroll: x, y, scroll_x (default 0), scroll_y (default 0)
    - type: text (default "")
    - keypress: keys (default [])
    - move: x, y (both default 0)
    - drag: start_x, start_y, path (default [])
    - wait: ms (default 1000)

    Args:
        action_data: The "action" dict from OpenAI's computer_call

    Returns:
        A CUAction that can be passed to a ComputerExecutor

    Raises:
        ValueError: If "type" is missing or not one of the handled types
    """
    field = action_data.get
    requested = field("type")

    if requested == "screenshot":
        return Screenshot(kind="screenshot")

    if requested == "click":
        return Click(
            kind="click",
            x=field("x"),
            y=field("y"),
            button=field("button", "left"),
        )

    if requested == "double_click":
        return DoubleClick(kind="double_click", x=field("x"), y=field("y"))

    if requested == "scroll":
        return Scroll(
            kind="scroll",
            x=field("x"),
            y=field("y"),
            dx=field("scroll_x", 0),
            dy=field("scroll_y", 0),
        )

    if requested == "type":
        return Type(kind="type", text=field("text", ""))

    if requested == "keypress":
        return Keypress(kind="keypress", keys=field("keys", []))

    if requested == "move":
        return Move(kind="move", x=field("x", 0), y=field("y", 0))

    if requested == "drag":
        # NOTE(review): the path is forwarded exactly as received; confirm the
        # executors accept the point format OpenAI sends.
        waypoints = field("path", [])
        return Drag(
            kind="drag",
            start_x=field("start_x"),
            start_y=field("start_y"),
            path=waypoints,
        )

    if requested == "wait":
        return Wait(kind="wait", ms=field("ms", 1000))

    raise ValueError(f"Unknown OpenAI computer action type: {requested}")
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def gemini_function_call_to_action(
    function_name: str,
    args: dict[str, Any],
    screen_width: int = 1440,
    screen_height: int = 900,
) -> CUAction:
    """
    Convert Gemini computer use function call to a CUAction.

    Gemini uses regular function calls for computer use actions. The coordinates
    are normalized to a 0-999 scale and must be denormalized to actual pixels.

    Supported function names:
    - click_at: x, y (normalized); always a left click
    - type_text_at: only "text" is used; x, y, press_enter and
      clear_before_typing are ignored here and left to the caller
    - scroll_at: x, y (normalized, default 500), direction (default "down"),
      magnitude (normalized, default 800); the magnitude is scaled by the
      screen height for up/down and by the screen width for left/right
    - scroll_document: direction; scrolls a fixed 300 pixels at screen center
    - drag_and_drop: x, y, destination_x, destination_y (normalized)
    - key_combination: keys (string like "Control+C"), forwarded unsplit
    - navigate: url
    - hover_at: x, y (normalized)
    - go_back, go_forward, search (query), wait_5_seconds
    - open_web_browser: treated as a no-op (zero-length wait)

    Args:
        function_name: The function name from Gemini's function_call
        args: The arguments dict from the function_call
        screen_width: Screen width for denormalizing coordinates (default 1440)
        screen_height: Screen height for denormalizing coordinates (default 900)

    Returns:
        A CUAction that can be passed to a ComputerExecutor

    Raises:
        ValueError: If function_name is not one of the supported functions
    """

    def denormalize_x(x: int) -> int:
        """Convert normalized x coordinate (0-999) to actual pixel coordinate."""
        return int(x / 1000 * screen_width)

    def denormalize_y(y: int) -> int:
        """Convert normalized y coordinate (0-999) to actual pixel coordinate."""
        return int(y / 1000 * screen_height)

    def directional_deltas(
        direction: Any, horizontal: int, vertical: int
    ) -> tuple[int, int]:
        """Return (dx, dy) for a direction; unknown directions give (0, 0).

        Positive dy scrolls down and positive dx scrolls right.
        """
        if direction == "down":
            return 0, vertical
        if direction == "up":
            return 0, -vertical
        if direction == "right":
            return horizontal, 0
        if direction == "left":
            return -horizontal, 0
        return 0, 0

    if function_name == "click_at":
        return Click(
            kind="click",
            x=denormalize_x(args.get("x", 0)),
            y=denormalize_y(args.get("y", 0)),
            button="left",
        )

    if function_name == "hover_at":
        return Move(
            kind="move",
            x=denormalize_x(args.get("x", 0)),
            y=denormalize_y(args.get("y", 0)),
        )

    if function_name == "type_text_at":
        # Gemini's type_text_at includes click, clear, type, and optional enter
        # We'll return just the Type action - the calling code should handle the rest
        # (clicking at position, clearing field if needed)
        return Type(
            kind="type",
            text=args.get("text", ""),
        )

    if function_name == "key_combination":
        # Gemini uses plus-separated keys like "Control+C"
        keys_str = args.get("keys", "")
        return Keypress(
            kind="keypress",
            keys=[keys_str] if keys_str else [],
        )

    if function_name == "scroll_at":
        direction = args.get("direction", "down")
        magnitude = args.get("magnitude", 800)  # Default magnitude in normalized units

        # Scale the normalized magnitude along the axis actually being
        # scrolled. Scaling by the larger screen dimension for every direction
        # made vertical scrolls on landscape screens overshoot.
        dx, dy = directional_deltas(
            direction,
            denormalize_x(magnitude),
            denormalize_y(magnitude),
        )

        return Scroll(
            kind="scroll",
            x=denormalize_x(args.get("x", 500)),
            y=denormalize_y(args.get("y", 500)),
            dx=dx,
            dy=dy,
        )

    if function_name == "scroll_document":
        direction = args.get("direction", "down")
        # Use a default scroll amount
        scroll_amount = 300
        dx, dy = directional_deltas(direction, scroll_amount, scroll_amount)

        return Scroll(
            kind="scroll",
            x=screen_width // 2,
            y=screen_height // 2,
            dx=dx,
            dy=dy,
        )

    if function_name == "drag_and_drop":
        return Drag(
            kind="drag",
            start_x=denormalize_x(args.get("x", 0)),
            start_y=denormalize_y(args.get("y", 0)),
            path=[
                (
                    denormalize_x(args.get("destination_x", 0)),
                    denormalize_y(args.get("destination_y", 0)),
                )
            ],
        )

    if function_name == "navigate":
        return Navigate(
            kind="navigate",
            url=args.get("url", ""),
        )

    if function_name == "wait_5_seconds":
        return Wait(
            kind="wait",
            ms=5000,
        )

    if function_name == "go_back":
        return GoBack(kind="go_back")

    if function_name == "go_forward":
        return GoForward(kind="go_forward")

    if function_name == "search":
        return Search(
            kind="search",
            query=args.get("query", ""),
        )

    if function_name == "open_web_browser":
        # No-op for browser environments - browser is already open
        # Return a wait with 0ms as a no-op
        return Wait(kind="wait", ms=0)

    raise ValueError(f"Unknown Gemini computer use function: {function_name}")
|