lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic. Click here for more details.
- lm_deluge/__init__.py +1 -2
- lm_deluge/api_requests/anthropic.py +117 -22
- lm_deluge/api_requests/base.py +84 -11
- lm_deluge/api_requests/bedrock.py +30 -6
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +166 -20
- lm_deluge/api_requests/openai.py +145 -25
- lm_deluge/batches.py +15 -45
- lm_deluge/client.py +309 -50
- lm_deluge/config.py +15 -3
- lm_deluge/models/__init__.py +14 -1
- lm_deluge/models/anthropic.py +29 -14
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +42 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +18 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +133 -7
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +50 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +705 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +537 -88
- lm_deluge/request_context.py +7 -2
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/tool/__init__.py +1130 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/anthropic/bash.py +0 -0
- lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
- lm_deluge-0.0.90.dist-info/RECORD +132 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
- /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,989 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TryCUA (cua.ai) implementation of ComputerExecutor.
|
|
3
|
+
|
|
4
|
+
This module provides a ComputerExecutor that connects to a TryCUA computer-server
|
|
5
|
+
instance via WebSocket to execute computer use actions on a remote desktop.
|
|
6
|
+
|
|
7
|
+
The computer-server can be:
|
|
8
|
+
- A local instance: ws://localhost:8000/ws
|
|
9
|
+
- A cloud instance: wss://your-container.containers.cloud.trycua.com:8443/ws
|
|
10
|
+
|
|
11
|
+
No SDK required - communicates directly via WebSocket using JSON messages.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import base64
|
|
18
|
+
import json
|
|
19
|
+
import time
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from .actions import (
|
|
23
|
+
Bash,
|
|
24
|
+
Click,
|
|
25
|
+
CUAction,
|
|
26
|
+
DoubleClick,
|
|
27
|
+
Drag,
|
|
28
|
+
GoBack,
|
|
29
|
+
GoForward,
|
|
30
|
+
HoldKey,
|
|
31
|
+
Keypress,
|
|
32
|
+
MouseDown,
|
|
33
|
+
MouseUp,
|
|
34
|
+
Move,
|
|
35
|
+
Navigate,
|
|
36
|
+
Scroll,
|
|
37
|
+
Search,
|
|
38
|
+
TripleClick,
|
|
39
|
+
Type,
|
|
40
|
+
Wait,
|
|
41
|
+
)
|
|
42
|
+
from .base import ComputerExecutor, CUActionResult, Screenshot as ScreenshotResult
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class TryCUAConnection:
    """
    Manage a synchronous WebSocket connection to a TryCUA computer-server.

    Every command is sent as a JSON object of the form
    ``{"command": <name>, "params": {...}}`` and the next message received
    on the socket is decoded as that command's JSON response.

    Usage:
        # Sync context manager
        with TryCUAConnection("ws://localhost:8000/ws") as conn:
            executor = TryCUAExecutor(conn)
            result = executor.execute(Screenshot(kind="screenshot"))

        # Async context manager
        async with AsyncTryCUAConnection("ws://localhost:8000/ws") as conn:
            executor = AsyncTryCUAExecutor(conn)
            result = await executor.execute(Screenshot(kind="screenshot"))
    """

    def __init__(
        self,
        ws_url: str = "ws://localhost:8000/ws",
        *,
        container_name: str | None = None,
        api_key: str | None = None,
    ):
        """
        Initialize a TryCUA connection configuration.

        No network activity happens here; call connect() (or use the
        instance as a context manager) to open the socket.

        Args:
            ws_url: WebSocket URL to the computer-server
            container_name: Container name for cloud authentication (optional)
            api_key: API key for cloud authentication (optional)
        """
        self.ws_url = ws_url
        self.container_name = container_name
        self.api_key = api_key
        self._ws = None  # live socket, or None while disconnected
        self._loop = None  # kept for compatibility; not used by this class

    def connect(self) -> "TryCUAConnection":
        """
        Establish the WebSocket connection synchronously.

        When both ``container_name`` and ``api_key`` are set, an
        ``authenticate`` command is sent right after the socket opens.

        Returns:
            This connection, so the call can be chained.

        Raises:
            ImportError: If the optional 'websockets' package is missing.
        """
        try:
            import websockets.sync.client as ws_sync
        except ImportError as exc:
            raise ImportError(
                "The 'websockets' package is required for TryCUAConnection. "
                "Install it with: pip install websockets"
            ) from exc

        self._ws = ws_sync.connect(self.ws_url)

        # Authenticate if credentials provided
        if self.container_name and self.api_key:
            try:
                # NOTE(review): the authenticate response is not inspected,
                # matching the previous behavior - confirm whether a failed
                # login should raise here.
                self._send_command(
                    "authenticate",
                    {"container_name": self.container_name, "api_key": self.api_key},
                )
            except BaseException:
                # __exit__ never runs when __enter__ fails, so close the
                # socket here instead of leaking it.
                self.disconnect()
                raise

        return self

    def disconnect(self) -> None:
        """Close the WebSocket connection; a no-op when not connected."""
        ws = self._ws
        if ws:
            try:
                ws.close()
            finally:
                # Drop the handle even if close() raises so the connection
                # is never left pointing at a half-closed socket.
                self._ws = None

    def _send_command(self, command: str, params: dict[str, Any] | None = None) -> dict:
        """
        Send a command and return the decoded JSON response.

        Args:
            command: Name of the command to run on the server.
            params: Parameters for the command; an empty dict is sent when omitted.

        Raises:
            RuntimeError: If connect() has not been called.
        """
        if not self._ws:
            raise RuntimeError("Not connected. Call connect() first.")

        message = {"command": command, "params": params or {}}
        self._ws.send(json.dumps(message))
        response = self._ws.recv()
        return json.loads(response)

    def send_command(self, command: str, params: dict[str, Any] | None = None) -> dict:
        """Public method to send a command and return the response."""
        return self._send_command(command, params)

    def __enter__(self) -> "TryCUAConnection":
        return self.connect()

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.disconnect()
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class AsyncTryCUAConnection:
    """
    Async version of TryCUAConnection for use with asyncio.

    Every command is sent as a JSON object of the form
    ``{"command": <name>, "params": {...}}`` and the next message received
    on the socket is decoded as that command's JSON response.

    Usage:
        async with AsyncTryCUAConnection("ws://localhost:8000/ws") as conn:
            executor = AsyncTryCUAExecutor(conn)
            result = await executor.execute(Screenshot(kind="screenshot"))
    """

    def __init__(
        self,
        ws_url: str = "ws://localhost:8000/ws",
        *,
        container_name: str | None = None,
        api_key: str | None = None,
    ):
        """
        Initialize an async TryCUA connection configuration.

        No network activity happens here; await connect() (or use the
        instance in ``async with``) to open the socket.

        Args:
            ws_url: WebSocket URL to the computer-server
            container_name: Container name for cloud authentication (optional)
            api_key: API key for cloud authentication (optional)
        """
        self.ws_url = ws_url
        self.container_name = container_name
        self.api_key = api_key
        self._ws = None  # live socket, or None while disconnected

    async def connect(self) -> "AsyncTryCUAConnection":
        """
        Establish the WebSocket connection asynchronously.

        When both ``container_name`` and ``api_key`` are set, an
        ``authenticate`` command is sent right after the socket opens.

        Returns:
            This connection, so the call can be chained.

        Raises:
            ImportError: If the optional 'websockets' package is missing.
        """
        try:
            import websockets
        except ImportError as exc:
            raise ImportError(
                "The 'websockets' package is required for AsyncTryCUAConnection. "
                "Install it with: pip install websockets"
            ) from exc

        self._ws = await websockets.connect(self.ws_url)

        # Authenticate if credentials provided
        if self.container_name and self.api_key:
            try:
                # NOTE(review): the authenticate response is not inspected,
                # matching the previous behavior - confirm whether a failed
                # login should raise here.
                await self._send_command(
                    "authenticate",
                    {"container_name": self.container_name, "api_key": self.api_key},
                )
            except BaseException:
                # __aexit__ never runs when __aenter__ fails, so close the
                # socket here instead of leaking it.
                await self.disconnect()
                raise

        return self

    async def disconnect(self) -> None:
        """Close the WebSocket connection; a no-op when not connected."""
        ws = self._ws
        if ws:
            try:
                await ws.close()
            finally:
                # Drop the handle even if close() raises so the connection
                # is never left pointing at a half-closed socket.
                self._ws = None

    async def _send_command(
        self, command: str, params: dict[str, Any] | None = None
    ) -> dict:
        """
        Send a command and return the decoded JSON response.

        Args:
            command: Name of the command to run on the server.
            params: Parameters for the command; an empty dict is sent when omitted.

        Raises:
            RuntimeError: If connect() has not been awaited.
        """
        if not self._ws:
            raise RuntimeError("Not connected. Call connect() first.")

        message = {"command": command, "params": params or {}}
        await self._ws.send(json.dumps(message))
        response = await self._ws.recv()
        return json.loads(response)

    async def send_command(
        self, command: str, params: dict[str, Any] | None = None
    ) -> dict:
        """Public method to send a command and return the response."""
        return await self._send_command(command, params)

    async def __aenter__(self) -> "AsyncTryCUAConnection":
        return await self.connect()

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        await self.disconnect()
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class TryCUAExecutor(ComputerExecutor):
    """
    Execute computer use actions on a TryCUA computer-server.

    This executor maps CUAction types to TryCUA's computer control commands,
    enabling vision-based LLM loops to control a remote desktop. Any command
    whose response lacks a truthy ``success`` field raises RuntimeError.

    Example:
        with TryCUAConnection("ws://localhost:8000/ws") as conn:
            executor = TryCUAExecutor(conn)

            # Take a screenshot
            result = executor.execute(Screenshot(kind="screenshot"))
            print(f"Got {len(result['screenshot']['content'])} bytes")

            # Click at coordinates
            executor.execute(Click(kind="click", x=100, y=200, button="left"))

            # Type text
            executor.execute(Type(kind="type", text="Hello, world!"))
    """

    # Action kind -> name of the handler method that receives the action.
    # "screenshot" and "cursor_position" take no action argument and are
    # dispatched separately in execute().
    _ACTION_HANDLERS = {
        "click": "_click",
        "double_click": "_double_click",
        "triple_click": "_triple_click",
        "move": "_move",
        "scroll": "_scroll",
        "type": "_type",
        "keypress": "_keypress",
        "drag": "_drag",
        "wait": "_wait",
        "mouse_down": "_mouse_down",
        "mouse_up": "_mouse_up",
        "hold_key": "_hold_key",
        "navigate": "_navigate",
        "go_back": "_go_back",
        "go_forward": "_go_forward",
        "search": "_search",
        "bash": "_bash",
    }

    def __init__(self, connection: TryCUAConnection):
        """
        Initialize the executor with an active TryCUA connection.

        Args:
            connection: An active TryCUAConnection instance
        """
        self.conn = connection

    def execute(self, action: CUAction) -> CUActionResult:
        """
        Execute a computer use action on the TryCUA desktop.

        Args:
            action: The action to execute (Click, Type, Screenshot, etc.)

        Returns:
            CUActionResult with screenshot (if applicable) and metadata

        Raises:
            ValueError: If the action's ``kind`` is not supported.
            RuntimeError: If the server reports a failure for a command.
        """
        kind = action["kind"]

        if kind == "screenshot":
            return self._screenshot()
        if kind == "cursor_position":
            return self._cursor_position()

        handler_name = self._ACTION_HANDLERS.get(kind)
        if handler_name is None:
            raise ValueError(f"Unsupported action kind: {kind}")
        return getattr(self, handler_name)(action)

    def _send_checked(
        self, command: str, params: dict[str, Any] | None, label: str
    ) -> dict:
        """Send a command; raise RuntimeError('<label> failed: ...') unless it succeeded."""
        response = self.conn.send_command(command, params)
        if not response.get("success"):
            raise RuntimeError(f"{label} failed: {response.get('error')}")
        return response

    @staticmethod
    def _point_params(action: Any) -> dict[str, Any]:
        """Collect the optional x/y coordinates of an action, omitting missing ones."""
        params: dict[str, Any] = {}
        for axis in ("x", "y"):
            value = action.get(axis)
            if value is not None:
                params[axis] = value
        return params

    def _screenshot(self) -> CUActionResult:
        """Capture a screenshot of the desktop."""
        return self._screenshot_with_data({})

    def _click(self, action: Click) -> CUActionResult:
        """Execute a click action."""
        button = action.get("button", "left")

        # Only left and right clicks are sent as distinct commands; every
        # other button ("middle", "back", "forward", ...) falls back to a
        # left click, as TryCUA may not support them.
        cmd = "right_click" if button == "right" else "left_click"

        self._send_checked(cmd, self._point_params(action), "Click")
        return CUActionResult(screenshot=None, data={"action": "click"})

    def _double_click(self, action: DoubleClick) -> CUActionResult:
        """Execute a double click action."""
        self._send_checked("double_click", self._point_params(action), "Double click")
        return CUActionResult(screenshot=None, data={"action": "double_click"})

    def _triple_click(self, action: TripleClick) -> CUActionResult:
        """Execute a triple click action (3 rapid left clicks)."""
        params = self._point_params(action)

        # TryCUA doesn't have native triple click, so do 3 clicks
        for _ in range(3):
            self._send_checked("left_click", params, "Triple click")

        return CUActionResult(screenshot=None, data={"action": "triple_click"})

    def _move(self, action: Move) -> CUActionResult:
        """Move the mouse cursor."""
        self._send_checked("move_cursor", {"x": action["x"], "y": action["y"]}, "Move")
        return CUActionResult(screenshot=None, data={"action": "move"})

    def _scroll(self, action: Scroll) -> CUActionResult:
        """
        Execute a scroll action.

        The action carries dx/dy scroll amounts in pixels; positive dy
        scrolls down and negative dy scrolls up. Both axes are applied when
        both deltas are non-zero.
        """
        dx = action.get("dx", 0)
        dy = action.get("dy", 0)

        # First move to position if specified, then click so the element
        # under the cursor gets focus for the scroll.
        x = action.get("x")
        y = action.get("y")
        if x is not None and y is not None:
            self._send_checked("move_cursor", {"x": x, "y": y}, "Scroll")
            self._send_checked("left_click", {"x": x, "y": y}, "Scroll")

        if dy:
            # Convert pixel delta to scroll clicks (roughly 120 pixels per
            # click), always scrolling at least one click.
            clicks = max(1, int(abs(dy) // 120))
            command = "scroll_down" if dy > 0 else "scroll_up"
            self._send_checked(command, {"clicks": clicks}, "Scroll")

        if dx:
            # For horizontal scroll, use the generic scroll command
            self._send_checked("scroll", {"x": dx, "y": 0}, "Scroll")

        return CUActionResult(screenshot=None, data={"action": "scroll"})

    def _type(self, action: Type) -> CUActionResult:
        """Type text."""
        self._send_checked("type_text", {"text": action["text"]}, "Type")
        return CUActionResult(screenshot=None, data={"action": "type"})

    def _keypress(self, action: Keypress) -> CUActionResult:
        """Press key(s): a single key is pressed, several keys are sent as a hotkey."""
        keys = action["keys"]

        if len(keys) == 1:
            # Single key press
            self._send_checked("press_key", {"key": keys[0]}, "Keypress")
        else:
            # Key combination (hotkey)
            self._send_checked("hotkey", {"keys": keys}, "Keypress")

        return CUActionResult(screenshot=None, data={"action": "keypress"})

    def _drag(self, action: Drag) -> CUActionResult:
        """Execute a drag action along the action's ``path`` of (x, y) points."""
        start_x = action.get("start_x")
        start_y = action.get("start_y")

        # If start position specified, move there first
        if start_x is not None and start_y is not None:
            self._send_checked("move_cursor", {"x": start_x, "y": start_y}, "Drag")

        # Execute drag to each point in path
        for end_x, end_y in action.get("path", []):
            self._send_checked("drag_to", {"x": end_x, "y": end_y}, "Drag")

        return CUActionResult(screenshot=None, data={"action": "drag"})

    def _wait(self, action: Wait) -> CUActionResult:
        """Wait for a specified duration (``ms`` milliseconds)."""
        ms = action["ms"]
        time.sleep(ms / 1000.0)
        return CUActionResult(screenshot=None, data={"action": "wait", "ms": ms})

    def _mouse_button(self, command: str, action: Any, label: str) -> None:
        """Send mouse_down/mouse_up at the current cursor position."""
        # The command needs coordinates, so look up where the cursor is;
        # fall back to (0, 0) when the position cannot be read.
        x, y = 0, 0
        pos_response = self.conn.send_command("get_cursor_position")
        if pos_response.get("success"):
            pos = pos_response.get("position", {})
            x, y = pos.get("x", 0), pos.get("y", 0)

        self._send_checked(
            command, {"x": x, "y": y, "button": action.get("button", "left")}, label
        )

    def _mouse_down(self, action: MouseDown) -> CUActionResult:
        """Press and hold a mouse button."""
        self._mouse_button("mouse_down", action, "Mouse down")
        return CUActionResult(screenshot=None, data={"action": "mouse_down"})

    def _mouse_up(self, action: MouseUp) -> CUActionResult:
        """Release a mouse button."""
        self._mouse_button("mouse_up", action, "Mouse up")
        return CUActionResult(screenshot=None, data={"action": "mouse_up"})

    def _cursor_position(self) -> CUActionResult:
        """Get current cursor position."""
        response = self._send_checked("get_cursor_position", None, "Get cursor position")
        pos = response.get("position", {})
        return CUActionResult(
            screenshot=None,
            data={"action": "cursor_position", "x": pos.get("x"), "y": pos.get("y")},
        )

    def _hold_key(self, action: HoldKey) -> CUActionResult:
        """Hold a key for a duration (``ms`` milliseconds)."""
        key = action["key"]
        ms = action["ms"]

        self._send_checked("key_down", {"key": key}, "Hold key")
        try:
            time.sleep(ms / 1000.0)
        finally:
            # Always release, even if the wait is interrupted, so the key
            # is not left held down on the remote desktop.
            self._send_checked("key_up", {"key": key}, "Hold key")

        return CUActionResult(
            screenshot=None, data={"action": "hold_key", "key": key, "ms": ms}
        )

    def _open_url(self, url: str, label: str) -> None:
        """Load ``url`` via keyboard shortcuts: Ctrl+L, type the URL, Enter."""
        self._send_checked("hotkey", {"keys": ["ctrl", "l"]}, label)
        time.sleep(0.2)
        self._send_checked("type_text", {"text": url}, label)
        time.sleep(0.1)
        self._send_checked("press_key", {"key": "Return"}, label)
        time.sleep(1.5)  # pause before the follow-up screenshot

    def _navigate(self, action: Navigate) -> CUActionResult:
        """Navigate to a URL (assumes browser is open)."""
        url = action["url"]
        self._open_url(url, "Navigate")

        # Take screenshot after navigation
        return self._screenshot_with_data({"action": "navigate", "url": url})

    def _go_back(self, action: GoBack) -> CUActionResult:
        """Go back in browser history."""
        self._send_checked("hotkey", {"keys": ["alt", "Left"]}, "Go back")
        time.sleep(0.5)
        return self._screenshot_with_data({"action": "go_back"})

    def _go_forward(self, action: GoForward) -> CUActionResult:
        """Go forward in browser history."""
        self._send_checked("hotkey", {"keys": ["alt", "Right"]}, "Go forward")
        time.sleep(0.5)
        return self._screenshot_with_data({"action": "go_forward"})

    def _search(self, action: Search) -> CUActionResult:
        """Perform a web search by loading a Google search URL for the query."""
        from urllib.parse import quote

        query = action["query"]
        search_url = f"https://www.google.com/search?q={quote(query)}"

        # Navigate to search URL
        self._open_url(search_url, "Search")

        return self._screenshot_with_data({"action": "search", "query": query})

    def _bash(self, action: Bash) -> CUActionResult:
        """
        Execute a bash command.

        A ``restart`` request is not supported and is reported in the result
        data instead of raising. Without a command, nothing is sent.
        """
        command = action.get("command")
        restart = action.get("restart", False)

        if restart:
            # Not directly supported
            return CUActionResult(
                screenshot=None,
                data={"action": "bash", "error": "restart not supported"},
            )

        if not command:
            return CUActionResult(screenshot=None, data={"action": "bash"})

        response = self._send_checked("run_command", {"command": command}, "Bash command")
        return CUActionResult(
            screenshot=None,
            data={
                "action": "bash",
                "stdout": response.get("stdout", ""),
                "stderr": response.get("stderr", ""),
                "return_code": response.get("return_code", 0),
            },
        )

    def _screenshot_with_data(self, data: dict) -> CUActionResult:
        """Take a screenshot and include additional data."""
        response = self._send_checked("screenshot", None, "Screenshot")

        # Decode base64 image data
        image_data = response.get("image_data", "")
        content = base64.b64decode(image_data)

        return CUActionResult(
            screenshot=ScreenshotResult(media_type="image/png", content=content),
            data=data,
        )
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
class AsyncTryCUAExecutor:
|
|
615
|
+
"""
|
|
616
|
+
Async version of TryCUAExecutor for use with asyncio.
|
|
617
|
+
|
|
618
|
+
Example:
|
|
619
|
+
async with AsyncTryCUAConnection("ws://localhost:8000/ws") as conn:
|
|
620
|
+
executor = AsyncTryCUAExecutor(conn)
|
|
621
|
+
result = await executor.execute(Screenshot(kind="screenshot"))
|
|
622
|
+
"""
|
|
623
|
+
|
|
624
|
+
def __init__(self, connection: AsyncTryCUAConnection):
|
|
625
|
+
"""
|
|
626
|
+
Initialize the executor with an active async TryCUA connection.
|
|
627
|
+
|
|
628
|
+
Args:
|
|
629
|
+
connection: An active AsyncTryCUAConnection instance
|
|
630
|
+
"""
|
|
631
|
+
self.conn = connection
|
|
632
|
+
|
|
633
|
+
async def execute(self, action: CUAction) -> CUActionResult:
    """
    Dispatch a computer use action to the handler named ``_<kind>``.

    Args:
        action: The action to execute; its ``"kind"`` entry selects the
            handler (Click, Type, Screenshot, etc.)

    Returns:
        The CUActionResult produced by the selected handler.

    Raises:
        ValueError: If ``kind`` is not one of the supported action kinds.
    """
    kind = action["kind"]

    # Handlers that take no arguments besides self.
    bare_kinds = ("screenshot", "cursor_position")
    # Handlers that receive the action itself.
    action_kinds = (
        "click",
        "double_click",
        "triple_click",
        "move",
        "scroll",
        "type",
        "keypress",
        "drag",
        "wait",
        "mouse_down",
        "mouse_up",
        "hold_key",
        "navigate",
        "go_back",
        "go_forward",
        "search",
        "bash",
    )

    if kind in bare_kinds:
        return await getattr(self, f"_{kind}")()
    if kind in action_kinds:
        return await getattr(self, f"_{kind}")(action)
    raise ValueError(f"Unsupported action kind: {kind}")
|
|
685
|
+
|
|
686
|
+
async def _screenshot(self) -> CUActionResult:
    """Request a screenshot and return it with empty metadata.

    Returns:
        CUActionResult whose screenshot carries the base64-decoded
        ``image_data`` field of the reply, labelled ``image/png``.

    Raises:
        RuntimeError: If the reply does not report ``success``.
    """
    reply = await self.conn.send_command("screenshot")
    if not reply.get("success"):
        raise RuntimeError(f"Screenshot failed: {reply.get('error')}")

    # A reply without "image_data" decodes to empty bytes.
    png_bytes = base64.b64decode(reply.get("image_data", ""))
    shot = ScreenshotResult(media_type="image/png", content=png_bytes)
    return CUActionResult(screenshot=shot, data={})
|
|
699
|
+
|
|
700
|
+
async def _click(self, action: Click) -> CUActionResult:
    """Send a left or right click, optionally at given coordinates.

    Args:
        action: Click action; ``button`` defaults to ``"left"``, and ``x`` /
            ``y`` are forwarded only when present and not None.

    Raises:
        RuntimeError: If the reply does not report ``success``.
    """
    # Only "right" selects right_click; any other button value uses left_click.
    requested = action.get("button", "left")
    command = "right_click" if requested == "right" else "left_click"

    coords: dict[str, Any] = {}
    for axis in ("x", "y"):
        value = action.get(axis)
        if value is not None:
            coords[axis] = value

    reply = await self.conn.send_command(command, coords)
    if not reply.get("success"):
        raise RuntimeError(f"Click failed: {reply.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "click"})
|
|
724
|
+
|
|
725
|
+
async def _double_click(self, action: DoubleClick) -> CUActionResult:
    """Send a double click, optionally at given coordinates.

    Args:
        action: DoubleClick action; ``x`` / ``y`` are forwarded only when
            present and not None.

    Raises:
        RuntimeError: If the reply does not report ``success``.
    """
    coords: dict[str, Any] = {}
    for axis in ("x", "y"):
        value = action.get(axis)
        if value is not None:
            coords[axis] = value

    reply = await self.conn.send_command("double_click", coords)
    if not reply.get("success"):
        raise RuntimeError(f"Double click failed: {reply.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "double_click"})
|
|
738
|
+
|
|
739
|
+
async def _triple_click(self, action: TripleClick) -> CUActionResult:
    """Emulate a triple click by sending three consecutive left clicks.

    Args:
        action: TripleClick action; ``x`` / ``y`` are forwarded only when
            present and not None.

    Raises:
        RuntimeError: As soon as any of the three clicks does not report
            ``success``; the remaining clicks are not sent.
    """
    coords: dict[str, Any] = {}
    for axis in ("x", "y"):
        value = action.get(axis)
        if value is not None:
            coords[axis] = value

    clicks_sent = 0
    while clicks_sent < 3:
        reply = await self.conn.send_command("left_click", coords)
        if not reply.get("success"):
            raise RuntimeError(f"Triple click failed: {reply.get('error')}")
        clicks_sent += 1

    return CUActionResult(screenshot=None, data={"action": "triple_click"})
|
|
753
|
+
|
|
754
|
+
async def _move(self, action: Move) -> CUActionResult:
    """Move the cursor to the action's ``x`` / ``y`` coordinates.

    Raises:
        RuntimeError: If the reply does not report ``success``.
    """
    target = {"x": action["x"], "y": action["y"]}
    reply = await self.conn.send_command("move_cursor", target)
    if not reply.get("success"):
        raise RuntimeError(f"Move failed: {reply.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "move"})
|
|
763
|
+
|
|
764
|
+
async def _scroll(self, action: Scroll) -> CUActionResult:
    """Scroll vertically and/or horizontally, optionally at a given position.

    The action carries ``dx`` / ``dy`` scroll amounts in pixels. Positive
    ``dy`` scrolls down, negative ``dy`` scrolls up. Both axes are applied
    when both deltas are non-zero (vertical first), so a diagonal request no
    longer loses its horizontal component.

    Args:
        action: Scroll action; ``dx`` / ``dy`` default to 0 (an explicit
            None is treated as 0). When both ``x`` and ``y`` are given, the
            cursor is moved there and a left click is sent first.

    Returns:
        CUActionResult without a screenshot and ``{"action": "scroll"}``.

    Raises:
        RuntimeError: If a scroll command does not report ``success``.
    """
    # Treat an explicit None the same as a missing delta.
    dx = action.get("dx") or 0
    dy = action.get("dy") or 0

    # First move to position if specified, then click to focus
    x = action.get("x")
    y = action.get("y")
    if x is not None and y is not None:
        await self.conn.send_command("move_cursor", {"x": x, "y": y})
        # Click to ensure the element under cursor gets focus for scroll
        await self.conn.send_command("left_click", {"x": x, "y": y})

    if dy != 0:
        # Convert pixel delta to scroll clicks (roughly 120 pixels per click),
        # always sending at least one click; int() keeps the count integral
        # even when dy is a float.
        clicks = max(1, int(abs(dy) // 120))
        # Positive dy means scroll down, negative dy means scroll up.
        command = "scroll_down" if dy > 0 else "scroll_up"
        response = await self.conn.send_command(command, {"clicks": clicks})
        if not response.get("success"):
            raise RuntimeError(f"Scroll failed: {response.get('error')}")

    if dx != 0:
        # For horizontal scroll, use the generic scroll command
        response = await self.conn.send_command("scroll", {"x": dx, "y": 0})
        if not response.get("success"):
            raise RuntimeError(f"Scroll failed: {response.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "scroll"})
|
|
802
|
+
|
|
803
|
+
async def _type(self, action: Type) -> CUActionResult:
    """Send the action's ``text`` through the ``type_text`` command.

    Raises:
        RuntimeError: If the reply does not report ``success``.
    """
    payload = {"text": action["text"]}
    reply = await self.conn.send_command("type_text", payload)
    if not reply.get("success"):
        raise RuntimeError(f"Type failed: {reply.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "type"})
|
|
810
|
+
|
|
811
|
+
async def _keypress(self, action: Keypress) -> CUActionResult:
    """Press a single key or a key combination.

    Exactly one entry in ``keys`` is sent as ``press_key``; any other
    number of entries is sent together as ``hotkey``.

    Raises:
        RuntimeError: If the reply does not report ``success``.
    """
    keys = action["keys"]

    if len(keys) != 1:
        command, payload = "hotkey", {"keys": keys}
    else:
        command, payload = "press_key", {"key": keys[0]}

    reply = await self.conn.send_command(command, payload)
    if not reply.get("success"):
        raise RuntimeError(f"Keypress failed: {reply.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "keypress"})
|
|
824
|
+
|
|
825
|
+
async def _drag(self, action: Drag) -> CUActionResult:
    """Drag along the action's ``path``, one ``drag_to`` per point.

    Args:
        action: Drag action. When both ``start_x`` and ``start_y`` are
            given, the cursor is moved there first. Each ``path`` entry is
            unpacked into an (x, y) pair.

    Raises:
        RuntimeError: If any ``drag_to`` does not report ``success``.
    """
    origin_x = action.get("start_x")
    origin_y = action.get("start_y")
    waypoints = action.get("path", [])

    has_origin = origin_x is not None and origin_y is not None
    if has_origin:
        await self.conn.send_command("move_cursor", {"x": origin_x, "y": origin_y})

    for waypoint in waypoints:
        to_x, to_y = waypoint
        reply = await self.conn.send_command("drag_to", {"x": to_x, "y": to_y})
        if not reply.get("success"):
            raise RuntimeError(f"Drag failed: {reply.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "drag"})
|
|
841
|
+
|
|
842
|
+
async def _wait(self, action: Wait) -> CUActionResult:
    """Sleep for the action's ``ms`` milliseconds without blocking the loop.

    Returns:
        CUActionResult without a screenshot, echoing the waited ``ms``.
    """
    duration_ms = action["ms"]
    await asyncio.sleep(duration_ms / 1000.0)
    summary = {"action": "wait", "ms": action["ms"]}
    return CUActionResult(screenshot=None, data=summary)
|
|
848
|
+
|
|
849
|
+
async def _mouse_down(self, action: MouseDown) -> CUActionResult:
    """Press a mouse button at the current cursor position.

    The position is queried first; if that query does not report
    ``success`` (or a coordinate is missing), 0 is used for it.

    Args:
        action: MouseDown action; ``button`` defaults to ``"left"``.

    Raises:
        RuntimeError: If the ``mouse_down`` reply does not report ``success``.
    """
    lookup = await self.conn.send_command("get_cursor_position")
    # An unsuccessful lookup behaves like an empty position, i.e. (0, 0).
    position = lookup.get("position", {}) if lookup.get("success") else {}
    x, y = position.get("x", 0), position.get("y", 0)

    payload = {"x": x, "y": y, "button": action.get("button", "left")}
    reply = await self.conn.send_command("mouse_down", payload)
    if not reply.get("success"):
        raise RuntimeError(f"Mouse down failed: {reply.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "mouse_down"})
|
|
865
|
+
|
|
866
|
+
async def _mouse_up(self, action: MouseUp) -> CUActionResult:
    """Release a mouse button at the current cursor position.

    The position is queried first; if that query does not report
    ``success`` (or a coordinate is missing), 0 is used for it.

    Args:
        action: MouseUp action; ``button`` defaults to ``"left"``.

    Raises:
        RuntimeError: If the ``mouse_up`` reply does not report ``success``.
    """
    lookup = await self.conn.send_command("get_cursor_position")
    # An unsuccessful lookup behaves like an empty position, i.e. (0, 0).
    position = lookup.get("position", {}) if lookup.get("success") else {}
    x, y = position.get("x", 0), position.get("y", 0)

    payload = {"x": x, "y": y, "button": action.get("button", "left")}
    reply = await self.conn.send_command("mouse_up", payload)
    if not reply.get("success"):
        raise RuntimeError(f"Mouse up failed: {reply.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "mouse_up"})
|
|
882
|
+
|
|
883
|
+
async def _cursor_position(self) -> CUActionResult:
    """Query the cursor position and report it in the result data.

    Returns:
        CUActionResult without a screenshot; ``data`` holds ``x`` and ``y``
        from the reply's ``position`` (None when a coordinate is missing).

    Raises:
        RuntimeError: If the reply does not report ``success``.
    """
    reply = await self.conn.send_command("get_cursor_position")
    if not reply.get("success"):
        raise RuntimeError(f"Get cursor position failed: {reply.get('error')}")

    position = reply.get("position", {})
    summary = {
        "action": "cursor_position",
        "x": position.get("x"),
        "y": position.get("y"),
    }
    return CUActionResult(screenshot=None, data=summary)
|
|
894
|
+
|
|
895
|
+
async def _hold_key(self, action: HoldKey) -> CUActionResult:
    """Hold a key down for a duration, then release it.

    Args:
        action: HoldKey action providing ``key`` and the hold time ``ms``
            in milliseconds.

    Returns:
        CUActionResult without a screenshot, echoing ``key`` and ``ms``.

    Note:
        The replies to ``key_down`` / ``key_up`` are not inspected, matching
        the previous best-effort behaviour.
    """
    key = action["key"]
    ms = action["ms"]

    await self.conn.send_command("key_down", {"key": key})
    try:
        await asyncio.sleep(ms / 1000.0)
    finally:
        # Always release the key, even when the sleep is cancelled or
        # raises; otherwise the key would stay pressed on the remote side.
        await self.conn.send_command("key_up", {"key": key})

    return CUActionResult(
        screenshot=None, data={"action": "hold_key", "key": key, "ms": ms}
    )
|
|
907
|
+
|
|
908
|
+
async def _navigate(self, action: Navigate) -> CUActionResult:
    """Open the action's ``url`` via keyboard input and return a screenshot.

    Sends ctrl+l, types the URL, presses Return, pausing after each step,
    then captures the screen. Replies to these commands are not inspected.

    Returns:
        CUActionResult with a screenshot and the navigated ``url``.
    """
    url = action["url"]

    # (command, params, pause in seconds after the command)
    steps = (
        ("hotkey", {"keys": ["ctrl", "l"]}, 0.2),
        ("type_text", {"text": url}, 0.1),
        ("press_key", {"key": "Return"}, 1.5),
    )
    for command, params, pause in steps:
        await self.conn.send_command(command, params)
        await asyncio.sleep(pause)

    return await self._screenshot_with_data({"action": "navigate", "url": url})
|
|
920
|
+
|
|
921
|
+
async def _go_back(self, action: GoBack) -> CUActionResult:
    """Send alt+Left, pause half a second, and return a screenshot.

    The ``action`` argument is not read.
    """
    shortcut = {"keys": ["alt", "Left"]}
    await self.conn.send_command("hotkey", shortcut)
    await asyncio.sleep(0.5)
    result = await self._screenshot_with_data({"action": "go_back"})
    return result
|
|
926
|
+
|
|
927
|
+
async def _go_forward(self, action: GoForward) -> CUActionResult:
    """Send alt+Right, pause half a second, and return a screenshot.

    The ``action`` argument is not read.
    """
    shortcut = {"keys": ["alt", "Right"]}
    await self.conn.send_command("hotkey", shortcut)
    await asyncio.sleep(0.5)
    result = await self._screenshot_with_data({"action": "go_forward"})
    return result
|
|
932
|
+
|
|
933
|
+
async def _search(self, action: Search) -> CUActionResult:
    """Open a Google search for the action's ``query`` and return a screenshot.

    The percent-encoded query is appended to the search URL, which is then
    entered via ctrl+l, typing, and Return, pausing after each step.
    Replies to these commands are not inspected.

    Returns:
        CUActionResult with a screenshot and the original ``query``.
    """
    from urllib.parse import quote

    query = action["query"]
    encoded = quote(query)
    search_url = f"https://www.google.com/search?q={encoded}"

    # (command, params, pause in seconds after the command)
    steps = (
        ("hotkey", {"keys": ["ctrl", "l"]}, 0.2),
        ("type_text", {"text": search_url}, 0.1),
        ("press_key", {"key": "Return"}, 1.5),
    )
    for command, params, pause in steps:
        await self.conn.send_command(command, params)
        await asyncio.sleep(pause)

    return await self._screenshot_with_data({"action": "search", "query": query})
|
|
948
|
+
|
|
949
|
+
async def _bash(self, action: Bash) -> CUActionResult:
    """Run the action's ``command`` through ``run_command``.

    Args:
        action: Bash action. A truthy ``restart`` is reported as unsupported
            without running anything; a missing or empty ``command`` is a
            no-op.

    Returns:
        CUActionResult without a screenshot. After a command ran, ``data``
        carries ``stdout``, ``stderr`` and ``return_code`` from the reply
        (defaulting to ``""``, ``""`` and ``0``).

    Raises:
        RuntimeError: If the reply does not report ``success``.
    """
    if action.get("restart", False):
        return CUActionResult(
            screenshot=None,
            data={"action": "bash", "error": "restart not supported"},
        )

    command = action.get("command")
    if not command:
        # Nothing to run.
        return CUActionResult(screenshot=None, data={"action": "bash"})

    reply = await self.conn.send_command("run_command", {"command": command})
    if not reply.get("success"):
        raise RuntimeError(f"Bash command failed: {reply.get('error')}")

    outcome = {
        "action": "bash",
        "stdout": reply.get("stdout", ""),
        "stderr": reply.get("stderr", ""),
        "return_code": reply.get("return_code", 0),
    }
    return CUActionResult(screenshot=None, data=outcome)
|
|
976
|
+
|
|
977
|
+
async def _screenshot_with_data(self, data: dict) -> CUActionResult:
    """Capture the screen and return it together with caller-supplied metadata.

    Args:
        data: Metadata stored unchanged in the result's ``data`` field.

    Returns:
        CUActionResult whose screenshot carries the base64-decoded
        ``image_data`` field of the reply, labelled ``image/png``.

    Raises:
        RuntimeError: If the reply does not report ``success``.
    """
    reply = await self.conn.send_command("screenshot")
    if not reply.get("success"):
        raise RuntimeError(f"Screenshot failed: {reply.get('error')}")

    # A reply without "image_data" decodes to empty bytes.
    png_bytes = base64.b64decode(reply.get("image_data", ""))
    shot = ScreenshotResult(media_type="image/png", content=png_bytes)
    return CUActionResult(screenshot=shot, data=data)
|