hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -22
- hud/agents/__init__.py +13 -15
- hud/agents/base.py +599 -599
- hud/agents/claude.py +373 -373
- hud/agents/langchain.py +261 -250
- hud/agents/misc/__init__.py +7 -7
- hud/agents/misc/response_agent.py +82 -80
- hud/agents/openai.py +352 -352
- hud/agents/openai_chat_generic.py +154 -154
- hud/agents/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -742
- hud/agents/tests/test_claude.py +324 -324
- hud/agents/tests/test_client.py +363 -363
- hud/agents/tests/test_openai.py +237 -237
- hud/cli/__init__.py +617 -617
- hud/cli/__main__.py +8 -8
- hud/cli/analyze.py +371 -371
- hud/cli/analyze_metadata.py +230 -230
- hud/cli/build.py +498 -427
- hud/cli/clone.py +185 -185
- hud/cli/cursor.py +92 -92
- hud/cli/debug.py +392 -392
- hud/cli/docker_utils.py +83 -83
- hud/cli/init.py +280 -281
- hud/cli/interactive.py +353 -353
- hud/cli/mcp_server.py +764 -756
- hud/cli/pull.py +330 -336
- hud/cli/push.py +404 -370
- hud/cli/remote_runner.py +311 -311
- hud/cli/runner.py +160 -160
- hud/cli/tests/__init__.py +3 -3
- hud/cli/tests/test_analyze.py +284 -284
- hud/cli/tests/test_cli_init.py +265 -265
- hud/cli/tests/test_cli_main.py +27 -27
- hud/cli/tests/test_clone.py +142 -142
- hud/cli/tests/test_cursor.py +253 -253
- hud/cli/tests/test_debug.py +453 -453
- hud/cli/tests/test_mcp_server.py +139 -139
- hud/cli/tests/test_utils.py +388 -388
- hud/cli/utils.py +263 -263
- hud/clients/README.md +143 -143
- hud/clients/__init__.py +16 -16
- hud/clients/base.py +378 -379
- hud/clients/fastmcp.py +222 -222
- hud/clients/mcp_use.py +298 -278
- hud/clients/tests/__init__.py +1 -1
- hud/clients/tests/test_client_integration.py +111 -111
- hud/clients/tests/test_fastmcp.py +342 -342
- hud/clients/tests/test_protocol.py +188 -188
- hud/clients/utils/__init__.py +1 -1
- hud/clients/utils/retry_transport.py +160 -160
- hud/datasets.py +327 -322
- hud/misc/__init__.py +1 -1
- hud/misc/claude_plays_pokemon.py +292 -292
- hud/otel/__init__.py +35 -35
- hud/otel/collector.py +142 -142
- hud/otel/config.py +164 -164
- hud/otel/context.py +536 -536
- hud/otel/exporters.py +366 -366
- hud/otel/instrumentation.py +97 -97
- hud/otel/processors.py +118 -118
- hud/otel/tests/__init__.py +1 -1
- hud/otel/tests/test_processors.py +197 -197
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -114
- hud/server/helper/__init__.py +5 -5
- hud/server/low_level.py +132 -132
- hud/server/server.py +170 -166
- hud/server/tests/__init__.py +3 -3
- hud/settings.py +73 -73
- hud/shared/__init__.py +5 -5
- hud/shared/exceptions.py +180 -180
- hud/shared/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -157
- hud/shared/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -25
- hud/telemetry/instrument.py +379 -379
- hud/telemetry/job.py +309 -309
- hud/telemetry/replay.py +74 -74
- hud/telemetry/trace.py +83 -83
- hud/tools/__init__.py +33 -33
- hud/tools/base.py +365 -365
- hud/tools/bash.py +161 -161
- hud/tools/computer/__init__.py +15 -15
- hud/tools/computer/anthropic.py +437 -437
- hud/tools/computer/hud.py +376 -376
- hud/tools/computer/openai.py +295 -295
- hud/tools/computer/settings.py +82 -82
- hud/tools/edit.py +314 -314
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -539
- hud/tools/executors/pyautogui.py +621 -621
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -511
- hud/tools/playwright.py +412 -412
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -282
- hud/tools/tests/test_bash.py +158 -158
- hud/tools/tests/test_bash_extended.py +197 -197
- hud/tools/tests/test_computer.py +425 -425
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -259
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -145
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -72
- hud/tools/utils.py +50 -50
- hud/types.py +136 -136
- hud/utils/__init__.py +10 -10
- hud/utils/async_utils.py +65 -65
- hud/utils/design.py +236 -168
- hud/utils/mcp.py +55 -55
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -173
- hud/utils/tests/test_init.py +17 -17
- hud/utils/tests/test_progress.py +261 -261
- hud/utils/tests/test_telemetry.py +82 -82
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
- hud_python-0.4.3.dist-info/RECORD +131 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
- hud/agents/art.py +0 -101
- hud_python-0.4.1.dist-info/RECORD +0 -132
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/tools/executors/xdo.py
CHANGED
|
@@ -1,511 +1,511 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import base64
|
|
5
|
-
import logging
|
|
6
|
-
import os
|
|
7
|
-
import shlex
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
from tempfile import gettempdir
|
|
10
|
-
from typing import Literal
|
|
11
|
-
from uuid import uuid4
|
|
12
|
-
|
|
13
|
-
from hud.tools.types import ContentResult
|
|
14
|
-
from hud.tools.utils import run
|
|
15
|
-
|
|
16
|
-
from .base import BaseExecutor
|
|
17
|
-
|
|
18
|
-
# Optional directory for persisted screenshots, read once at import time
# from the SCREENSHOT_DIR environment variable (None when unset).
OUTPUT_DIR = os.environ.get("SCREENSHOT_DIR")
logger = logging.getLogger(__name__)

# Lookup table from lower-case CLA key names to X11/XDO key names.
CLA_TO_XDO = {
    # Editing and whitespace
    "enter": "Return",
    "tab": "Tab",
    "space": "space",
    "backspace": "BackSpace",
    "delete": "Delete",
    "escape": "Escape",
    "esc": "Escape",
    # Arrows
    "up": "Up",
    "down": "Down",
    "left": "Left",
    "right": "Right",
    # Modifiers
    "shift": "Shift_L",
    "shiftleft": "Shift_L",
    "shiftright": "Shift_R",
    "ctrl": "Control_L",
    "ctrlleft": "Control_L",
    "ctrlright": "Control_R",
    "alt": "Alt_L",
    "altleft": "Alt_L",
    "altright": "Alt_R",
    "win": "Super_L",
    "winleft": "Super_L",
    "winright": "Super_R",
    "cmd": "Control_L",  # Map cmd to ctrl for Linux
    "command": "Control_L",
    "super": "Super_L",
    # Navigation
    "pageup": "Page_Up",
    "pagedown": "Page_Down",
    "home": "Home",
    "end": "End",
    "insert": "Insert",
    "pause": "Pause",
    # Locks and print screen
    "capslock": "Caps_Lock",
    "numlock": "Num_Lock",
    "scrolllock": "Scroll_Lock",
    "printscreen": "Print",
    "prtsc": "Print",
}
# Function keys f1..f24 -> F1..F24
CLA_TO_XDO.update((f"f{number}", f"F{number}") for number in range(1, 25))
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
class XDOExecutor(BaseExecutor):
    """
    Low-level executor for xdotool commands.

    Handles display management and screenshot capture on Linux/X11 systems.
    Every action is rendered to a shell command string (optionally prefixed
    with ``DISPLAY=:<n>``) and handed to ``run``; screenshots are captured
    with ``scrot``.

    This executor should only be instantiated when X11 display is available.
    """

    # xdotool button numbers for the named mouse buttons.
    _XDO_BUTTONS = {"left": 1, "right": 3, "middle": 2, "back": 8, "forward": 9}
    # xdotool button numbers used to emit wheel scrolling in each direction.
    _XDO_SCROLL_BUTTONS = {"up": 4, "down": 5, "left": 6, "right": 7}
    # Standard conversion: 1 wheel click ≈ 100 pixels
    _PIXELS_PER_WHEEL_CLICK = 100

    def __init__(self, display_num: int | None = None) -> None:
        """
        Initialize with optional display number.

        Args:
            display_num: X display number; when given, every command is
                prefixed with ``DISPLAY=:<display_num>``.
        """
        super().__init__(display_num)

        if display_num is not None:
            self._display_prefix = f"DISPLAY=:{display_num} "
        else:
            self._display_prefix = ""

        self.xdotool = f"{self._display_prefix}xdotool"
        logger.info("XDOExecutor initialized")

    def _map_key(self, key: str) -> str:
        """Map CLA standard key to XDO key (unknown keys pass through unchanged)."""
        return CLA_TO_XDO.get(key.lower(), key)

    def _map_keys(self, keys: list[str]) -> list[str]:
        """
        Map CLA standard keys to XDO keys.

        Combinations such as ``"ctrl+a"`` are mapped part by part and
        re-joined with ``"+"``.
        """
        # Splitting a plain key on "+" yields a single part, so one expression
        # covers both single keys and combinations.
        return ["+".join(self._map_key(part) for part in key.split("+")) for key in keys]

    @classmethod
    def is_available(cls) -> bool:
        """
        Check if xdotool and X11 display are available.

        Returns:
            True if xdotool can be used, False otherwise
        """
        # Imported before the try block so the except clause can never hit an
        # unbound name.
        import subprocess

        if not os.environ.get("DISPLAY"):
            return False

        # Try a simple xdotool command to test availability
        try:
            # Try without display prefix if DISPLAY is already set
            probe = subprocess.run(
                ["xdotool", "getdisplaygeometry"],  # noqa: S607
                capture_output=True,
                timeout=2,
            )
        except Exception:
            # Deliberately best-effort: a missing binary, a timeout or any
            # other failure all mean "not available".
            logger.debug("xdotool availability probe failed", exc_info=True)
            return False
        return probe.returncode == 0

    async def execute(self, command: str, take_screenshot: bool = True) -> ContentResult:
        """
        Execute an xdotool command.

        Args:
            command: The xdotool command (without xdotool prefix)
            take_screenshot: Whether to capture a screenshot after execution

        Returns:
            ContentResult with output, error, and optional screenshot
        """
        returncode, stdout, stderr = await run(f"{self.xdotool} {command}")

        error = stderr or None
        if error is None and returncode != 0:
            # A failing command with empty stderr used to yield a falsy error,
            # which made the failure invisible to callers.
            error = f"xdotool exited with status {returncode}"

        result = ContentResult(output=stdout or None, error=error)

        if take_screenshot:
            # _screenshot_delay is not set in this class; presumably provided
            # by BaseExecutor - TODO confirm.
            await asyncio.sleep(self._screenshot_delay)
            result = await self._attach_screenshot(result)

        return result

    async def screenshot(self) -> str | None:
        """
        Take a screenshot and return base64 encoded image.

        The image is written by ``scrot`` to ``SCREENSHOT_DIR`` when that
        environment variable was set at import time (and kept there), or to a
        uniquely named file in the system temp dir that is removed afterwards.

        Returns:
            Base64 encoded PNG image or None if failed
        """
        if OUTPUT_DIR:
            target_dir = Path(OUTPUT_DIR)
            target_dir.mkdir(parents=True, exist_ok=True)
        else:
            # Generate a unique path in system temp dir without opening a file
            target_dir = Path(gettempdir())
        screenshot_path = target_dir / f"screenshot_{uuid4().hex}.png"

        # Quote the path: it is interpolated into a shell command and the
        # directory may contain spaces or shell metacharacters.
        screenshot_cmd = f"{self._display_prefix}scrot -p {shlex.quote(str(screenshot_path))}"

        try:
            returncode, _, stderr = await run(screenshot_cmd)
            if returncode != 0 or not screenshot_path.exists():
                logger.warning("scrot failed (exit status %s): %s", returncode, stderr)
                return None
            return base64.b64encode(screenshot_path.read_bytes()).decode()
        except Exception:
            # Screenshots are best-effort; report the problem instead of
            # swallowing it silently.
            logger.exception("Failed to capture screenshot")
            return None
        finally:
            # Remove the file unless user requested persistence via env var.
            # Done in finally so failed captures do not leak temp files.
            if not OUTPUT_DIR:
                try:
                    screenshot_path.unlink(missing_ok=True)
                except OSError:
                    logger.debug("Could not remove %s", screenshot_path, exc_info=True)

    # ===== Helper Methods =====

    async def _attach_screenshot(self, result: ContentResult) -> ContentResult:
        """Return ``result`` with a fresh screenshot attached, or unchanged if capture failed."""
        screenshot = await self.screenshot()
        if not screenshot:
            return result
        return ContentResult(output=result.output, error=result.error, base64_image=screenshot)

    @staticmethod
    def _combine_results(first: ContentResult, second: ContentResult) -> ContentResult:
        """Merge the non-empty outputs and errors of two results, newline separated."""
        output = "\n".join(text for text in (first.output, second.output) if text)
        error = "\n".join(text for text in (first.error, second.error) if text)
        return ContentResult(
            output=output or None,
            error=error or None,
            base64_image=second.base64_image or first.base64_image,
        )

    async def _send_key_events(self, action: str, keys: list[str]) -> ContentResult | None:
        """Run ``xdotool <action> <key>`` for each key; return the last result (None if no keys)."""
        last_result = None
        for key in keys:
            last_result = await self.execute(f"{action} {shlex.quote(key)}", take_screenshot=False)
        return last_result

    async def _hold_keys_context(self, keys: list[str] | None) -> None:
        """
        Press and hold keys, to be used with try/finally.

        Keys are mapped from CLA names to XDO names, the same way
        ``keydown``/``keyup`` do, so e.g. ``'cmd'`` or ``'win'`` work here too.

        Args:
            keys: List of keys to hold

        Example:
            try:
                await self._hold_keys_context(['ctrl'])
                # Do action with ctrl held
            finally:
                await self._release_keys(['ctrl'])
        """
        if keys:
            await self._send_key_events("keydown", self._map_keys(keys))

    async def _release_keys(self, keys: list[str] | None) -> None:
        """Release held keys (mapped like ``_hold_keys_context``), in reverse order."""
        if keys:
            await self._send_key_events("keyup", list(reversed(self._map_keys(keys))))

    # ===== CLA Action Implementations =====

    async def click(
        self,
        x: int | None = None,
        y: int | None = None,
        button: Literal["left", "right", "middle", "back", "forward"] = "left",
        pattern: list[int] | None = None,
        hold_keys: list[str] | None = None,
        take_screenshot: bool = True,
    ) -> ContentResult:
        """
        Click at specified coordinates or current position.

        Args:
            x: Target x coordinate; the pointer is moved only if both x and y are given
            y: Target y coordinate
            button: Mouse button name (unknown names fall back to the left button)
            pattern: Delays for multi-clicks; ``len(pattern) + 1`` clicks are
                sent and the first delay is used between all of them
            hold_keys: Keys held down for the duration of the click
            take_screenshot: Whether to capture a screenshot afterwards

        Returns:
            ContentResult of the click command
        """
        button_num = self._XDO_BUTTONS.get(button, 1)

        parts = []
        if x is not None and y is not None:
            parts.append(f"mousemove {x} {y}")
        if pattern:
            # Use first delay for all clicks
            parts.append(f"click --repeat {len(pattern) + 1} --delay {pattern[0]} {button_num}")
        else:
            parts.append(f"click {button_num}")

        try:
            # Pressing happens inside the try so a failure part-way through
            # still releases whatever was already pressed.
            await self._hold_keys_context(hold_keys)
            result = await self.execute(" ".join(parts), take_screenshot=take_screenshot)
        finally:
            await self._release_keys(hold_keys)

        return result

    async def write(
        self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
    ) -> ContentResult:
        """
        Type text with specified delay between keystrokes.

        Args:
            text: Text to type (shell-escaped before use)
            enter_after: Whether to press Return after typing
            delay: Delay passed to ``xdotool type --delay``
            take_screenshot: Whether to capture a screenshot afterwards

        Returns:
            ContentResult combining the type command and the optional Return press
        """
        result = await self.execute(
            f"type --delay {delay} -- {shlex.quote(text)}", take_screenshot=False
        )

        if enter_after:
            enter_result = await self.key("Return", take_screenshot=False)
            result = self._combine_results(result, enter_result)

        if take_screenshot:
            result = await self._attach_screenshot(result)

        return result

    async def key(self, key_sequence: str, take_screenshot: bool = True) -> ContentResult:
        """
        Press a key or key combination.

        Args:
            key_sequence: Key name or ``+``-joined combination; whitespace
                separates several keys sent by one xdotool call
            take_screenshot: Whether to capture a screenshot afterwards
        """
        # Quote token by token: the sequence ends up in a shell command, but
        # whitespace-separated keys must stay separate xdotool arguments.
        quoted = " ".join(shlex.quote(token) for token in key_sequence.split())
        return await self.execute(f"key -- {quoted}", take_screenshot=take_screenshot)

    async def press(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
        """Press a key combination (hotkey): keys are mapped to XDO names and joined with '+'."""
        key_combo = "+".join(self._map_keys(keys))
        return await self.key(key_combo, take_screenshot=take_screenshot)

    async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
        """Press and hold keys; returns the last key's result (empty result if no keys)."""
        last_result = await self._send_key_events("keydown", self._map_keys(keys))
        if last_result is None:
            return ContentResult()
        # Explicit None check above: whether to take a screenshot must not
        # depend on how ContentResult defines truthiness.
        if take_screenshot:
            last_result = await self._attach_screenshot(last_result)
        return last_result

    async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
        """Release held keys; returns the last key's result (empty result if no keys)."""
        last_result = await self._send_key_events("keyup", self._map_keys(keys))
        if last_result is None:
            return ContentResult()
        if take_screenshot:
            last_result = await self._attach_screenshot(last_result)
        return last_result

    async def scroll(
        self,
        x: int | None = None,
        y: int | None = None,
        scroll_x: int | None = None,
        scroll_y: int | None = None,
        hold_keys: list[str] | None = None,
        take_screenshot: bool = True,
    ) -> ContentResult:
        """
        Scroll at specified position.

        Pixel amounts are converted to wheel clicks (at least one click per
        requested axis). When both ``scroll_y`` and ``scroll_x`` are given,
        the vertical scroll is sent first, then the horizontal one.

        Args:
            x: x coordinate to scroll at; the pointer is moved only if both x and y are given
            y: y coordinate to scroll at
            scroll_x: Horizontal amount in pixels (positive scrolls right)
            scroll_y: Vertical amount in pixels (positive scrolls down)
            hold_keys: Keys held down while scrolling
            take_screenshot: Whether to capture a screenshot afterwards

        Returns:
            ContentResult of the scroll command(s)
        """
        steps = []
        if scroll_y:
            steps.append(("down" if scroll_y > 0 else "up", abs(scroll_y)))
        if scroll_x:
            steps.append(("right" if scroll_x > 0 else "left", abs(scroll_x)))

        if not steps:
            # Nothing to do, so do not press or release any keys either.
            return ContentResult(output="No scroll amount specified")

        try:
            await self._hold_keys_context(hold_keys)

            result = None
            for index, (direction, pixels) in enumerate(steps):
                clicks = max(1, pixels // self._PIXELS_PER_WHEEL_CLICK)
                cmd = f"click --repeat {clicks} {self._XDO_SCROLL_BUTTONS[direction]}"
                # The pointer only needs to be positioned once.
                if index == 0 and x is not None and y is not None:
                    cmd = f"mousemove {x} {y} {cmd}"

                is_last = index == len(steps) - 1
                step_result = await self.execute(
                    cmd, take_screenshot=take_screenshot and is_last
                )
                if result is None:
                    result = step_result
                else:
                    result = self._combine_results(result, step_result)
        finally:
            await self._release_keys(hold_keys)

        return result

    async def move(
        self,
        x: int | None = None,
        y: int | None = None,
        offset_x: int | None = None,
        offset_y: int | None = None,
        take_screenshot: bool = True,
    ) -> ContentResult:
        """
        Move mouse cursor.

        An absolute move is used when both x and y are given; otherwise a
        relative move is used when at least one offset is given (a missing
        offset counts as 0).
        """
        if x is not None and y is not None:
            return await self.execute(f"mousemove {x} {y}", take_screenshot=take_screenshot)

        if offset_x is None and offset_y is None:
            return ContentResult(output="No move coordinates specified")

        # "--" keeps negative offsets from being parsed as options.
        return await self.execute(
            f"mousemove_relative -- {offset_x or 0} {offset_y or 0}",
            take_screenshot=take_screenshot,
        )

    async def drag(
        self,
        path: list[tuple[int, int]],
        pattern: list[int] | None = None,
        hold_keys: list[str] | None = None,
        take_screenshot: bool = True,
    ) -> ContentResult:
        """
        Drag along a path.

        Args:
            path: At least two (x, y) points; the left button is pressed at
                the first point and released after the last
            pattern: Optional delays in milliseconds; ``pattern[i]`` is waited
                before moving to ``path[i + 1]``
            hold_keys: Keys held down for the duration of the drag
            take_screenshot: Whether to capture a screenshot afterwards

        Returns:
            ContentResult describing the drag; errors reported by individual
            xdotool steps are collected in ``error``
        """
        if len(path) < 2:
            return ContentResult(error="Drag path must have at least 2 points")

        failures: list[str] = []

        async def run_step(command: str) -> None:
            outcome = await self.execute(command, take_screenshot=False)
            if outcome.error:
                failures.append(outcome.error)

        try:
            await self._hold_keys_context(hold_keys)

            # Start drag
            start_x, start_y = path[0]
            await run_step(f"mousemove {start_x} {start_y}")
            await run_step("mousedown 1")
            try:
                # Move through the remaining points
                for index, (x, y) in enumerate(path[1:]):
                    if pattern and index < len(pattern):
                        await asyncio.sleep(pattern[index] / 1000.0)  # Convert ms to seconds
                    await run_step(f"mousemove {x} {y}")
            finally:
                # End drag - always release the button, even if a move failed.
                await run_step("mouseup 1")

            result = ContentResult(
                output=f"Dragged along {len(path)} points",
                error="\n".join(failures) or None,
            )
            if take_screenshot:
                result = ContentResult(
                    output=result.output,
                    error=result.error,
                    base64_image=await self.screenshot(),
                )
        finally:
            await self._release_keys(hold_keys)

        return result

    async def mouse_down(
        self,
        button: Literal["left", "right", "middle", "back", "forward"] = "left",
        take_screenshot: bool = True,
    ) -> ContentResult:
        """Press and hold a mouse button (unknown names fall back to the left button)."""
        button_num = self._XDO_BUTTONS.get(button, 1)
        return await self.execute(f"mousedown {button_num}", take_screenshot=take_screenshot)

    async def mouse_up(
        self,
        button: Literal["left", "right", "middle", "back", "forward"] = "left",
        take_screenshot: bool = True,
    ) -> ContentResult:
        """Release a mouse button (unknown names fall back to the left button)."""
        button_num = self._XDO_BUTTONS.get(button, 1)
        return await self.execute(f"mouseup {button_num}", take_screenshot=take_screenshot)

    async def hold_key(
        self, key: str, duration: float, take_screenshot: bool = True
    ) -> ContentResult:
        """
        Hold a key for a specified duration.

        Args:
            key: CLA key name (mapped to its XDO name)
            duration: Seconds to keep the key pressed
            take_screenshot: Whether to capture a screenshot afterwards

        Returns:
            ContentResult of the key release
        """
        escaped_key = shlex.quote(self._map_key(key))

        await self.execute(f"keydown {escaped_key}", take_screenshot=False)
        try:
            await asyncio.sleep(duration)
        finally:
            # Release even if the wait is interrupted (e.g. task cancellation),
            # otherwise the key would stay pressed.
            result = await self.execute(f"keyup {escaped_key}", take_screenshot=False)

        if take_screenshot:
            result = await self._attach_screenshot(result)

        return result

    async def position(self) -> ContentResult:
        """Get current cursor position (raw ``xdotool getmouselocation`` output)."""
        return await self.execute("getmouselocation", take_screenshot=False)
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import base64
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import shlex
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from tempfile import gettempdir
|
|
10
|
+
from typing import Literal
|
|
11
|
+
from uuid import uuid4
|
|
12
|
+
|
|
13
|
+
from hud.tools.types import ContentResult
|
|
14
|
+
from hud.tools.utils import run
|
|
15
|
+
|
|
16
|
+
from .base import BaseExecutor
|
|
17
|
+
|
|
18
|
+
# Optional directory for persisted screenshots, read once at import time
# from the SCREENSHOT_DIR environment variable (None when unset).
OUTPUT_DIR = os.environ.get("SCREENSHOT_DIR")
logger = logging.getLogger(__name__)

# Lookup table from lower-case CLA key names to X11/XDO key names.
CLA_TO_XDO = {
    # Editing and whitespace
    "enter": "Return",
    "tab": "Tab",
    "space": "space",
    "backspace": "BackSpace",
    "delete": "Delete",
    "escape": "Escape",
    "esc": "Escape",
    # Arrows
    "up": "Up",
    "down": "Down",
    "left": "Left",
    "right": "Right",
    # Modifiers
    "shift": "Shift_L",
    "shiftleft": "Shift_L",
    "shiftright": "Shift_R",
    "ctrl": "Control_L",
    "ctrlleft": "Control_L",
    "ctrlright": "Control_R",
    "alt": "Alt_L",
    "altleft": "Alt_L",
    "altright": "Alt_R",
    "win": "Super_L",
    "winleft": "Super_L",
    "winright": "Super_R",
    "cmd": "Control_L",  # Map cmd to ctrl for Linux
    "command": "Control_L",
    "super": "Super_L",
    # Navigation
    "pageup": "Page_Up",
    "pagedown": "Page_Down",
    "home": "Home",
    "end": "End",
    "insert": "Insert",
    "pause": "Pause",
    # Locks and print screen
    "capslock": "Caps_Lock",
    "numlock": "Num_Lock",
    "scrolllock": "Scroll_Lock",
    "printscreen": "Print",
    "prtsc": "Print",
}
# Function keys f1..f24 -> F1..F24
CLA_TO_XDO.update((f"f{number}", f"F{number}") for number in range(1, 25))
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class XDOExecutor(BaseExecutor):
|
|
66
|
+
"""
|
|
67
|
+
Low-level executor for xdotool commands.
|
|
68
|
+
Handles display management and screenshot capture on Linux/X11 systems.
|
|
69
|
+
|
|
70
|
+
This executor should only be instantiated when X11 display is available.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
def __init__(self, display_num: int | None = None) -> None:
|
|
74
|
+
"""Initialize with optional display number."""
|
|
75
|
+
super().__init__(display_num)
|
|
76
|
+
|
|
77
|
+
if display_num is not None:
|
|
78
|
+
self._display_prefix = f"DISPLAY=:{display_num} "
|
|
79
|
+
else:
|
|
80
|
+
self._display_prefix = ""
|
|
81
|
+
|
|
82
|
+
self.xdotool = f"{self._display_prefix}xdotool"
|
|
83
|
+
logger.info("XDOExecutor initialized")
|
|
84
|
+
|
|
85
|
+
def _map_key(self, key: str) -> str:
|
|
86
|
+
"""Map CLA standard key to XDO key."""
|
|
87
|
+
return CLA_TO_XDO.get(key.lower(), key)
|
|
88
|
+
|
|
89
|
+
def _map_keys(self, keys: list[str]) -> list[str]:
|
|
90
|
+
"""Map CLA standard keys to XDO keys."""
|
|
91
|
+
mapped_keys = []
|
|
92
|
+
for key in keys:
|
|
93
|
+
# Handle key combinations like "ctrl+a"
|
|
94
|
+
if "+" in key:
|
|
95
|
+
parts = key.split("+")
|
|
96
|
+
mapped_parts = [self._map_key(part) for part in parts]
|
|
97
|
+
mapped_keys.append("+".join(mapped_parts))
|
|
98
|
+
else:
|
|
99
|
+
mapped_keys.append(self._map_key(key))
|
|
100
|
+
return mapped_keys
|
|
101
|
+
|
|
102
|
+
@classmethod
|
|
103
|
+
def is_available(cls) -> bool:
|
|
104
|
+
"""
|
|
105
|
+
Check if xdotool and X11 display are available.
|
|
106
|
+
|
|
107
|
+
Returns:
|
|
108
|
+
True if xdotool can be used, False otherwise
|
|
109
|
+
"""
|
|
110
|
+
display = os.environ.get("DISPLAY")
|
|
111
|
+
if not display:
|
|
112
|
+
return False
|
|
113
|
+
|
|
114
|
+
# Try a simple xdotool command to test availability
|
|
115
|
+
try:
|
|
116
|
+
import subprocess
|
|
117
|
+
|
|
118
|
+
# Try without display prefix if DISPLAY is already set
|
|
119
|
+
result = subprocess.run(
|
|
120
|
+
["xdotool", "getdisplaygeometry"], # noqa: S607
|
|
121
|
+
capture_output=True,
|
|
122
|
+
timeout=2,
|
|
123
|
+
)
|
|
124
|
+
return result.returncode == 0
|
|
125
|
+
except (subprocess.TimeoutExpired, FileNotFoundError, Exception):
|
|
126
|
+
return False
|
|
127
|
+
|
|
128
|
+
async def execute(self, command: str, take_screenshot: bool = True) -> ContentResult:
|
|
129
|
+
"""
|
|
130
|
+
Execute an xdotool command.
|
|
131
|
+
|
|
132
|
+
Args:
|
|
133
|
+
command: The xdotool command (without xdotool prefix)
|
|
134
|
+
take_screenshot: Whether to capture a screenshot after execution
|
|
135
|
+
|
|
136
|
+
Returns:
|
|
137
|
+
ContentResult with output, error, and optional screenshot
|
|
138
|
+
"""
|
|
139
|
+
full_command = f"{self.xdotool} {command}"
|
|
140
|
+
|
|
141
|
+
# Execute command
|
|
142
|
+
returncode, stdout, stderr = await run(full_command)
|
|
143
|
+
|
|
144
|
+
# Prepare result
|
|
145
|
+
result = ContentResult(
|
|
146
|
+
output=stdout if stdout else None, error=stderr if stderr or returncode != 0 else None
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
# Take screenshot if requested
|
|
150
|
+
if take_screenshot:
|
|
151
|
+
await asyncio.sleep(self._screenshot_delay)
|
|
152
|
+
screenshot = await self.screenshot()
|
|
153
|
+
if screenshot:
|
|
154
|
+
result = ContentResult(
|
|
155
|
+
output=result.output, error=result.error, base64_image=screenshot
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
return result
|
|
159
|
+
|
|
160
|
+
async def screenshot(self) -> str | None:
|
|
161
|
+
"""
|
|
162
|
+
Take a screenshot and return base64 encoded image.
|
|
163
|
+
|
|
164
|
+
Returns:
|
|
165
|
+
Base64 encoded PNG image or None if failed
|
|
166
|
+
"""
|
|
167
|
+
# Real screenshot using scrot
|
|
168
|
+
if OUTPUT_DIR:
|
|
169
|
+
output_dir = Path(OUTPUT_DIR)
|
|
170
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
|
171
|
+
screenshot_path = output_dir / f"screenshot_{uuid4().hex}.png"
|
|
172
|
+
else:
|
|
173
|
+
# Generate a unique path in system temp dir without opening a file
|
|
174
|
+
screenshot_path = Path(gettempdir()) / f"screenshot_{uuid4().hex}.png"
|
|
175
|
+
|
|
176
|
+
screenshot_cmd = f"{self._display_prefix}scrot -p {screenshot_path}"
|
|
177
|
+
|
|
178
|
+
returncode, _, stderr = await run(screenshot_cmd)
|
|
179
|
+
|
|
180
|
+
if returncode == 0 and screenshot_path.exists():
|
|
181
|
+
try:
|
|
182
|
+
image_data = screenshot_path.read_bytes()
|
|
183
|
+
# Remove the file unless user requested persistence via env var
|
|
184
|
+
if not OUTPUT_DIR:
|
|
185
|
+
screenshot_path.unlink(missing_ok=True)
|
|
186
|
+
return base64.b64encode(image_data).decode()
|
|
187
|
+
except Exception:
|
|
188
|
+
return None
|
|
189
|
+
|
|
190
|
+
return None
|
|
191
|
+
|
|
192
|
+
# ===== Helper Methods =====
|
|
193
|
+
|
|
194
|
+
async def _hold_keys_context(self, keys: list[str] | None) -> None:
|
|
195
|
+
"""
|
|
196
|
+
Press and hold keys, to be used with try/finally.
|
|
197
|
+
|
|
198
|
+
Args:
|
|
199
|
+
keys: List of keys to hold
|
|
200
|
+
|
|
201
|
+
Example:
|
|
202
|
+
await self._hold_keys_context(['ctrl'])
|
|
203
|
+
try:
|
|
204
|
+
# Do action with ctrl held
|
|
205
|
+
finally:
|
|
206
|
+
await self._release_keys(['ctrl'])
|
|
207
|
+
"""
|
|
208
|
+
if keys:
|
|
209
|
+
for key in keys:
|
|
210
|
+
escaped_key = shlex.quote(key)
|
|
211
|
+
await self.execute(f"keydown {escaped_key}", take_screenshot=False)
|
|
212
|
+
|
|
213
|
+
async def _release_keys(self, keys: list[str] | None) -> None:
|
|
214
|
+
"""Release held keys."""
|
|
215
|
+
if keys:
|
|
216
|
+
for key in reversed(keys): # Release in reverse order
|
|
217
|
+
escaped_key = shlex.quote(key)
|
|
218
|
+
await self.execute(f"keyup {escaped_key}", take_screenshot=False)
|
|
219
|
+
|
|
220
|
+
# ===== CLA Action Implementations =====
|
|
221
|
+
|
|
222
|
+
async def click(
    self,
    x: int | None = None,
    y: int | None = None,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    pattern: list[int] | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Click a mouse button, optionally after moving to ``(x, y)``.

    Args:
        x, y: Target coordinates; the pointer is moved only when both are given
        button: Button name, translated to an xdotool button number (unknown names fall back to 1)
        pattern: When non-empty, produces ``len(pattern) + 1`` clicks with ``pattern[0]`` as the delay
        hold_keys: Keys pressed before the click and released afterwards
        take_screenshot: Forwarded to ``execute``

    Returns:
        The result of executing the click command
    """
    # xdotool button numbers for the supported button names
    xdo_button = {"left": 1, "middle": 2, "right": 3, "back": 8, "forward": 9}.get(button, 1)
    has_target = x is not None and y is not None

    await self._hold_keys_context(hold_keys)
    try:
        if not pattern:
            # Single click
            cmd = f"mousemove {x} {y} click {xdo_button}" if has_target else f"click {xdo_button}"
        else:
            # Multi-click: the first delay of the pattern is applied to every click
            repeats = len(pattern) + 1
            gap = pattern[0]
            if has_target:
                cmd = f"mousemove {x} {y} click --repeat {repeats} --delay {gap} {xdo_button}"  # noqa: E501
            else:
                cmd = f"click --repeat {repeats} --delay {gap} {xdo_button}"

        result = await self.execute(cmd, take_screenshot=take_screenshot)
    finally:
        # Held keys are released even if the click itself failed
        await self._release_keys(hold_keys)

    return result
|
|
262
|
+
|
|
263
|
+
async def write(
    self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
) -> ContentResult:
    """
    Type text with specified delay between keystrokes.

    Args:
        text: Text to type; shell-escaped before being placed in the command
        enter_after: When True, a ``Return`` key press is sent after the text
        delay: Value passed to the ``type --delay`` option
        take_screenshot: When True, a screenshot is attached to the returned result

    Returns:
        Result of the type command, merged with the result of the ``Return``
        press when ``enter_after`` is set
    """
    # Escape text for shell
    escaped_text = shlex.quote(text)
    cmd = f"type --delay {delay} -- {escaped_text}"
    result = await self.execute(cmd, take_screenshot=False)

    if enter_after:
        enter_result = await self.key("Return", take_screenshot=False)
        # Combine outputs
        combined_output = (result.output or "") + "\n" + (enter_result.output or "")
        # Join only the errors that are actually present, so a single error is
        # not padded with a stray leading/trailing newline.
        errors = [err for err in (result.error, enter_result.error) if err]
        combined_error = "\n".join(errors) if errors else None
        result = ContentResult(output=combined_output.strip(), error=combined_error)

    if take_screenshot:
        screenshot = await self.screenshot()
        if screenshot:
            # Rebuild the result so the image is carried alongside output/error
            result = ContentResult(
                output=result.output, error=result.error, base64_image=screenshot
            )

    return result
|
|
289
|
+
|
|
290
|
+
async def key(self, key_sequence: str, take_screenshot: bool = True) -> ContentResult:
    """
    Press a key or key combination.

    Args:
        key_sequence: One or more whitespace-separated key names/combinations
            (e.g. ``"ctrl+c"``)
        take_screenshot: Forwarded to ``execute``

    Returns:
        The result of executing the ``key`` command
    """
    # Escape each whitespace-separated token, matching how keydown/keyup escape
    # their keys, so shell metacharacters in a key name cannot alter the command.
    escaped_sequence = " ".join(shlex.quote(token) for token in key_sequence.split())
    return await self.execute(f"key -- {escaped_sequence}", take_screenshot=take_screenshot)
|
|
293
|
+
|
|
294
|
+
async def press(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
    """
    Press several keys together as one hotkey.

    The keys are passed through ``_map_keys``, joined with ``+`` and handed to ``key``.
    """
    hotkey = "+".join(self._map_keys(keys))
    return await self.key(hotkey, take_screenshot=take_screenshot)
|
|
301
|
+
|
|
302
|
+
async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
    """
    Press the given keys down one after another, leaving them held.

    Returns the result of the last ``keydown`` command (with a screenshot
    attached when requested and available), or an empty ``ContentResult``
    when there was nothing to press.
    """
    outcome = None
    # Keys are translated by _map_keys and shell-escaped individually
    for xdo_key in self._map_keys(keys):
        outcome = await self.execute(f"keydown {shlex.quote(xdo_key)}", take_screenshot=False)

    if take_screenshot and outcome:
        image = await self.screenshot()
        if image:
            outcome = ContentResult(
                output=outcome.output, error=outcome.error, base64_image=image
            )

    return outcome or ContentResult()
|
|
319
|
+
|
|
320
|
+
async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
    """
    Release the given keys one after another, in the order supplied.

    Returns the result of the last ``keyup`` command (with a screenshot
    attached when requested and available), or an empty ``ContentResult``
    when there was nothing to release.
    """
    outcome = None
    # Keys are translated by _map_keys and shell-escaped individually
    for xdo_key in self._map_keys(keys):
        outcome = await self.execute(f"keyup {shlex.quote(xdo_key)}", take_screenshot=False)

    if take_screenshot and outcome:
        image = await self.screenshot()
        if image:
            outcome = ContentResult(
                output=outcome.output, error=outcome.error, base64_image=image
            )

    return outcome or ContentResult()
|
|
337
|
+
|
|
338
|
+
async def scroll(
    self,
    x: int | None = None,
    y: int | None = None,
    scroll_x: int | None = None,
    scroll_y: int | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Scroll at specified position.

    Scroll amounts are given in pixels and converted to wheel clicks
    (at least one click per non-zero axis). When both ``scroll_x`` and
    ``scroll_y`` are non-zero, both axes are scrolled in a single command,
    vertical first.

    Args:
        x, y: Position to move the pointer to first; used only when both are given
        scroll_x: Horizontal amount; positive scrolls right, negative left
        scroll_y: Vertical amount; positive scrolls down, negative up
        hold_keys: Keys pressed before scrolling and released afterwards
        take_screenshot: Forwarded to ``execute``

    Returns:
        The result of executing the scroll command, or a result whose output is
        "No scroll amount specified" when neither amount is non-zero
    """
    # Convert pixels to wheel clicks
    # Standard conversion: 1 wheel click ≈ 100 pixels
    PIXELS_PER_WHEEL_CLICK = 100

    # xdotool wheel buttons: 4=up, 5=down, 6=left, 7=right.
    # Each entry is (amount, button for positive amount, button for negative amount).
    axes = ((scroll_y, 5, 4), (scroll_x, 7, 6))
    wheel_parts = []
    for amount, positive_button, negative_button in axes:
        if not amount:
            continue
        clicks = max(1, abs(amount) // PIXELS_PER_WHEEL_CLICK)
        button = positive_button if amount > 0 else negative_button
        wheel_parts.append(f"click --repeat {clicks} {button}")

    # Hold keys if specified
    await self._hold_keys_context(hold_keys)

    try:
        if wheel_parts:
            # xdotool commands can be chained, so both axes go into one invocation
            cmd = " ".join(wheel_parts)
            if x is not None and y is not None:
                cmd = f"mousemove {x} {y} {cmd}"
            result = await self.execute(cmd, take_screenshot=take_screenshot)
        else:
            result = ContentResult(output="No scroll amount specified")
    finally:
        # Release held keys
        await self._release_keys(hold_keys)

    return result
|
|
394
|
+
|
|
395
|
+
async def move(
    self,
    x: int | None = None,
    y: int | None = None,
    offset_x: int | None = None,
    offset_y: int | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Move the mouse pointer, either to an absolute position or by an offset.

    An absolute move is used when both ``x`` and ``y`` are given. Otherwise a
    relative move is used when at least one offset is given (a missing offset
    counts as 0). With neither, nothing is executed.
    """
    if x is not None and y is not None:
        # Absolute move
        cmd = f"mousemove {x} {y}"
    elif offset_x is None and offset_y is None:
        return ContentResult(output="No move coordinates specified")
    else:
        # Relative move
        dx = offset_x or 0
        dy = offset_y or 0
        cmd = f"mousemove_relative -- {dx} {dy}"

    return await self.execute(cmd, take_screenshot=take_screenshot)
|
|
416
|
+
|
|
417
|
+
async def drag(
    self,
    path: list[tuple[int, int]],
    pattern: list[int] | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Drag along a path.

    The pointer is moved to the first point, button 1 is pressed, the pointer
    visits every remaining point in order, and the button is released.

    Args:
        path: Points to visit; at least 2 are required
        pattern: Optional delays in milliseconds; ``pattern[i]`` is waited before
            moving to ``path[i + 1]`` (extra points get no delay)
        hold_keys: Keys pressed before the drag and released afterwards
        take_screenshot: When True, a screenshot is taken after the drag and attached

    Returns:
        A result describing the drag, or an error result when the path is too short
    """
    if len(path) < 2:
        return ContentResult(error="Drag path must have at least 2 points")

    # Hold keys if specified
    await self._hold_keys_context(hold_keys)

    try:
        # Start drag
        start_x, start_y = path[0]
        await self.execute(f"mousemove {start_x} {start_y}", take_screenshot=False)
        await self.execute("mousedown 1", take_screenshot=False)

        try:
            # Move through intermediate points
            for step, (x, y) in enumerate(path[1:]):
                # Apply delay if pattern is specified
                if pattern and step < len(pattern):
                    await asyncio.sleep(pattern[step] / 1000.0)  # Convert ms to seconds

                await self.execute(f"mousemove {x} {y}", take_screenshot=False)
        finally:
            # End drag. Done in `finally` so the button is not left pressed when a
            # move fails or the task is cancelled during a delay.
            await self.execute("mouseup 1", take_screenshot=False)

        # Take final screenshot if requested
        if take_screenshot:
            screenshot = await self.screenshot()
            result = ContentResult(
                output=f"Dragged along {len(path)} points", base64_image=screenshot
            )
        else:
            result = ContentResult(output=f"Dragged along {len(path)} points")

    finally:
        # Release held keys
        await self._release_keys(hold_keys)

    return result
|
|
462
|
+
|
|
463
|
+
async def mouse_down(
    self,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Press a mouse button without releasing it.

    The button name is translated to an xdotool button number; names that are
    not in the table fall back to button 1.
    """
    xdo_buttons = dict(left=1, middle=2, right=3, back=8, forward=9)
    cmd = f"mousedown {xdo_buttons.get(button, 1)}"
    return await self.execute(cmd, take_screenshot=take_screenshot)
|
|
472
|
+
|
|
473
|
+
async def mouse_up(
    self,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Release a mouse button.

    The button name is translated to an xdotool button number; names that are
    not in the table fall back to button 1.
    """
    xdo_buttons = dict(left=1, middle=2, right=3, back=8, forward=9)
    cmd = f"mouseup {xdo_buttons.get(button, 1)}"
    return await self.execute(cmd, take_screenshot=take_screenshot)
|
|
482
|
+
|
|
483
|
+
async def hold_key(
    self, key: str, duration: float, take_screenshot: bool = True
) -> ContentResult:
    """
    Hold a key for a specified duration.

    Args:
        key: Key to hold; translated by ``_map_key`` and shell-escaped
        duration: Time to keep the key down, passed to ``asyncio.sleep``
        take_screenshot: When True, a screenshot is attached to the returned result

    Returns:
        Result of the ``keyup`` command, with the screenshot attached when one
        was requested and could be taken
    """
    # Map CLA key to XDO key
    mapped_key = self._map_key(key)
    escaped_key = shlex.quote(mapped_key)

    # Press the key
    await self.execute(f"keydown {escaped_key}", take_screenshot=False)

    try:
        # Wait
        await asyncio.sleep(duration)
    finally:
        # Release the key. Done in `finally` so the key is not left held down
        # when the wait is cancelled or raises.
        result = await self.execute(f"keyup {escaped_key}", take_screenshot=False)

    if take_screenshot:
        screenshot = await self.screenshot()
        if screenshot:
            result = ContentResult(
                output=result.output, error=result.error, base64_image=screenshot
            )

    return result
|
|
508
|
+
|
|
509
|
+
async def position(self) -> ContentResult:
    """Query the current mouse location; no screenshot is taken."""
    location = await self.execute("getmouselocation", take_screenshot=False)
    return location
|