hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -22
- hud/agents/__init__.py +13 -15
- hud/agents/base.py +599 -599
- hud/agents/claude.py +373 -373
- hud/agents/langchain.py +261 -250
- hud/agents/misc/__init__.py +7 -7
- hud/agents/misc/response_agent.py +82 -80
- hud/agents/openai.py +352 -352
- hud/agents/openai_chat_generic.py +154 -154
- hud/agents/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -742
- hud/agents/tests/test_claude.py +324 -324
- hud/agents/tests/test_client.py +363 -363
- hud/agents/tests/test_openai.py +237 -237
- hud/cli/__init__.py +617 -617
- hud/cli/__main__.py +8 -8
- hud/cli/analyze.py +371 -371
- hud/cli/analyze_metadata.py +230 -230
- hud/cli/build.py +498 -427
- hud/cli/clone.py +185 -185
- hud/cli/cursor.py +92 -92
- hud/cli/debug.py +392 -392
- hud/cli/docker_utils.py +83 -83
- hud/cli/init.py +280 -281
- hud/cli/interactive.py +353 -353
- hud/cli/mcp_server.py +764 -756
- hud/cli/pull.py +330 -336
- hud/cli/push.py +404 -370
- hud/cli/remote_runner.py +311 -311
- hud/cli/runner.py +160 -160
- hud/cli/tests/__init__.py +3 -3
- hud/cli/tests/test_analyze.py +284 -284
- hud/cli/tests/test_cli_init.py +265 -265
- hud/cli/tests/test_cli_main.py +27 -27
- hud/cli/tests/test_clone.py +142 -142
- hud/cli/tests/test_cursor.py +253 -253
- hud/cli/tests/test_debug.py +453 -453
- hud/cli/tests/test_mcp_server.py +139 -139
- hud/cli/tests/test_utils.py +388 -388
- hud/cli/utils.py +263 -263
- hud/clients/README.md +143 -143
- hud/clients/__init__.py +16 -16
- hud/clients/base.py +378 -379
- hud/clients/fastmcp.py +222 -222
- hud/clients/mcp_use.py +298 -278
- hud/clients/tests/__init__.py +1 -1
- hud/clients/tests/test_client_integration.py +111 -111
- hud/clients/tests/test_fastmcp.py +342 -342
- hud/clients/tests/test_protocol.py +188 -188
- hud/clients/utils/__init__.py +1 -1
- hud/clients/utils/retry_transport.py +160 -160
- hud/datasets.py +327 -322
- hud/misc/__init__.py +1 -1
- hud/misc/claude_plays_pokemon.py +292 -292
- hud/otel/__init__.py +35 -35
- hud/otel/collector.py +142 -142
- hud/otel/config.py +164 -164
- hud/otel/context.py +536 -536
- hud/otel/exporters.py +366 -366
- hud/otel/instrumentation.py +97 -97
- hud/otel/processors.py +118 -118
- hud/otel/tests/__init__.py +1 -1
- hud/otel/tests/test_processors.py +197 -197
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -114
- hud/server/helper/__init__.py +5 -5
- hud/server/low_level.py +132 -132
- hud/server/server.py +170 -166
- hud/server/tests/__init__.py +3 -3
- hud/settings.py +73 -73
- hud/shared/__init__.py +5 -5
- hud/shared/exceptions.py +180 -180
- hud/shared/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -157
- hud/shared/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -25
- hud/telemetry/instrument.py +379 -379
- hud/telemetry/job.py +309 -309
- hud/telemetry/replay.py +74 -74
- hud/telemetry/trace.py +83 -83
- hud/tools/__init__.py +33 -33
- hud/tools/base.py +365 -365
- hud/tools/bash.py +161 -161
- hud/tools/computer/__init__.py +15 -15
- hud/tools/computer/anthropic.py +437 -437
- hud/tools/computer/hud.py +376 -376
- hud/tools/computer/openai.py +295 -295
- hud/tools/computer/settings.py +82 -82
- hud/tools/edit.py +314 -314
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -539
- hud/tools/executors/pyautogui.py +621 -621
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -511
- hud/tools/playwright.py +412 -412
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -282
- hud/tools/tests/test_bash.py +158 -158
- hud/tools/tests/test_bash_extended.py +197 -197
- hud/tools/tests/test_computer.py +425 -425
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -259
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -145
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -72
- hud/tools/utils.py +50 -50
- hud/types.py +136 -136
- hud/utils/__init__.py +10 -10
- hud/utils/async_utils.py +65 -65
- hud/utils/design.py +236 -168
- hud/utils/mcp.py +55 -55
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -173
- hud/utils/tests/test_init.py +17 -17
- hud/utils/tests/test_progress.py +261 -261
- hud/utils/tests/test_telemetry.py +82 -82
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
- hud_python-0.4.3.dist-info/RECORD +131 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
- hud/agents/art.py +0 -101
- hud_python-0.4.1.dist-info/RECORD +0 -132
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/tools/executors/pyautogui.py
CHANGED
|
@@ -1,621 +1,621 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import base64
|
|
5
|
-
import logging
|
|
6
|
-
import os
|
|
7
|
-
from io import BytesIO
|
|
8
|
-
from typing import Any, Literal
|
|
9
|
-
|
|
10
|
-
from hud.tools.types import ContentResult
|
|
11
|
-
|
|
12
|
-
from .base import BaseExecutor
|
|
13
|
-
|
|
14
|
-
logger = logging.getLogger(__name__)
|
|
15
|
-
|
|
16
|
-
# Lazy loading for pyautogui
|
|
17
|
-
_pyautogui = None
|
|
18
|
-
_pyautogui_available = None
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def _get_pyautogui() -> Any | None:
|
|
22
|
-
"""Lazily import and return pyautogui module."""
|
|
23
|
-
global _pyautogui, _pyautogui_available
|
|
24
|
-
|
|
25
|
-
if _pyautogui_available is False:
|
|
26
|
-
return None
|
|
27
|
-
|
|
28
|
-
if _pyautogui is None:
|
|
29
|
-
# Set display if not already set
|
|
30
|
-
if "DISPLAY" not in os.environ:
|
|
31
|
-
try:
|
|
32
|
-
from hud.tools.computer import computer_settings
|
|
33
|
-
|
|
34
|
-
os.environ["DISPLAY"] = str(computer_settings.DISPLAY_NUM)
|
|
35
|
-
except (ImportError, AttributeError):
|
|
36
|
-
os.environ["DISPLAY"] = ":0"
|
|
37
|
-
|
|
38
|
-
try:
|
|
39
|
-
import pyautogui # type: ignore[import-not-found]
|
|
40
|
-
|
|
41
|
-
_pyautogui = pyautogui
|
|
42
|
-
_pyautogui_available = True
|
|
43
|
-
|
|
44
|
-
# Configure PyAutoGUI settings
|
|
45
|
-
_pyautogui.FAILSAFE = False # Disable fail-safe feature
|
|
46
|
-
_pyautogui.PAUSE = 0.1 # Small pause between actions
|
|
47
|
-
except ImportError:
|
|
48
|
-
_pyautogui_available = False
|
|
49
|
-
logger.warning("PyAutoGUI is not available")
|
|
50
|
-
return None
|
|
51
|
-
except Exception as e:
|
|
52
|
-
_pyautogui_available = False
|
|
53
|
-
logger.warning("Failed to initialize PyAutoGUI: %s", e)
|
|
54
|
-
return None
|
|
55
|
-
|
|
56
|
-
return _pyautogui
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
# Map CLA standard keys to PyAutoGUI keys (only where they differ)
|
|
60
|
-
CLA_TO_PYAUTOGUI = {
|
|
61
|
-
# Most keys are the same in PyAutoGUI, only map the differences
|
|
62
|
-
"escape": "esc",
|
|
63
|
-
"enter": "return",
|
|
64
|
-
"pageup": "pgup",
|
|
65
|
-
"pagedown": "pgdn",
|
|
66
|
-
"printscreen": "prtscr",
|
|
67
|
-
"prtsc": "prtscr",
|
|
68
|
-
"super": "win",
|
|
69
|
-
"command": "cmd",
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
class PyAutoGUIExecutor(BaseExecutor):
|
|
74
|
-
"""
|
|
75
|
-
Cross-platform executor using PyAutoGUI.
|
|
76
|
-
Works on Windows, macOS, and Linux.
|
|
77
|
-
|
|
78
|
-
This executor should only be instantiated when PyAutoGUI is available and functional.
|
|
79
|
-
"""
|
|
80
|
-
|
|
81
|
-
def __init__(self, display_num: int | None = None) -> None:
|
|
82
|
-
"""
|
|
83
|
-
Initialize the executor.
|
|
84
|
-
|
|
85
|
-
Args:
|
|
86
|
-
display_num: X display number (used only on Linux, ignored on Windows/macOS)
|
|
87
|
-
"""
|
|
88
|
-
super().__init__(display_num)
|
|
89
|
-
self._pyautogui = None
|
|
90
|
-
logger.info("PyAutoGUIExecutor initialized")
|
|
91
|
-
|
|
92
|
-
@property
|
|
93
|
-
def pyautogui(self) -> Any:
|
|
94
|
-
"""Get the pyautogui module, importing it lazily if needed."""
|
|
95
|
-
if self._pyautogui is None:
|
|
96
|
-
self._pyautogui = _get_pyautogui()
|
|
97
|
-
if self._pyautogui is None:
|
|
98
|
-
raise RuntimeError("PyAutoGUI is not available")
|
|
99
|
-
return self._pyautogui
|
|
100
|
-
|
|
101
|
-
def _map_key(self, key: str) -> str:
|
|
102
|
-
"""Map CLA standard key to PyAutoGUI key."""
|
|
103
|
-
return CLA_TO_PYAUTOGUI.get(key.lower(), key.lower())
|
|
104
|
-
|
|
105
|
-
def _map_keys(self, keys: list[str]) -> list[str]:
|
|
106
|
-
"""Map CLA standard keys to PyAutoGUI keys."""
|
|
107
|
-
mapped_keys = []
|
|
108
|
-
for key in keys:
|
|
109
|
-
# Handle key combinations like "ctrl+a"
|
|
110
|
-
if "+" in key:
|
|
111
|
-
parts = key.split("+")
|
|
112
|
-
mapped_parts = [self._map_key(part) for part in parts]
|
|
113
|
-
mapped_keys.append("+".join(mapped_parts))
|
|
114
|
-
else:
|
|
115
|
-
mapped_keys.append(self._map_key(key))
|
|
116
|
-
return mapped_keys
|
|
117
|
-
|
|
118
|
-
@classmethod
|
|
119
|
-
def is_available(cls) -> bool:
|
|
120
|
-
"""
|
|
121
|
-
Check if PyAutoGUI is available and functional.
|
|
122
|
-
|
|
123
|
-
Returns:
|
|
124
|
-
True if PyAutoGUI is available and functional, False otherwise
|
|
125
|
-
"""
|
|
126
|
-
pyautogui = _get_pyautogui()
|
|
127
|
-
if not pyautogui:
|
|
128
|
-
return False
|
|
129
|
-
|
|
130
|
-
try:
|
|
131
|
-
# Try to get screen size as a simple test
|
|
132
|
-
pyautogui.size()
|
|
133
|
-
return True
|
|
134
|
-
except Exception:
|
|
135
|
-
return False
|
|
136
|
-
|
|
137
|
-
async def screenshot(self) -> str | None:
|
|
138
|
-
"""
|
|
139
|
-
Take a screenshot and return base64 encoded image.
|
|
140
|
-
|
|
141
|
-
Returns:
|
|
142
|
-
Base64 encoded PNG image or None if failed
|
|
143
|
-
"""
|
|
144
|
-
try:
|
|
145
|
-
# Take screenshot using PyAutoGUI
|
|
146
|
-
screenshot = self.pyautogui.screenshot()
|
|
147
|
-
|
|
148
|
-
# Convert to base64
|
|
149
|
-
buffer = BytesIO()
|
|
150
|
-
screenshot.save(buffer, format="PNG")
|
|
151
|
-
image_data = buffer.getvalue()
|
|
152
|
-
return base64.b64encode(image_data).decode()
|
|
153
|
-
except Exception as e:
|
|
154
|
-
logger.error("Failed to take screenshot: %s", e)
|
|
155
|
-
return None
|
|
156
|
-
|
|
157
|
-
# ===== Helper Methods =====
|
|
158
|
-
|
|
159
|
-
def _hold_keys_context(self, keys: list[str] | None) -> None:
|
|
160
|
-
"""
|
|
161
|
-
Press and hold keys.
|
|
162
|
-
|
|
163
|
-
Args:
|
|
164
|
-
keys: List of keys to hold
|
|
165
|
-
"""
|
|
166
|
-
if keys:
|
|
167
|
-
for key in keys:
|
|
168
|
-
self.pyautogui.keyDown(key)
|
|
169
|
-
|
|
170
|
-
def _release_keys(self, keys: list[str] | None) -> None:
|
|
171
|
-
"""Release held keys."""
|
|
172
|
-
if keys:
|
|
173
|
-
for key in reversed(keys): # Release in reverse order
|
|
174
|
-
self.pyautogui.keyUp(key)
|
|
175
|
-
|
|
176
|
-
# ===== CLA Action Implementations =====
|
|
177
|
-
|
|
178
|
-
async def click(
|
|
179
|
-
self,
|
|
180
|
-
x: int | None = None,
|
|
181
|
-
y: int | None = None,
|
|
182
|
-
button: Literal["left", "right", "middle", "back", "forward"] = "left",
|
|
183
|
-
pattern: list[int] | None = None,
|
|
184
|
-
hold_keys: list[str] | None = None,
|
|
185
|
-
take_screenshot: bool = True,
|
|
186
|
-
) -> ContentResult:
|
|
187
|
-
"""Click at specified coordinates or current position."""
|
|
188
|
-
try:
|
|
189
|
-
# Map button names (PyAutoGUI doesn't support back/forward)
|
|
190
|
-
button_map = {
|
|
191
|
-
"left": "left",
|
|
192
|
-
"right": "right",
|
|
193
|
-
"middle": "middle",
|
|
194
|
-
"back": "left",
|
|
195
|
-
"forward": "right",
|
|
196
|
-
} # Fallback for unsupported
|
|
197
|
-
button_name = button_map.get(button, "left")
|
|
198
|
-
|
|
199
|
-
# Hold keys if specified
|
|
200
|
-
self._hold_keys_context(hold_keys)
|
|
201
|
-
|
|
202
|
-
try:
|
|
203
|
-
# Handle multi-clicks based on pattern
|
|
204
|
-
if pattern:
|
|
205
|
-
clicks = len(pattern) + 1
|
|
206
|
-
interval = pattern[0] / 1000.0 if pattern else 0.1 # Convert ms to seconds
|
|
207
|
-
|
|
208
|
-
if x is not None and y is not None:
|
|
209
|
-
self.pyautogui.click(
|
|
210
|
-
x=x, y=y, clicks=clicks, interval=interval, button=button_name
|
|
211
|
-
)
|
|
212
|
-
else:
|
|
213
|
-
self.pyautogui.click(clicks=clicks, interval=interval, button=button_name)
|
|
214
|
-
else:
|
|
215
|
-
# Single click
|
|
216
|
-
if x is not None and y is not None:
|
|
217
|
-
self.pyautogui.click(x=x, y=y, button=button_name)
|
|
218
|
-
else:
|
|
219
|
-
self.pyautogui.click(button=button_name)
|
|
220
|
-
finally:
|
|
221
|
-
# Release held keys
|
|
222
|
-
self._release_keys(hold_keys)
|
|
223
|
-
|
|
224
|
-
result = ContentResult(
|
|
225
|
-
output=f"Clicked {button} button at ({x}, {y})" if x else f"Clicked {button} button"
|
|
226
|
-
)
|
|
227
|
-
|
|
228
|
-
if take_screenshot:
|
|
229
|
-
await asyncio.sleep(self._screenshot_delay)
|
|
230
|
-
screenshot = await self.screenshot()
|
|
231
|
-
if screenshot:
|
|
232
|
-
result = ContentResult(
|
|
233
|
-
output=result.output, error=result.error, base64_image=screenshot
|
|
234
|
-
)
|
|
235
|
-
|
|
236
|
-
return result
|
|
237
|
-
except Exception as e:
|
|
238
|
-
return ContentResult(error=str(e))
|
|
239
|
-
|
|
240
|
-
async def write(
|
|
241
|
-
self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
|
|
242
|
-
) -> ContentResult:
|
|
243
|
-
"""Type text with specified delay between keystrokes."""
|
|
244
|
-
try:
|
|
245
|
-
# Convert delay from milliseconds to seconds for PyAutoGUI
|
|
246
|
-
interval = delay / 1000.0
|
|
247
|
-
self.pyautogui.typewrite(text, interval=interval)
|
|
248
|
-
|
|
249
|
-
if enter_after:
|
|
250
|
-
self.pyautogui.press("enter")
|
|
251
|
-
|
|
252
|
-
result = ContentResult(
|
|
253
|
-
output=f"Typed: '{text}'" + (" and pressed Enter" if enter_after else "")
|
|
254
|
-
)
|
|
255
|
-
|
|
256
|
-
if take_screenshot:
|
|
257
|
-
await asyncio.sleep(self._screenshot_delay)
|
|
258
|
-
screenshot = await self.screenshot()
|
|
259
|
-
if screenshot:
|
|
260
|
-
result = ContentResult(
|
|
261
|
-
output=result.output, error=result.error, base64_image=screenshot
|
|
262
|
-
)
|
|
263
|
-
|
|
264
|
-
return result
|
|
265
|
-
except Exception as e:
|
|
266
|
-
return ContentResult(error=str(e))
|
|
267
|
-
|
|
268
|
-
async def key(self, key_sequence: str, take_screenshot: bool = True) -> ContentResult:
|
|
269
|
-
"""Press a key or key combination."""
|
|
270
|
-
try:
|
|
271
|
-
# Handle key combinations (e.g., "ctrl+c")
|
|
272
|
-
if "+" in key_sequence:
|
|
273
|
-
keys = key_sequence.split("+")
|
|
274
|
-
self.pyautogui.hotkey(*keys)
|
|
275
|
-
result = ContentResult(output=f"Pressed hotkey: {key_sequence}")
|
|
276
|
-
else:
|
|
277
|
-
# Map common key names from xdotool to PyAutoGUI
|
|
278
|
-
key = key_sequence.lower()
|
|
279
|
-
self.pyautogui.press(CLA_TO_PYAUTOGUI.get(key, key))
|
|
280
|
-
result = ContentResult(output=f"Pressed key: {key_sequence}")
|
|
281
|
-
|
|
282
|
-
if take_screenshot:
|
|
283
|
-
await asyncio.sleep(self._screenshot_delay)
|
|
284
|
-
screenshot = await self.screenshot()
|
|
285
|
-
if screenshot:
|
|
286
|
-
result = ContentResult(
|
|
287
|
-
output=result.output, error=result.error, base64_image=screenshot
|
|
288
|
-
)
|
|
289
|
-
|
|
290
|
-
return result
|
|
291
|
-
except Exception as e:
|
|
292
|
-
return ContentResult(error=str(e))
|
|
293
|
-
|
|
294
|
-
async def press(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
|
|
295
|
-
"""Press a key combination (hotkey)."""
|
|
296
|
-
try:
|
|
297
|
-
# Map CLA keys to PyAutoGUI keys
|
|
298
|
-
mapped_keys = self._map_keys(keys)
|
|
299
|
-
|
|
300
|
-
# Handle single key or combination
|
|
301
|
-
if len(mapped_keys) == 1 and "+" not in mapped_keys[0]:
|
|
302
|
-
self.pyautogui.press(mapped_keys[0])
|
|
303
|
-
result = ContentResult(output=f"Pressed key: {keys[0]}")
|
|
304
|
-
else:
|
|
305
|
-
# For combinations, use hotkey
|
|
306
|
-
hotkey_parts = []
|
|
307
|
-
for key in mapped_keys:
|
|
308
|
-
if "+" in key:
|
|
309
|
-
hotkey_parts.extend(key.split("+"))
|
|
310
|
-
else:
|
|
311
|
-
hotkey_parts.append(key)
|
|
312
|
-
self.pyautogui.hotkey(*hotkey_parts)
|
|
313
|
-
result = ContentResult(output=f"Pressed hotkey: {'+'.join(keys)}")
|
|
314
|
-
|
|
315
|
-
if take_screenshot:
|
|
316
|
-
await asyncio.sleep(self._screenshot_delay)
|
|
317
|
-
screenshot = await self.screenshot()
|
|
318
|
-
if screenshot:
|
|
319
|
-
result = ContentResult(
|
|
320
|
-
output=result.output, error=result.error, base64_image=screenshot
|
|
321
|
-
)
|
|
322
|
-
|
|
323
|
-
return result
|
|
324
|
-
except Exception as e:
|
|
325
|
-
return ContentResult(error=str(e))
|
|
326
|
-
|
|
327
|
-
async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
|
|
328
|
-
"""Press and hold keys."""
|
|
329
|
-
try:
|
|
330
|
-
# Map CLA keys to PyAutoGUI keys
|
|
331
|
-
mapped_keys = self._map_keys(keys)
|
|
332
|
-
for key in mapped_keys:
|
|
333
|
-
self.pyautogui.keyDown(key)
|
|
334
|
-
|
|
335
|
-
result = ContentResult(output=f"Keys down: {', '.join(keys)}")
|
|
336
|
-
|
|
337
|
-
if take_screenshot:
|
|
338
|
-
await asyncio.sleep(self._screenshot_delay)
|
|
339
|
-
screenshot = await self.screenshot()
|
|
340
|
-
if screenshot:
|
|
341
|
-
result = ContentResult(
|
|
342
|
-
output=result.output, error=result.error, base64_image=screenshot
|
|
343
|
-
)
|
|
344
|
-
|
|
345
|
-
return result
|
|
346
|
-
except Exception as e:
|
|
347
|
-
return ContentResult(error=str(e))
|
|
348
|
-
|
|
349
|
-
async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
|
|
350
|
-
"""Release held keys."""
|
|
351
|
-
try:
|
|
352
|
-
# Map CLA keys to PyAutoGUI keys
|
|
353
|
-
mapped_keys = self._map_keys(keys)
|
|
354
|
-
for key in reversed(mapped_keys): # Release in reverse order
|
|
355
|
-
self.pyautogui.keyUp(key)
|
|
356
|
-
|
|
357
|
-
result = ContentResult(output=f"Keys up: {', '.join(keys)}")
|
|
358
|
-
|
|
359
|
-
if take_screenshot:
|
|
360
|
-
await asyncio.sleep(self._screenshot_delay)
|
|
361
|
-
screenshot = await self.screenshot()
|
|
362
|
-
if screenshot:
|
|
363
|
-
result = ContentResult(
|
|
364
|
-
output=result.output, error=result.error, base64_image=screenshot
|
|
365
|
-
)
|
|
366
|
-
|
|
367
|
-
return result
|
|
368
|
-
except Exception as e:
|
|
369
|
-
return ContentResult(error=str(e))
|
|
370
|
-
|
|
371
|
-
async def scroll(
|
|
372
|
-
self,
|
|
373
|
-
x: int | None = None,
|
|
374
|
-
y: int | None = None,
|
|
375
|
-
scroll_x: int | None = None,
|
|
376
|
-
scroll_y: int | None = None,
|
|
377
|
-
hold_keys: list[str] | None = None,
|
|
378
|
-
take_screenshot: bool = True,
|
|
379
|
-
) -> ContentResult:
|
|
380
|
-
"""Scroll at specified position."""
|
|
381
|
-
try:
|
|
382
|
-
# Move to position if specified
|
|
383
|
-
if x is not None and y is not None:
|
|
384
|
-
self.pyautogui.moveTo(x, y)
|
|
385
|
-
|
|
386
|
-
# Hold keys if specified
|
|
387
|
-
self._hold_keys_context(hold_keys)
|
|
388
|
-
|
|
389
|
-
try:
|
|
390
|
-
msg_parts = []
|
|
391
|
-
|
|
392
|
-
# Perform vertical scroll
|
|
393
|
-
if scroll_y and scroll_y != 0:
|
|
394
|
-
# PyAutoGUI: positive = up, negative = down (opposite of our convention)
|
|
395
|
-
self.pyautogui.scroll(-scroll_y)
|
|
396
|
-
msg_parts.append(f"vertically by {scroll_y}")
|
|
397
|
-
|
|
398
|
-
# Perform horizontal scroll (if supported)
|
|
399
|
-
if scroll_x and scroll_x != 0:
|
|
400
|
-
# PyAutoGUI horizontal scroll might not work on all platforms
|
|
401
|
-
try:
|
|
402
|
-
self.pyautogui.hscroll(scroll_x)
|
|
403
|
-
msg_parts.append(f"horizontally by {scroll_x}")
|
|
404
|
-
except AttributeError:
|
|
405
|
-
# hscroll not available
|
|
406
|
-
msg_parts.append(f"horizontally by {scroll_x} (not supported)")
|
|
407
|
-
|
|
408
|
-
if not msg_parts:
|
|
409
|
-
return ContentResult(output="No scroll amount specified")
|
|
410
|
-
|
|
411
|
-
msg = "Scrolled " + " and ".join(msg_parts)
|
|
412
|
-
if x is not None and y is not None:
|
|
413
|
-
msg += f" at ({x}, {y})"
|
|
414
|
-
if hold_keys:
|
|
415
|
-
msg += f" while holding {hold_keys}"
|
|
416
|
-
finally:
|
|
417
|
-
# Release held keys
|
|
418
|
-
self._release_keys(hold_keys)
|
|
419
|
-
|
|
420
|
-
result = ContentResult(output=msg)
|
|
421
|
-
|
|
422
|
-
if take_screenshot:
|
|
423
|
-
await asyncio.sleep(self._screenshot_delay)
|
|
424
|
-
screenshot = await self.screenshot()
|
|
425
|
-
if screenshot:
|
|
426
|
-
result = ContentResult(
|
|
427
|
-
output=result.output, error=result.error, base64_image=screenshot
|
|
428
|
-
)
|
|
429
|
-
|
|
430
|
-
return result
|
|
431
|
-
except Exception as e:
|
|
432
|
-
return ContentResult(error=str(e))
|
|
433
|
-
|
|
434
|
-
async def move(
|
|
435
|
-
self,
|
|
436
|
-
x: int | None = None,
|
|
437
|
-
y: int | None = None,
|
|
438
|
-
offset_x: int | None = None,
|
|
439
|
-
offset_y: int | None = None,
|
|
440
|
-
take_screenshot: bool = True,
|
|
441
|
-
) -> ContentResult:
|
|
442
|
-
"""Move mouse cursor."""
|
|
443
|
-
try:
|
|
444
|
-
if x is not None and y is not None:
|
|
445
|
-
# Absolute move
|
|
446
|
-
self.pyautogui.moveTo(x, y, duration=0.1)
|
|
447
|
-
result = ContentResult(output=f"Moved mouse to ({x}, {y})")
|
|
448
|
-
elif offset_x is not None or offset_y is not None:
|
|
449
|
-
# Relative move
|
|
450
|
-
offset_x = offset_x or 0
|
|
451
|
-
offset_y = offset_y or 0
|
|
452
|
-
self.pyautogui.moveRel(xOffset=offset_x, yOffset=offset_y, duration=0.1)
|
|
453
|
-
result = ContentResult(output=f"Moved mouse by offset ({offset_x}, {offset_y})")
|
|
454
|
-
else:
|
|
455
|
-
return ContentResult(output="No move coordinates specified")
|
|
456
|
-
|
|
457
|
-
if take_screenshot:
|
|
458
|
-
await asyncio.sleep(self._screenshot_delay)
|
|
459
|
-
screenshot = await self.screenshot()
|
|
460
|
-
if screenshot:
|
|
461
|
-
result = ContentResult(
|
|
462
|
-
output=result.output, error=result.error, base64_image=screenshot
|
|
463
|
-
)
|
|
464
|
-
|
|
465
|
-
return result
|
|
466
|
-
except Exception as e:
|
|
467
|
-
return ContentResult(error=str(e))
|
|
468
|
-
|
|
469
|
-
async def drag(
|
|
470
|
-
self,
|
|
471
|
-
path: list[tuple[int, int]],
|
|
472
|
-
pattern: list[int] | None = None,
|
|
473
|
-
hold_keys: list[str] | None = None,
|
|
474
|
-
take_screenshot: bool = True,
|
|
475
|
-
) -> ContentResult:
|
|
476
|
-
"""Drag along a path."""
|
|
477
|
-
if len(path) < 2:
|
|
478
|
-
return ContentResult(error="Drag path must have at least 2 points")
|
|
479
|
-
|
|
480
|
-
try:
|
|
481
|
-
# Hold keys if specified
|
|
482
|
-
self._hold_keys_context(hold_keys)
|
|
483
|
-
|
|
484
|
-
try:
|
|
485
|
-
# Move to start
|
|
486
|
-
start_x, start_y = path[0]
|
|
487
|
-
self.pyautogui.moveTo(start_x, start_y)
|
|
488
|
-
|
|
489
|
-
# Handle multi-point drag
|
|
490
|
-
if len(path) == 2:
|
|
491
|
-
# Simple drag
|
|
492
|
-
end_x, end_y = path[1]
|
|
493
|
-
self.pyautogui.dragTo(end_x, end_y, duration=0.5, button="left")
|
|
494
|
-
result = ContentResult(
|
|
495
|
-
output=f"Dragged from ({start_x}, {start_y}) to ({end_x}, {end_y})"
|
|
496
|
-
)
|
|
497
|
-
else:
|
|
498
|
-
# Multi-point drag
|
|
499
|
-
self.pyautogui.mouseDown(button="left")
|
|
500
|
-
for i, (x, y) in enumerate(path[1:], 1):
|
|
501
|
-
duration = 0.1
|
|
502
|
-
if pattern and i - 1 < len(pattern):
|
|
503
|
-
duration = pattern[i - 1] / 1000.0 # Convert ms to seconds
|
|
504
|
-
self.pyautogui.moveTo(x, y, duration=duration)
|
|
505
|
-
self.pyautogui.mouseUp(button="left")
|
|
506
|
-
|
|
507
|
-
result = ContentResult(output=f"Dragged along {len(path)} points")
|
|
508
|
-
|
|
509
|
-
if hold_keys:
|
|
510
|
-
result = ContentResult(output=f"{result.output} while holding {hold_keys}")
|
|
511
|
-
finally:
|
|
512
|
-
# Release held keys
|
|
513
|
-
self._release_keys(hold_keys)
|
|
514
|
-
|
|
515
|
-
if take_screenshot:
|
|
516
|
-
await asyncio.sleep(self._screenshot_delay)
|
|
517
|
-
screenshot = await self.screenshot()
|
|
518
|
-
if screenshot:
|
|
519
|
-
result = ContentResult(
|
|
520
|
-
output=result.output, error=result.error, base64_image=screenshot
|
|
521
|
-
)
|
|
522
|
-
|
|
523
|
-
return result
|
|
524
|
-
except Exception as e:
|
|
525
|
-
return ContentResult(error=str(e))
|
|
526
|
-
|
|
527
|
-
async def mouse_down(
|
|
528
|
-
self,
|
|
529
|
-
button: Literal["left", "right", "middle", "back", "forward"] = "left",
|
|
530
|
-
take_screenshot: bool = True,
|
|
531
|
-
) -> ContentResult:
|
|
532
|
-
"""Press and hold a mouse button."""
|
|
533
|
-
try:
|
|
534
|
-
# Map button names (PyAutoGUI doesn't support back/forward)
|
|
535
|
-
button_map = {
|
|
536
|
-
"left": "left",
|
|
537
|
-
"right": "right",
|
|
538
|
-
"middle": "middle",
|
|
539
|
-
"back": "left",
|
|
540
|
-
"forward": "right",
|
|
541
|
-
} # Fallback for unsupported
|
|
542
|
-
button_name = button_map.get(button, "left")
|
|
543
|
-
|
|
544
|
-
self.pyautogui.mouseDown(button=button_name)
|
|
545
|
-
result = ContentResult(output=f"Mouse down: {button} button")
|
|
546
|
-
|
|
547
|
-
if take_screenshot:
|
|
548
|
-
await asyncio.sleep(self._screenshot_delay)
|
|
549
|
-
screenshot = await self.screenshot()
|
|
550
|
-
if screenshot:
|
|
551
|
-
result = ContentResult(
|
|
552
|
-
output=result.output, error=result.error, base64_image=screenshot
|
|
553
|
-
)
|
|
554
|
-
|
|
555
|
-
return result
|
|
556
|
-
except Exception as e:
|
|
557
|
-
return ContentResult(error=str(e))
|
|
558
|
-
|
|
559
|
-
async def mouse_up(
|
|
560
|
-
self,
|
|
561
|
-
button: Literal["left", "right", "middle", "back", "forward"] = "left",
|
|
562
|
-
take_screenshot: bool = True,
|
|
563
|
-
) -> ContentResult:
|
|
564
|
-
"""Release a mouse button."""
|
|
565
|
-
try:
|
|
566
|
-
# Map button names (PyAutoGUI doesn't support back/forward)
|
|
567
|
-
button_map = {
|
|
568
|
-
"left": "left",
|
|
569
|
-
"right": "right",
|
|
570
|
-
"middle": "middle",
|
|
571
|
-
"back": "left",
|
|
572
|
-
"forward": "right",
|
|
573
|
-
} # Fallback for unsupported
|
|
574
|
-
button_name = button_map.get(button, "left")
|
|
575
|
-
|
|
576
|
-
self.pyautogui.mouseUp(button=button_name)
|
|
577
|
-
result = ContentResult(output=f"Mouse up: {button} button")
|
|
578
|
-
|
|
579
|
-
if take_screenshot:
|
|
580
|
-
await asyncio.sleep(self._screenshot_delay)
|
|
581
|
-
screenshot = await self.screenshot()
|
|
582
|
-
if screenshot:
|
|
583
|
-
result = ContentResult(
|
|
584
|
-
output=result.output, error=result.error, base64_image=screenshot
|
|
585
|
-
)
|
|
586
|
-
|
|
587
|
-
return result
|
|
588
|
-
except Exception as e:
|
|
589
|
-
return ContentResult(error=str(e))
|
|
590
|
-
|
|
591
|
-
async def hold_key(
|
|
592
|
-
self, key: str, duration: float, take_screenshot: bool = True
|
|
593
|
-
) -> ContentResult:
|
|
594
|
-
"""Hold a key for a specified duration."""
|
|
595
|
-
try:
|
|
596
|
-
# Map CLA key to PyAutoGUI key
|
|
597
|
-
mapped_key = self._map_key(key)
|
|
598
|
-
self.pyautogui.keyDown(mapped_key)
|
|
599
|
-
await asyncio.sleep(duration)
|
|
600
|
-
self.pyautogui.keyUp(mapped_key)
|
|
601
|
-
|
|
602
|
-
result = ContentResult(output=f"Held key '{key}' for {duration} seconds")
|
|
603
|
-
|
|
604
|
-
if take_screenshot:
|
|
605
|
-
screenshot = await self.screenshot()
|
|
606
|
-
if screenshot:
|
|
607
|
-
result = ContentResult(
|
|
608
|
-
output=result.output, error=result.error, base64_image=screenshot
|
|
609
|
-
)
|
|
610
|
-
|
|
611
|
-
return result
|
|
612
|
-
except Exception as e:
|
|
613
|
-
return ContentResult(error=str(e))
|
|
614
|
-
|
|
615
|
-
async def position(self) -> ContentResult:
|
|
616
|
-
"""Get current cursor position."""
|
|
617
|
-
try:
|
|
618
|
-
x, y = self.pyautogui.position()
|
|
619
|
-
return ContentResult(output=f"Mouse position: ({x}, {y})")
|
|
620
|
-
except Exception as e:
|
|
621
|
-
return ContentResult(error=str(e))
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import base64
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
from io import BytesIO
|
|
8
|
+
from typing import Any, Literal
|
|
9
|
+
|
|
10
|
+
from hud.tools.types import ContentResult
|
|
11
|
+
|
|
12
|
+
from .base import BaseExecutor
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
# Lazy loading for pyautogui
|
|
17
|
+
_pyautogui = None
|
|
18
|
+
_pyautogui_available = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _get_pyautogui() -> Any | None:
|
|
22
|
+
"""Lazily import and return pyautogui module."""
|
|
23
|
+
global _pyautogui, _pyautogui_available
|
|
24
|
+
|
|
25
|
+
if _pyautogui_available is False:
|
|
26
|
+
return None
|
|
27
|
+
|
|
28
|
+
if _pyautogui is None:
|
|
29
|
+
# Set display if not already set
|
|
30
|
+
if "DISPLAY" not in os.environ:
|
|
31
|
+
try:
|
|
32
|
+
from hud.tools.computer import computer_settings
|
|
33
|
+
|
|
34
|
+
os.environ["DISPLAY"] = str(computer_settings.DISPLAY_NUM)
|
|
35
|
+
except (ImportError, AttributeError):
|
|
36
|
+
os.environ["DISPLAY"] = ":0"
|
|
37
|
+
|
|
38
|
+
try:
|
|
39
|
+
import pyautogui # type: ignore[import-not-found]
|
|
40
|
+
|
|
41
|
+
_pyautogui = pyautogui
|
|
42
|
+
_pyautogui_available = True
|
|
43
|
+
|
|
44
|
+
# Configure PyAutoGUI settings
|
|
45
|
+
_pyautogui.FAILSAFE = False # Disable fail-safe feature
|
|
46
|
+
_pyautogui.PAUSE = 0.1 # Small pause between actions
|
|
47
|
+
except ImportError:
|
|
48
|
+
_pyautogui_available = False
|
|
49
|
+
logger.warning("PyAutoGUI is not available")
|
|
50
|
+
return None
|
|
51
|
+
except Exception as e:
|
|
52
|
+
_pyautogui_available = False
|
|
53
|
+
logger.warning("Failed to initialize PyAutoGUI: %s", e)
|
|
54
|
+
return None
|
|
55
|
+
|
|
56
|
+
return _pyautogui
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# Map CLA standard keys to PyAutoGUI keys (only where they differ)
|
|
60
|
+
CLA_TO_PYAUTOGUI = {
|
|
61
|
+
# Most keys are the same in PyAutoGUI, only map the differences
|
|
62
|
+
"escape": "esc",
|
|
63
|
+
"enter": "return",
|
|
64
|
+
"pageup": "pgup",
|
|
65
|
+
"pagedown": "pgdn",
|
|
66
|
+
"printscreen": "prtscr",
|
|
67
|
+
"prtsc": "prtscr",
|
|
68
|
+
"super": "win",
|
|
69
|
+
"command": "cmd",
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class PyAutoGUIExecutor(BaseExecutor):
|
|
74
|
+
"""
|
|
75
|
+
Cross-platform executor using PyAutoGUI.
|
|
76
|
+
Works on Windows, macOS, and Linux.
|
|
77
|
+
|
|
78
|
+
This executor should only be instantiated when PyAutoGUI is available and functional.
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
def __init__(self, display_num: int | None = None) -> None:
|
|
82
|
+
"""
|
|
83
|
+
Initialize the executor.
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
display_num: X display number (used only on Linux, ignored on Windows/macOS)
|
|
87
|
+
"""
|
|
88
|
+
super().__init__(display_num)
|
|
89
|
+
self._pyautogui = None
|
|
90
|
+
logger.info("PyAutoGUIExecutor initialized")
|
|
91
|
+
|
|
92
|
+
@property
def pyautogui(self) -> Any:
    """
    Return the PyAutoGUI module, resolving it on first use.

    Returns:
        The module object obtained from _get_pyautogui()

    Raises:
        RuntimeError: If _get_pyautogui() returns None
    """
    module = self._pyautogui
    if module is None:
        module = _get_pyautogui()
        if module is None:
            raise RuntimeError("PyAutoGUI is not available")
        # Cache so later accesses skip the lookup.
        self._pyautogui = module
    return module
|
|
100
|
+
|
|
101
|
+
def _map_key(self, key: str) -> str:
    """
    Translate one CLA key name into the name handed to PyAutoGUI.

    Args:
        key: CLA key name (matched case-insensitively)

    Returns:
        The CLA_TO_PYAUTOGUI entry for the lower-cased name, or the
        lower-cased name itself when there is no entry
    """
    normalized = key.lower()
    return CLA_TO_PYAUTOGUI.get(normalized, normalized)
|
|
104
|
+
|
|
105
|
+
def _map_keys(self, keys: list[str]) -> list[str]:
    """
    Translate a list of CLA key names into PyAutoGUI key names.

    Entries written as combinations ("ctrl+a") are translated part by part
    and re-joined with "+"; plain entries are translated directly.

    Args:
        keys: CLA key names and/or "+"-separated combinations

    Returns:
        A new list with one translated entry per input entry
    """
    # Splitting a name without "+" yields a single part, so one expression
    # covers both the plain and the combination case.
    return ["+".join(self._map_key(part) for part in entry.split("+")) for entry in keys]
|
|
117
|
+
|
|
118
|
+
@classmethod
def is_available(cls) -> bool:
    """
    Report whether PyAutoGUI can be loaded and answers a basic query.

    Returns:
        True if _get_pyautogui() yields a module and its size() call
        completes without raising, False otherwise
    """
    module = _get_pyautogui()
    if not module:
        return False

    try:
        # Asking for the screen size is used as a cheap smoke test.
        module.size()
    except Exception:
        return False
    return True
|
|
136
|
+
|
|
137
|
+
async def screenshot(self) -> str | None:
    """
    Capture the screen and encode it as a base64 PNG string.

    Returns:
        Base64 text of the PNG image, or None if capturing or encoding
        raised (the failure is logged)
    """
    try:
        image = self.pyautogui.screenshot()

        # Serialize the image to PNG in memory, then base64-encode the bytes.
        png_stream = BytesIO()
        image.save(png_stream, format="PNG")
        return base64.b64encode(png_stream.getvalue()).decode()
    except Exception as e:
        logger.error("Failed to take screenshot: %s", e)
        return None
|
|
156
|
+
|
|
157
|
+
# ===== Helper Methods =====
|
|
158
|
+
|
|
159
|
+
def _hold_keys_context(self, keys: list[str] | None) -> None:
    """
    Press and hold keys until _release_keys is called with the same list.

    Key names are translated with _map_key first, so CLA names such as
    "super" reach PyAutoGUI under the name it understands, consistent with
    keydown/keyup/hold_key. If pressing one of the keys raises, the keys
    already pressed by this call are released before the error propagates,
    so a partial failure does not leave modifiers held.

    Args:
        keys: CLA key names to hold; None or an empty list is a no-op
    """
    if not keys:
        return

    pressed: list[str] = []
    try:
        for key in keys:
            mapped = self._map_key(key)
            self.pyautogui.keyDown(mapped)
            pressed.append(mapped)
    except Exception:
        # Callers invoke this outside their try/finally, so undo here.
        for mapped in reversed(pressed):
            self.pyautogui.keyUp(mapped)
        raise

def _release_keys(self, keys: list[str] | None) -> None:
    """
    Release keys previously held via _hold_keys_context.

    Uses the same _map_key translation as _hold_keys_context so the key
    released is the key that was pressed.

    Args:
        keys: CLA key names to release; None or an empty list is a no-op
    """
    if not keys:
        return

    for key in reversed(keys):  # Release in reverse order
        self.pyautogui.keyUp(self._map_key(key))
|
|
175
|
+
|
|
176
|
+
# ===== CLA Action Implementations =====
|
|
177
|
+
|
|
178
|
+
async def click(
    self,
    x: int | None = None,
    y: int | None = None,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    pattern: list[int] | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Click at the given coordinates, or at the current position.

    Args:
        x: Target x coordinate; used only when y is also given
        y: Target y coordinate; used only when x is also given
        button: Button to click; "back" and "forward" fall back to
            "left" and "right" respectively
        pattern: Multi-click pattern; len(pattern) + 1 clicks are sent and
            pattern[0] (milliseconds) is the interval between them
        hold_keys: Keys held down for the duration of the click
        take_screenshot: Whether to attach a screenshot to the result

    Returns:
        ContentResult describing the click, or carrying the error text if
        anything raised
    """
    try:
        # Map button names (PyAutoGUI doesn't support back/forward)
        button_map = {
            "left": "left",
            "right": "right",
            "middle": "middle",
            "back": "left",
            "forward": "right",
        }  # Fallback for unsupported
        button_name = button_map.get(button, "left")

        # Coordinates count only as a pair. Compare against None rather than
        # relying on truthiness: 0 is a valid screen coordinate.
        has_target = x is not None and y is not None

        click_kwargs = {"button": button_name}
        if has_target:
            click_kwargs.update(x=x, y=y)
        if pattern:
            # Convert the first interval from ms to seconds
            click_kwargs.update(clicks=len(pattern) + 1, interval=pattern[0] / 1000.0)

        # Hold keys if specified
        self._hold_keys_context(hold_keys)
        try:
            self.pyautogui.click(**click_kwargs)
        finally:
            # Release held keys
            self._release_keys(hold_keys)

        if has_target:
            output = f"Clicked {button} button at ({x}, {y})"
        else:
            output = f"Clicked {button} button"
        result = ContentResult(output=output)

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            screenshot = await self.screenshot()
            if screenshot:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=screenshot
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
|
|
239
|
+
|
|
240
|
+
async def write(
    self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
) -> ContentResult:
    """
    Type text, optionally followed by Enter.

    Args:
        text: Text to type
        enter_after: Whether to press Enter once the text is typed
        delay: Pause between keystrokes in milliseconds
        take_screenshot: Whether to attach a screenshot to the result

    Returns:
        ContentResult describing what was typed, or carrying the error
        text if anything raised
    """
    try:
        # PyAutoGUI takes the interval in seconds, the caller passes ms.
        keystroke_gap = delay / 1000.0
        self.pyautogui.typewrite(text, interval=keystroke_gap)

        message = f"Typed: '{text}'"
        if enter_after:
            self.pyautogui.press("enter")
            message += " and pressed Enter"
        result = ContentResult(output=message)

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
|
|
267
|
+
|
|
268
|
+
async def key(self, key_sequence: str, take_screenshot: bool = True) -> ContentResult:
    """
    Press a single key or a "+"-separated key combination.

    Every key name, in both the single-key and the combination case, is
    translated with _map_key so CLA names (e.g. "super") reach PyAutoGUI
    under the name it understands.

    Args:
        key_sequence: A key name ("enter") or a combination ("ctrl+c");
            a lone "+" is treated as the plus key itself
        take_screenshot: Whether to attach a screenshot to the result

    Returns:
        ContentResult describing the key press, or carrying the error
        text if anything raised
    """
    try:
        # A bare "+" is the plus key, not an empty combination.
        if "+" in key_sequence and key_sequence != "+":
            keys = [self._map_key(part) for part in key_sequence.split("+")]
            self.pyautogui.hotkey(*keys)
            result = ContentResult(output=f"Pressed hotkey: {key_sequence}")
        else:
            self.pyautogui.press(self._map_key(key_sequence))
            result = ContentResult(output=f"Pressed key: {key_sequence}")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            screenshot = await self.screenshot()
            if screenshot:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=screenshot
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
|
|
293
|
+
|
|
294
|
+
async def press(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
    """
    Press one key, or several keys together as a hotkey.

    Args:
        keys: CLA key names; an entry may itself be a "+"-separated
            combination
        take_screenshot: Whether to attach a screenshot to the result

    Returns:
        ContentResult describing the key press, or carrying the error
        text if anything raised
    """
    try:
        translated = self._map_keys(keys)

        lone_key = len(translated) == 1 and "+" not in translated[0]
        if lone_key:
            self.pyautogui.press(translated[0])
            result = ContentResult(output=f"Pressed key: {keys[0]}")
        else:
            # Flatten every entry into its individual keys; an entry without
            # "+" splits into just itself.
            hotkey_parts = [part for entry in translated for part in entry.split("+")]
            self.pyautogui.hotkey(*hotkey_parts)
            result = ContentResult(output=f"Pressed hotkey: {'+'.join(keys)}")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
|
|
326
|
+
|
|
327
|
+
async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
    """
    Press the given keys and leave them held down.

    Args:
        keys: CLA key names to press, in order
        take_screenshot: Whether to attach a screenshot to the result

    Returns:
        ContentResult listing the keys, or carrying the error text if
        anything raised
    """
    try:
        for translated in self._map_keys(keys):
            self.pyautogui.keyDown(translated)

        result = ContentResult(output=f"Keys down: {', '.join(keys)}")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
|
|
348
|
+
|
|
349
|
+
async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
    """
    Release the given keys.

    Args:
        keys: CLA key names to release; they are released in reverse order
        take_screenshot: Whether to attach a screenshot to the result

    Returns:
        ContentResult listing the keys, or carrying the error text if
        anything raised
    """
    try:
        translated = self._map_keys(keys)
        # Walk the list backwards so the last key in the list goes up first.
        for name in translated[::-1]:
            self.pyautogui.keyUp(name)

        result = ContentResult(output=f"Keys up: {', '.join(keys)}")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
|
|
370
|
+
|
|
371
|
+
async def scroll(
    self,
    x: int | None = None,
    y: int | None = None,
    scroll_x: int | None = None,
    scroll_y: int | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Scroll vertically and/or horizontally, optionally at a given position.

    Args:
        x: X coordinate to move to first; used only when y is also given
        y: Y coordinate to move to first; used only when x is also given
        scroll_x: Horizontal scroll amount; None or 0 means none
        scroll_y: Vertical scroll amount; passed to PyAutoGUI negated;
            None or 0 means none
        hold_keys: Keys held down while scrolling
        take_screenshot: Whether to attach a screenshot to the result

    Returns:
        ContentResult describing the scroll, a "No scroll amount specified"
        result when both amounts are empty, or the error text if anything
        raised
    """
    try:
        at_position = x is not None and y is not None
        if at_position:
            self.pyautogui.moveTo(x, y)

        self._hold_keys_context(hold_keys)

        try:
            performed = []

            if scroll_y:
                # PyAutoGUI: positive = up, negative = down (opposite of our convention)
                self.pyautogui.scroll(-scroll_y)
                performed.append(f"vertically by {scroll_y}")

            if scroll_x:
                # PyAutoGUI horizontal scroll might not work on all platforms
                try:
                    self.pyautogui.hscroll(scroll_x)
                    performed.append(f"horizontally by {scroll_x}")
                except AttributeError:
                    performed.append(f"horizontally by {scroll_x} (not supported)")

            if not performed:
                return ContentResult(output="No scroll amount specified")

            msg = "Scrolled " + " and ".join(performed)
            if at_position:
                msg += f" at ({x}, {y})"
            if hold_keys:
                msg += f" while holding {hold_keys}"
        finally:
            # Runs on the early return above as well.
            self._release_keys(hold_keys)

        result = ContentResult(output=msg)

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
|
|
433
|
+
|
|
434
|
+
async def move(
    self,
    x: int | None = None,
    y: int | None = None,
    offset_x: int | None = None,
    offset_y: int | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Move the mouse cursor to a position or by an offset.

    An absolute move is used when both x and y are given; otherwise a
    relative move is used when at least one offset is given (a missing
    offset counts as 0).

    Args:
        x: Absolute x coordinate
        y: Absolute y coordinate
        offset_x: Relative horizontal offset
        offset_y: Relative vertical offset
        take_screenshot: Whether to attach a screenshot to the result

    Returns:
        ContentResult describing the move, a "No move coordinates
        specified" result when nothing usable was given, or the error text
        if anything raised
    """
    try:
        if x is not None and y is not None:
            self.pyautogui.moveTo(x, y, duration=0.1)
            result = ContentResult(output=f"Moved mouse to ({x}, {y})")
        elif offset_x is None and offset_y is None:
            return ContentResult(output="No move coordinates specified")
        else:
            delta_x = offset_x or 0
            delta_y = offset_y or 0
            self.pyautogui.moveRel(xOffset=delta_x, yOffset=delta_y, duration=0.1)
            result = ContentResult(output=f"Moved mouse by offset ({delta_x}, {delta_y})")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
|
|
468
|
+
|
|
469
|
+
async def drag(
    self,
    path: list[tuple[int, int]],
    pattern: list[int] | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Drag with the left mouse button along a path of points.

    A two-point path is performed as a single dragTo. Longer paths press
    the button at the first point, move through the remaining points and
    release the button; the release is guaranteed even if a move raises,
    so a failed drag does not leave the button pressed.

    Args:
        path: Points to drag through; at least 2 are required
        pattern: Per-segment move durations in milliseconds for multi-point
            drags (segments without an entry use 0.1 s); not used for
            two-point drags
        hold_keys: Keys held down for the duration of the drag
        take_screenshot: Whether to attach a screenshot to the result

    Returns:
        ContentResult describing the drag, or carrying an error message if
        the path is too short or anything raised
    """
    if len(path) < 2:
        return ContentResult(error="Drag path must have at least 2 points")

    try:
        # Hold keys if specified
        self._hold_keys_context(hold_keys)

        try:
            # Move to start
            start_x, start_y = path[0]
            self.pyautogui.moveTo(start_x, start_y)

            if len(path) == 2:
                # Simple drag
                end_x, end_y = path[1]
                self.pyautogui.dragTo(end_x, end_y, duration=0.5, button="left")
                result = ContentResult(
                    output=f"Dragged from ({start_x}, {start_y}) to ({end_x}, {end_y})"
                )
            else:
                # Multi-point drag
                self.pyautogui.mouseDown(button="left")
                try:
                    for index, (x, y) in enumerate(path[1:]):
                        duration = 0.1
                        if pattern and index < len(pattern):
                            duration = pattern[index] / 1000.0  # Convert ms to seconds
                        self.pyautogui.moveTo(x, y, duration=duration)
                finally:
                    # Always let go of the button, even when a move failed.
                    self.pyautogui.mouseUp(button="left")

                result = ContentResult(output=f"Dragged along {len(path)} points")

            if hold_keys:
                result = ContentResult(output=f"{result.output} while holding {hold_keys}")
        finally:
            # Release held keys
            self._release_keys(hold_keys)

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            screenshot = await self.screenshot()
            if screenshot:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=screenshot
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
|
|
526
|
+
|
|
527
|
+
async def mouse_down(
    self,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Press a mouse button and keep it held.

    Args:
        button: Button to press; "back" and "forward" fall back to "left"
            and "right" respectively
        take_screenshot: Whether to attach a screenshot to the result

    Returns:
        ContentResult naming the requested button, or carrying the error
        text if anything raised
    """
    try:
        # PyAutoGUI doesn't support back/forward, so those are redirected;
        # anything unrecognised falls back to the left button.
        supported = dict(
            left="left",
            right="right",
            middle="middle",
            back="left",
            forward="right",
        )
        actual_button = supported.get(button, "left")

        self.pyautogui.mouseDown(button=actual_button)
        result = ContentResult(output=f"Mouse down: {button} button")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
|
|
558
|
+
|
|
559
|
+
async def mouse_up(
    self,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Release a mouse button.

    Args:
        button: Button to release; "back" and "forward" fall back to
            "left" and "right" respectively
        take_screenshot: Whether to attach a screenshot to the result

    Returns:
        ContentResult naming the requested button, or carrying the error
        text if anything raised
    """
    try:
        # PyAutoGUI doesn't support back/forward, so those are redirected;
        # anything unrecognised falls back to the left button.
        supported = dict(
            left="left",
            right="right",
            middle="middle",
            back="left",
            forward="right",
        )
        actual_button = supported.get(button, "left")

        self.pyautogui.mouseUp(button=actual_button)
        result = ContentResult(output=f"Mouse up: {button} button")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
|
|
590
|
+
|
|
591
|
+
async def hold_key(
    self, key: str, duration: float, take_screenshot: bool = True
) -> ContentResult:
    """
    Hold a key down for a fixed time, then release it.

    The release happens in a finally block, so the key is let go even when
    the wait is interrupted (for example by task cancellation, which is not
    caught by the `except Exception` handler below).

    Args:
        key: CLA key name to hold
        duration: How long to hold the key, in seconds
        take_screenshot: Whether to attach a screenshot to the result

    Returns:
        ContentResult describing the hold, or carrying the error text if
        anything raised
    """
    try:
        # Map CLA key to PyAutoGUI key
        mapped_key = self._map_key(key)
        self.pyautogui.keyDown(mapped_key)
        try:
            await asyncio.sleep(duration)
        finally:
            # Never leave the key stuck down.
            self.pyautogui.keyUp(mapped_key)

        result = ContentResult(output=f"Held key '{key}' for {duration} seconds")

        if take_screenshot:
            screenshot = await self.screenshot()
            if screenshot:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=screenshot
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
|
|
614
|
+
|
|
615
|
+
async def position(self) -> ContentResult:
    """
    Report where the mouse cursor currently is.

    Returns:
        ContentResult whose output contains the cursor coordinates, or the
        error text if the query raised
    """
    try:
        cursor_x, cursor_y = self.pyautogui.position()
        report = f"Mouse position: ({cursor_x}, {cursor_y})"
        return ContentResult(output=report)
    except Exception as e:
        return ContentResult(error=str(e))
|