hud-python 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (192)
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +17 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +379 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +354 -0
  45. hud/clients/fastmcp.py +202 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -414
  87. hud/tools/computer/hud.py +376 -328
  88. hud/tools/computer/openai.py +295 -286
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.0.dist-info/METADATA +474 -0
  126. hud_python-0.4.0.dist-info/RECORD +132 -0
  127. hud_python-0.4.0.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.4.dist-info/METADATA +0 -284
  190. hud_python-0.3.4.dist-info/RECORD +0 -120
  191. /hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
@@ -1,619 +1,621 @@
1
- from __future__ import annotations
2
-
3
- import asyncio
4
- import base64
5
- import logging
6
- import os
7
- from io import BytesIO
8
- from typing import Any, Literal
9
-
10
- from hud.tools.base import ToolResult
11
-
12
- from .base import BaseExecutor
13
-
14
- logger = logging.getLogger(__name__)
15
-
16
- # Lazy loading for pyautogui
17
- _pyautogui = None
18
- _pyautogui_available = None
19
-
20
-
21
- def _get_pyautogui() -> Any | None:
22
- """Lazily import and return pyautogui module."""
23
- global _pyautogui, _pyautogui_available
24
-
25
- if _pyautogui_available is False:
26
- return None
27
-
28
- if _pyautogui is None:
29
- # Set display if not already set
30
- if "DISPLAY" not in os.environ:
31
- try:
32
- from hud.settings import settings
33
-
34
- os.environ["DISPLAY"] = settings.display
35
- except (ImportError, AttributeError):
36
- os.environ["DISPLAY"] = ":0"
37
-
38
- try:
39
- import pyautogui
40
-
41
- _pyautogui = pyautogui
42
- _pyautogui_available = True
43
-
44
- # Configure PyAutoGUI settings
45
- _pyautogui.FAILSAFE = False # Disable fail-safe feature
46
- _pyautogui.PAUSE = 0.1 # Small pause between actions
47
- except ImportError:
48
- _pyautogui_available = False
49
- logger.warning("PyAutoGUI is not available")
50
- return None
51
- except Exception as e:
52
- _pyautogui_available = False
53
- logger.warning("Failed to initialize PyAutoGUI: %s", e)
54
- return None
55
-
56
- return _pyautogui
57
-
58
-
59
- # Map CLA standard keys to PyAutoGUI keys (only where they differ)
60
- CLA_TO_PYAUTOGUI = {
61
- # Most keys are the same in PyAutoGUI, only map the differences
62
- "escape": "esc",
63
- "enter": "return",
64
- "pageup": "pgup",
65
- "pagedown": "pgdn",
66
- "printscreen": "prtscr",
67
- "prtsc": "prtscr",
68
- "super": "win",
69
- "command": "cmd",
70
- }
71
-
72
-
73
- class PyAutoGUIExecutor(BaseExecutor):
74
- """
75
- Cross-platform executor using PyAutoGUI.
76
- Works on Windows, macOS, and Linux.
77
-
78
- This executor should only be instantiated when PyAutoGUI is available and functional.
79
- """
80
-
81
- def __init__(self, display_num: int | None = None) -> None:
82
- """
83
- Initialize the executor.
84
-
85
- Args:
86
- display_num: X display number (used only on Linux, ignored on Windows/macOS)
87
- """
88
- super().__init__(display_num)
89
- self._pyautogui = None
90
- logger.info("PyAutoGUIExecutor initialized")
91
-
92
- @property
93
- def pyautogui(self) -> Any:
94
- """Get the pyautogui module, importing it lazily if needed."""
95
- if self._pyautogui is None:
96
- self._pyautogui = _get_pyautogui()
97
- if self._pyautogui is None:
98
- raise RuntimeError("PyAutoGUI is not available")
99
- return self._pyautogui
100
-
101
- def _map_key(self, key: str) -> str:
102
- """Map CLA standard key to PyAutoGUI key."""
103
- return CLA_TO_PYAUTOGUI.get(key.lower(), key.lower())
104
-
105
- def _map_keys(self, keys: list[str]) -> list[str]:
106
- """Map CLA standard keys to PyAutoGUI keys."""
107
- mapped_keys = []
108
- for key in keys:
109
- # Handle key combinations like "ctrl+a"
110
- if "+" in key:
111
- parts = key.split("+")
112
- mapped_parts = [self._map_key(part) for part in parts]
113
- mapped_keys.append("+".join(mapped_parts))
114
- else:
115
- mapped_keys.append(self._map_key(key))
116
- return mapped_keys
117
-
118
- @classmethod
119
- def is_available(cls) -> bool:
120
- """
121
- Check if PyAutoGUI is available and functional.
122
-
123
- Returns:
124
- True if PyAutoGUI is available and functional, False otherwise
125
- """
126
- pyautogui = _get_pyautogui()
127
- if not pyautogui:
128
- return False
129
-
130
- try:
131
- # Try to get screen size as a simple test
132
- pyautogui.size()
133
- return True
134
- except Exception:
135
- return False
136
-
137
- async def screenshot(self) -> str | None:
138
- """
139
- Take a screenshot and return base64 encoded image.
140
-
141
- Returns:
142
- Base64 encoded PNG image or None if failed
143
- """
144
- try:
145
- # Take screenshot using PyAutoGUI
146
- screenshot = self.pyautogui.screenshot()
147
-
148
- # Convert to base64
149
- buffer = BytesIO()
150
- screenshot.save(buffer, format="PNG")
151
- image_data = buffer.getvalue()
152
- return base64.b64encode(image_data).decode()
153
- except Exception as e:
154
- logger.error("Failed to take screenshot: %s", e)
155
- return None
156
-
157
- # ===== Helper Methods =====
158
-
159
- def _hold_keys_context(self, keys: list[str] | None) -> None:
160
- """
161
- Press and hold keys.
162
-
163
- Args:
164
- keys: List of keys to hold
165
- """
166
- if keys:
167
- for key in keys:
168
- self.pyautogui.keyDown(key)
169
-
170
- def _release_keys(self, keys: list[str] | None) -> None:
171
- """Release held keys."""
172
- if keys:
173
- for key in reversed(keys): # Release in reverse order
174
- self.pyautogui.keyUp(key)
175
-
176
- # ===== CLA Action Implementations =====
177
-
178
- async def click(
179
- self,
180
- x: int | None = None,
181
- y: int | None = None,
182
- button: Literal["left", "right", "middle", "back", "forward"] = "left",
183
- pattern: list[int] | None = None,
184
- hold_keys: list[str] | None = None,
185
- take_screenshot: bool = True,
186
- ) -> ToolResult:
187
- """Click at specified coordinates or current position."""
188
- try:
189
- # Map button names (PyAutoGUI doesn't support back/forward)
190
- button_map = {
191
- "left": "left",
192
- "right": "right",
193
- "middle": "middle",
194
- "back": "left",
195
- "forward": "right",
196
- } # Fallback for unsupported
197
- button_name = button_map.get(button, "left")
198
-
199
- # Hold keys if specified
200
- self._hold_keys_context(hold_keys)
201
-
202
- try:
203
- # Handle multi-clicks based on pattern
204
- if pattern:
205
- clicks = len(pattern) + 1
206
- interval = pattern[0] / 1000.0 if pattern else 0.1 # Convert ms to seconds
207
-
208
- if x is not None and y is not None:
209
- self.pyautogui.click(
210
- x=x, y=y, clicks=clicks, interval=interval, button=button_name
211
- )
212
- else:
213
- self.pyautogui.click(clicks=clicks, interval=interval, button=button_name)
214
- else:
215
- # Single click
216
- if x is not None and y is not None:
217
- self.pyautogui.click(x=x, y=y, button=button_name)
218
- else:
219
- self.pyautogui.click(button=button_name)
220
- finally:
221
- # Release held keys
222
- self._release_keys(hold_keys)
223
-
224
- result = ToolResult(
225
- output=f"Clicked {button} button at ({x}, {y})" if x else f"Clicked {button} button"
226
- )
227
-
228
- if take_screenshot:
229
- await asyncio.sleep(self._screenshot_delay)
230
- screenshot = await self.screenshot()
231
- if screenshot:
232
- result = ToolResult(
233
- output=result.output, error=result.error, base64_image=screenshot
234
- )
235
-
236
- return result
237
- except Exception as e:
238
- return ToolResult(error=str(e))
239
-
240
- async def type(
241
- self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
242
- ) -> ToolResult:
243
- """Type text with specified delay between keystrokes."""
244
- try:
245
- # Convert delay from milliseconds to seconds for PyAutoGUI
246
- interval = delay / 1000.0
247
- self.pyautogui.typewrite(text, interval=interval)
248
-
249
- if enter_after:
250
- self.pyautogui.press("enter")
251
-
252
- result = ToolResult(
253
- output=f"Typed: '{text}'" + (" and pressed Enter" if enter_after else "")
254
- )
255
-
256
- if take_screenshot:
257
- await asyncio.sleep(self._screenshot_delay)
258
- screenshot = await self.screenshot()
259
- if screenshot:
260
- result = ToolResult(
261
- output=result.output, error=result.error, base64_image=screenshot
262
- )
263
-
264
- return result
265
- except Exception as e:
266
- return ToolResult(error=str(e))
267
-
268
- async def key(self, key_sequence: str, take_screenshot: bool = True) -> ToolResult:
269
- """Press a key or key combination."""
270
- try:
271
- # Handle key combinations (e.g., "ctrl+c")
272
- if "+" in key_sequence:
273
- keys = key_sequence.split("+")
274
- self.pyautogui.hotkey(*keys)
275
- result = ToolResult(output=f"Pressed hotkey: {key_sequence}")
276
- else:
277
- # Map common key names from xdotool to PyAutoGUI
278
- key = key_sequence.lower()
279
- self.pyautogui.press(CLA_TO_PYAUTOGUI.get(key, key))
280
- result = ToolResult(output=f"Pressed key: {key_sequence}")
281
-
282
- if take_screenshot:
283
- await asyncio.sleep(self._screenshot_delay)
284
- screenshot = await self.screenshot()
285
- if screenshot:
286
- result = ToolResult(
287
- output=result.output, error=result.error, base64_image=screenshot
288
- )
289
-
290
- return result
291
- except Exception as e:
292
- return ToolResult(error=str(e))
293
-
294
- async def press(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
295
- """Press a key combination (hotkey)."""
296
- try:
297
- # Map CLA keys to PyAutoGUI keys
298
- mapped_keys = self._map_keys(keys)
299
-
300
- # Handle single key or combination
301
- if len(mapped_keys) == 1 and "+" not in mapped_keys[0]:
302
- self.pyautogui.press(mapped_keys[0])
303
- result = ToolResult(output=f"Pressed key: {keys[0]}")
304
- else:
305
- # For combinations, use hotkey
306
- hotkey_parts = []
307
- for key in mapped_keys:
308
- if "+" in key:
309
- hotkey_parts.extend(key.split("+"))
310
- else:
311
- hotkey_parts.append(key)
312
- self.pyautogui.hotkey(*hotkey_parts)
313
- result = ToolResult(output=f"Pressed hotkey: {'+'.join(keys)}")
314
-
315
- if take_screenshot:
316
- await asyncio.sleep(self._screenshot_delay)
317
- screenshot = await self.screenshot()
318
- if screenshot:
319
- result = ToolResult(
320
- output=result.output, error=result.error, base64_image=screenshot
321
- )
322
-
323
- return result
324
- except Exception as e:
325
- return ToolResult(error=str(e))
326
-
327
- async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
328
- """Press and hold keys."""
329
- try:
330
- # Map CLA keys to PyAutoGUI keys
331
- mapped_keys = self._map_keys(keys)
332
- for key in mapped_keys:
333
- self.pyautogui.keyDown(key)
334
-
335
- result = ToolResult(output=f"Keys down: {', '.join(keys)}")
336
-
337
- if take_screenshot:
338
- await asyncio.sleep(self._screenshot_delay)
339
- screenshot = await self.screenshot()
340
- if screenshot:
341
- result = ToolResult(
342
- output=result.output, error=result.error, base64_image=screenshot
343
- )
344
-
345
- return result
346
- except Exception as e:
347
- return ToolResult(error=str(e))
348
-
349
- async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
350
- """Release held keys."""
351
- try:
352
- # Map CLA keys to PyAutoGUI keys
353
- mapped_keys = self._map_keys(keys)
354
- for key in reversed(mapped_keys): # Release in reverse order
355
- self.pyautogui.keyUp(key)
356
-
357
- result = ToolResult(output=f"Keys up: {', '.join(keys)}")
358
-
359
- if take_screenshot:
360
- await asyncio.sleep(self._screenshot_delay)
361
- screenshot = await self.screenshot()
362
- if screenshot:
363
- result = ToolResult(
364
- output=result.output, error=result.error, base64_image=screenshot
365
- )
366
-
367
- return result
368
- except Exception as e:
369
- return ToolResult(error=str(e))
370
-
371
- async def scroll(
372
- self,
373
- x: int | None = None,
374
- y: int | None = None,
375
- scroll_x: int | None = None,
376
- scroll_y: int | None = None,
377
- hold_keys: list[str] | None = None,
378
- take_screenshot: bool = True,
379
- ) -> ToolResult:
380
- """Scroll at specified position."""
381
- try:
382
- # Move to position if specified
383
- if x is not None and y is not None:
384
- self.pyautogui.moveTo(x, y)
385
-
386
- # Hold keys if specified
387
- self._hold_keys_context(hold_keys)
388
-
389
- try:
390
- msg_parts = []
391
-
392
- # Perform vertical scroll
393
- if scroll_y and scroll_y != 0:
394
- # PyAutoGUI: positive = up, negative = down (opposite of our convention)
395
- self.pyautogui.scroll(-scroll_y)
396
- msg_parts.append(f"vertically by {scroll_y}")
397
-
398
- # Perform horizontal scroll (if supported)
399
- if scroll_x and scroll_x != 0:
400
- # PyAutoGUI horizontal scroll might not work on all platforms
401
- try:
402
- self.pyautogui.hscroll(scroll_x)
403
- msg_parts.append(f"horizontally by {scroll_x}")
404
- except AttributeError:
405
- # hscroll not available
406
- msg_parts.append(f"horizontally by {scroll_x} (not supported)")
407
-
408
- if not msg_parts:
409
- return ToolResult(output="No scroll amount specified")
410
-
411
- msg = "Scrolled " + " and ".join(msg_parts)
412
- if x is not None and y is not None:
413
- msg += f" at ({x}, {y})"
414
- if hold_keys:
415
- msg += f" while holding {hold_keys}"
416
- finally:
417
- # Release held keys
418
- self._release_keys(hold_keys)
419
-
420
- result = ToolResult(output=msg)
421
-
422
- if take_screenshot:
423
- await asyncio.sleep(self._screenshot_delay)
424
- screenshot = await self.screenshot()
425
- if screenshot:
426
- result = ToolResult(
427
- output=result.output, error=result.error, base64_image=screenshot
428
- )
429
-
430
- return result
431
- except Exception as e:
432
- return ToolResult(error=str(e))
433
-
434
- async def move(
435
- self,
436
- x: int | None = None,
437
- y: int | None = None,
438
- offset_x: int | None = None,
439
- offset_y: int | None = None,
440
- take_screenshot: bool = True,
441
- ) -> ToolResult:
442
- """Move mouse cursor."""
443
- try:
444
- if x is not None and y is not None:
445
- # Absolute move
446
- self.pyautogui.moveTo(x, y, duration=0.1)
447
- result = ToolResult(output=f"Moved mouse to ({x}, {y})")
448
- elif offset_x is not None or offset_y is not None:
449
- # Relative move
450
- offset_x = offset_x or 0
451
- offset_y = offset_y or 0
452
- self.pyautogui.moveRel(xOffset=offset_x, yOffset=offset_y, duration=0.1)
453
- result = ToolResult(output=f"Moved mouse by offset ({offset_x}, {offset_y})")
454
- else:
455
- return ToolResult(output="No move coordinates specified")
456
-
457
- if take_screenshot:
458
- await asyncio.sleep(self._screenshot_delay)
459
- screenshot = await self.screenshot()
460
- if screenshot:
461
- result = ToolResult(
462
- output=result.output, error=result.error, base64_image=screenshot
463
- )
464
-
465
- return result
466
- except Exception as e:
467
- return ToolResult(error=str(e))
468
-
469
- async def drag(
470
- self,
471
- path: list[tuple[int, int]],
472
- pattern: list[int] | None = None,
473
- hold_keys: list[str] | None = None,
474
- take_screenshot: bool = True,
475
- ) -> ToolResult:
476
- """Drag along a path."""
477
- if len(path) < 2:
478
- return ToolResult(error="Drag path must have at least 2 points")
479
-
480
- try:
481
- # Hold keys if specified
482
- self._hold_keys_context(hold_keys)
483
-
484
- try:
485
- # Move to start
486
- start_x, start_y = path[0]
487
- self.pyautogui.moveTo(start_x, start_y)
488
-
489
- # Handle multi-point drag
490
- if len(path) == 2:
491
- # Simple drag
492
- end_x, end_y = path[1]
493
- self.pyautogui.dragTo(end_x, end_y, duration=0.5, button="left")
494
- result = ToolResult(
495
- output=f"Dragged from ({start_x}, {start_y}) to ({end_x}, {end_y})"
496
- )
497
- else:
498
- # Multi-point drag
499
- self.pyautogui.mouseDown(button="left")
500
- for i, (x, y) in enumerate(path[1:], 1):
501
- duration = 0.1
502
- if pattern and i - 1 < len(pattern):
503
- duration = pattern[i - 1] / 1000.0 # Convert ms to seconds
504
- self.pyautogui.moveTo(x, y, duration=duration)
505
- self.pyautogui.mouseUp(button="left")
506
-
507
- result = ToolResult(output=f"Dragged along {len(path)} points")
508
-
509
- if hold_keys:
510
- result = ToolResult(output=f"{result.output} while holding {hold_keys}")
511
- finally:
512
- # Release held keys
513
- self._release_keys(hold_keys)
514
-
515
- if take_screenshot:
516
- await asyncio.sleep(self._screenshot_delay)
517
- screenshot = await self.screenshot()
518
- if screenshot:
519
- result = ToolResult(
520
- output=result.output, error=result.error, base64_image=screenshot
521
- )
522
-
523
- return result
524
- except Exception as e:
525
- return ToolResult(error=str(e))
526
-
527
- async def mouse_down(
528
- self,
529
- button: Literal["left", "right", "middle", "back", "forward"] = "left",
530
- take_screenshot: bool = True,
531
- ) -> ToolResult:
532
- """Press and hold a mouse button."""
533
- try:
534
- # Map button names (PyAutoGUI doesn't support back/forward)
535
- button_map = {
536
- "left": "left",
537
- "right": "right",
538
- "middle": "middle",
539
- "back": "left",
540
- "forward": "right",
541
- } # Fallback for unsupported
542
- button_name = button_map.get(button, "left")
543
-
544
- self.pyautogui.mouseDown(button=button_name)
545
- result = ToolResult(output=f"Mouse down: {button} button")
546
-
547
- if take_screenshot:
548
- await asyncio.sleep(self._screenshot_delay)
549
- screenshot = await self.screenshot()
550
- if screenshot:
551
- result = ToolResult(
552
- output=result.output, error=result.error, base64_image=screenshot
553
- )
554
-
555
- return result
556
- except Exception as e:
557
- return ToolResult(error=str(e))
558
-
559
- async def mouse_up(
560
- self,
561
- button: Literal["left", "right", "middle", "back", "forward"] = "left",
562
- take_screenshot: bool = True,
563
- ) -> ToolResult:
564
- """Release a mouse button."""
565
- try:
566
- # Map button names (PyAutoGUI doesn't support back/forward)
567
- button_map = {
568
- "left": "left",
569
- "right": "right",
570
- "middle": "middle",
571
- "back": "left",
572
- "forward": "right",
573
- } # Fallback for unsupported
574
- button_name = button_map.get(button, "left")
575
-
576
- self.pyautogui.mouseUp(button=button_name)
577
- result = ToolResult(output=f"Mouse up: {button} button")
578
-
579
- if take_screenshot:
580
- await asyncio.sleep(self._screenshot_delay)
581
- screenshot = await self.screenshot()
582
- if screenshot:
583
- result = ToolResult(
584
- output=result.output, error=result.error, base64_image=screenshot
585
- )
586
-
587
- return result
588
- except Exception as e:
589
- return ToolResult(error=str(e))
590
-
591
- async def hold_key(self, key: str, duration: float, take_screenshot: bool = True) -> ToolResult:
592
- """Hold a key for a specified duration."""
593
- try:
594
- # Map CLA key to PyAutoGUI key
595
- mapped_key = self._map_key(key)
596
- self.pyautogui.keyDown(mapped_key)
597
- await asyncio.sleep(duration)
598
- self.pyautogui.keyUp(mapped_key)
599
-
600
- result = ToolResult(output=f"Held key '{key}' for {duration} seconds")
601
-
602
- if take_screenshot:
603
- screenshot = await self.screenshot()
604
- if screenshot:
605
- result = ToolResult(
606
- output=result.output, error=result.error, base64_image=screenshot
607
- )
608
-
609
- return result
610
- except Exception as e:
611
- return ToolResult(error=str(e))
612
-
613
- async def position(self) -> ToolResult:
614
- """Get current cursor position."""
615
- try:
616
- x, y = self.pyautogui.position()
617
- return ToolResult(output=f"Mouse position: ({x}, {y})")
618
- except Exception as e:
619
- return ToolResult(error=str(e))
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import base64
5
+ import logging
6
+ import os
7
+ from io import BytesIO
8
+ from typing import Any, Literal
9
+
10
+ from hud.tools.types import ContentResult
11
+
12
+ from .base import BaseExecutor
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Lazy loading for pyautogui
17
+ _pyautogui = None
18
+ _pyautogui_available = None
19
+
20
+
21
+ def _get_pyautogui() -> Any | None:
22
+ """Lazily import and return pyautogui module."""
23
+ global _pyautogui, _pyautogui_available
24
+
25
+ if _pyautogui_available is False:
26
+ return None
27
+
28
+ if _pyautogui is None:
29
+ # Set display if not already set
30
+ if "DISPLAY" not in os.environ:
31
+ try:
32
+ from hud.tools.computer import computer_settings
33
+
34
+ os.environ["DISPLAY"] = str(computer_settings.DISPLAY_NUM)
35
+ except (ImportError, AttributeError):
36
+ os.environ["DISPLAY"] = ":0"
37
+
38
+ try:
39
+ import pyautogui # type: ignore[import-not-found]
40
+
41
+ _pyautogui = pyautogui
42
+ _pyautogui_available = True
43
+
44
+ # Configure PyAutoGUI settings
45
+ _pyautogui.FAILSAFE = False # Disable fail-safe feature
46
+ _pyautogui.PAUSE = 0.1 # Small pause between actions
47
+ except ImportError:
48
+ _pyautogui_available = False
49
+ logger.warning("PyAutoGUI is not available")
50
+ return None
51
+ except Exception as e:
52
+ _pyautogui_available = False
53
+ logger.warning("Failed to initialize PyAutoGUI: %s", e)
54
+ return None
55
+
56
+ return _pyautogui
57
+
58
+
59
+ # Map CLA standard keys to PyAutoGUI keys (only where they differ)
60
+ CLA_TO_PYAUTOGUI = {
61
+ # Most keys are the same in PyAutoGUI, only map the differences
62
+ "escape": "esc",
63
+ "enter": "return",
64
+ "pageup": "pgup",
65
+ "pagedown": "pgdn",
66
+ "printscreen": "prtscr",
67
+ "prtsc": "prtscr",
68
+ "super": "win",
69
+ "command": "cmd",
70
+ }
71
+
72
+
73
+ class PyAutoGUIExecutor(BaseExecutor):
74
+ """
75
+ Cross-platform executor using PyAutoGUI.
76
+ Works on Windows, macOS, and Linux.
77
+
78
+ This executor should only be instantiated when PyAutoGUI is available and functional.
79
+ """
80
+
81
+ def __init__(self, display_num: int | None = None) -> None:
82
+ """
83
+ Initialize the executor.
84
+
85
+ Args:
86
+ display_num: X display number (used only on Linux, ignored on Windows/macOS)
87
+ """
88
+ super().__init__(display_num)
89
+ self._pyautogui = None
90
+ logger.info("PyAutoGUIExecutor initialized")
91
+
92
+ @property
93
+ def pyautogui(self) -> Any:
94
+ """Get the pyautogui module, importing it lazily if needed."""
95
+ if self._pyautogui is None:
96
+ self._pyautogui = _get_pyautogui()
97
+ if self._pyautogui is None:
98
+ raise RuntimeError("PyAutoGUI is not available")
99
+ return self._pyautogui
100
+
101
+ def _map_key(self, key: str) -> str:
102
+ """Map CLA standard key to PyAutoGUI key."""
103
+ return CLA_TO_PYAUTOGUI.get(key.lower(), key.lower())
104
+
105
+ def _map_keys(self, keys: list[str]) -> list[str]:
106
+ """Map CLA standard keys to PyAutoGUI keys."""
107
+ mapped_keys = []
108
+ for key in keys:
109
+ # Handle key combinations like "ctrl+a"
110
+ if "+" in key:
111
+ parts = key.split("+")
112
+ mapped_parts = [self._map_key(part) for part in parts]
113
+ mapped_keys.append("+".join(mapped_parts))
114
+ else:
115
+ mapped_keys.append(self._map_key(key))
116
+ return mapped_keys
117
+
118
+ @classmethod
119
+ def is_available(cls) -> bool:
120
+ """
121
+ Check if PyAutoGUI is available and functional.
122
+
123
+ Returns:
124
+ True if PyAutoGUI is available and functional, False otherwise
125
+ """
126
+ pyautogui = _get_pyautogui()
127
+ if not pyautogui:
128
+ return False
129
+
130
+ try:
131
+ # Try to get screen size as a simple test
132
+ pyautogui.size()
133
+ return True
134
+ except Exception:
135
+ return False
136
+
137
+ async def screenshot(self) -> str | None:
138
+ """
139
+ Take a screenshot and return base64 encoded image.
140
+
141
+ Returns:
142
+ Base64 encoded PNG image or None if failed
143
+ """
144
+ try:
145
+ # Take screenshot using PyAutoGUI
146
+ screenshot = self.pyautogui.screenshot()
147
+
148
+ # Convert to base64
149
+ buffer = BytesIO()
150
+ screenshot.save(buffer, format="PNG")
151
+ image_data = buffer.getvalue()
152
+ return base64.b64encode(image_data).decode()
153
+ except Exception as e:
154
+ logger.error("Failed to take screenshot: %s", e)
155
+ return None
156
+
157
+ # ===== Helper Methods =====
158
+
159
+ def _hold_keys_context(self, keys: list[str] | None) -> None:
160
+ """
161
+ Press and hold keys.
162
+
163
+ Args:
164
+ keys: List of keys to hold
165
+ """
166
+ if keys:
167
+ for key in keys:
168
+ self.pyautogui.keyDown(key)
169
+
170
+ def _release_keys(self, keys: list[str] | None) -> None:
171
+ """Release held keys."""
172
+ if keys:
173
+ for key in reversed(keys): # Release in reverse order
174
+ self.pyautogui.keyUp(key)
175
+
176
+ # ===== CLA Action Implementations =====
177
+
178
async def click(
    self,
    x: int | None = None,
    y: int | None = None,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    pattern: list[int] | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Click at specified coordinates or current position.

    Args:
        x: Target X coordinate. Used only when ``y`` is also given.
        y: Target Y coordinate. Used only when ``x`` is also given.
        button: Mouse button. "back" and "forward" are not supported by
            PyAutoGUI and fall back to "left" and "right" respectively.
        pattern: Delays in milliseconds for a multi-click. N entries produce
            N + 1 clicks; only the first entry is used, as the uniform
            interval between clicks.
        hold_keys: Keys held down for the duration of the click.
        take_screenshot: Attach a screenshot to the result after the click.

    Returns:
        ContentResult describing the click, or carrying ``error`` if any
        step raised.
    """
    try:
        # Map button names (PyAutoGUI doesn't support back/forward)
        button_map = {
            "left": "left",
            "right": "right",
            "middle": "middle",
            "back": "left",
            "forward": "right",
        }  # Fallback for unsupported
        button_name = button_map.get(button, "left")

        # A position is only honoured when both coordinates are present;
        # explicit None checks keep coordinate 0 valid.
        has_target = x is not None and y is not None

        click_kwargs = {"button": button_name}
        if has_target:
            click_kwargs.update(x=x, y=y)
        if pattern:
            # Convert ms to seconds; PyAutoGUI takes a single interval.
            click_kwargs.update(clicks=len(pattern) + 1, interval=pattern[0] / 1000.0)

        try:
            # Pressing the modifiers inside the try guarantees that a failure
            # part-way through still releases whatever was already held.
            self._hold_keys_context(hold_keys)
            self.pyautogui.click(**click_kwargs)
        finally:
            self._release_keys(hold_keys)

        result = ContentResult(
            output=f"Clicked {button} button at ({x}, {y})"
            if has_target
            else f"Clicked {button} button"
        )

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            screenshot = await self.screenshot()
            if screenshot:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=screenshot
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
239
+
240
async def write(
    self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
) -> ContentResult:
    """
    Type text, optionally followed by Enter.

    Args:
        text: Text handed to PyAutoGUI's ``typewrite``.
        enter_after: Press the "enter" key once the text has been typed.
        delay: Pause between keystrokes, in milliseconds.
        take_screenshot: Attach a screenshot to the result afterwards.

    Returns:
        ContentResult describing what was typed, or carrying ``error`` if
        any step raised.
    """
    try:
        # PyAutoGUI expects the per-keystroke interval in seconds.
        self.pyautogui.typewrite(text, interval=delay / 1000.0)

        summary = f"Typed: '{text}'"
        if enter_after:
            self.pyautogui.press("enter")
            summary += " and pressed Enter"

        result = ContentResult(output=summary)
        if not take_screenshot:
            return result

        await asyncio.sleep(self._screenshot_delay)
        image = await self.screenshot()
        if image:
            result = ContentResult(output=result.output, error=result.error, base64_image=image)
        return result
    except Exception as e:
        return ContentResult(error=str(e))
267
+
268
async def key(self, key_sequence: str, take_screenshot: bool = True) -> ContentResult:
    """
    Press a key or key combination.

    Args:
        key_sequence: A single key name, or several joined with "+"
            (e.g. "ctrl+c"). Every key name is mapped to its PyAutoGUI
            name via ``_map_key``. A lone "+" is treated as the plus key
            itself rather than as a separator.
        take_screenshot: Attach a screenshot to the result afterwards.

    Returns:
        ContentResult describing the key press, or carrying ``error`` if
        any step raised.
    """
    try:
        # Handle key combinations (e.g., "ctrl+c"). A bare "+" would split into
        # two empty names, so it is routed to the single-key branch instead.
        if "+" in key_sequence and key_sequence != "+":
            # Map each part so combinations get the same name translation as
            # single keys and as ``press``.
            keys = [self._map_key(part) for part in key_sequence.split("+")]
            self.pyautogui.hotkey(*keys)
            result = ContentResult(output=f"Pressed hotkey: {key_sequence}")
        else:
            self.pyautogui.press(self._map_key(key_sequence))
            result = ContentResult(output=f"Pressed key: {key_sequence}")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            screenshot = await self.screenshot()
            if screenshot:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=screenshot
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
293
+
294
async def press(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
    """
    Press one key, or several keys together as a hotkey.

    Args:
        keys: CLA key names; an entry may itself be a "+"-joined combination.
        take_screenshot: Attach a screenshot to the result afterwards.

    Returns:
        ContentResult describing the press, or carrying ``error`` if any
        step raised.
    """
    try:
        translated = self._map_keys(keys)

        is_single_key = len(translated) == 1 and "+" not in translated[0]
        if is_single_key:
            self.pyautogui.press(translated[0])
            summary = f"Pressed key: {keys[0]}"
        else:
            # Flatten "a+b" style entries so hotkey() receives one name per argument.
            flattened = [piece for entry in translated for piece in entry.split("+")]
            self.pyautogui.hotkey(*flattened)
            summary = f"Pressed hotkey: {'+'.join(keys)}"

        result = ContentResult(output=summary)
        if not take_screenshot:
            return result

        await asyncio.sleep(self._screenshot_delay)
        image = await self.screenshot()
        if image:
            result = ContentResult(output=result.output, error=result.error, base64_image=image)
        return result
    except Exception as e:
        return ContentResult(error=str(e))
326
+
327
async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
    """
    Press the given keys down and leave them held.

    Args:
        keys: CLA key names, pressed in the order given after mapping.
        take_screenshot: Attach a screenshot to the result afterwards.

    Returns:
        ContentResult listing the keys, or carrying ``error`` if any step
        raised.
    """
    try:
        for translated in self._map_keys(keys):
            self.pyautogui.keyDown(translated)

        result = ContentResult(output=f"Keys down: {', '.join(keys)}")
        if not take_screenshot:
            return result

        await asyncio.sleep(self._screenshot_delay)
        image = await self.screenshot()
        if image:
            result = ContentResult(output=result.output, error=result.error, base64_image=image)
        return result
    except Exception as e:
        return ContentResult(error=str(e))
348
+
349
async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
    """
    Release the given keys.

    Args:
        keys: CLA key names; after mapping they are released in reverse order.
        take_screenshot: Attach a screenshot to the result afterwards.

    Returns:
        ContentResult listing the keys, or carrying ``error`` if any step
        raised.
    """
    try:
        for translated in reversed(self._map_keys(keys)):
            self.pyautogui.keyUp(translated)

        result = ContentResult(output=f"Keys up: {', '.join(keys)}")
        if not take_screenshot:
            return result

        await asyncio.sleep(self._screenshot_delay)
        image = await self.screenshot()
        if image:
            result = ContentResult(output=result.output, error=result.error, base64_image=image)
        return result
    except Exception as e:
        return ContentResult(error=str(e))
370
+
371
async def scroll(
    self,
    x: int | None = None,
    y: int | None = None,
    scroll_x: int | None = None,
    scroll_y: int | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Scroll at specified position.

    Args:
        x: X coordinate to move to first. Used only when ``y`` is also given.
        y: Y coordinate to move to first. Used only when ``x`` is also given.
        scroll_x: Horizontal amount, passed to ``hscroll`` unchanged.
        scroll_y: Vertical amount in this API's convention; negated before
            being passed to PyAutoGUI, whose sign convention is the opposite.
        hold_keys: Keys held down while scrolling.
        take_screenshot: Attach a screenshot to the result afterwards.

    Returns:
        ContentResult describing the scroll, a "No scroll amount specified"
        result when both amounts are missing or zero, or a result carrying
        ``error`` if any step raised.
    """
    try:
        # Move to position if specified
        if x is not None and y is not None:
            self.pyautogui.moveTo(x, y)

        # Nothing to scroll: return before touching the keyboard, so modifier
        # keys are not tapped (which can itself trigger UI actions).
        if not scroll_x and not scroll_y:
            return ContentResult(output="No scroll amount specified")

        msg_parts = []
        try:
            # Pressing the modifiers inside the try guarantees that a failure
            # part-way through still releases whatever was already held.
            self._hold_keys_context(hold_keys)

            # Perform vertical scroll
            if scroll_y:
                # PyAutoGUI: positive = up, negative = down (opposite of our convention)
                self.pyautogui.scroll(-scroll_y)
                msg_parts.append(f"vertically by {scroll_y}")

            # Perform horizontal scroll (if supported)
            if scroll_x:
                # PyAutoGUI horizontal scroll might not work on all platforms
                try:
                    self.pyautogui.hscroll(scroll_x)
                    msg_parts.append(f"horizontally by {scroll_x}")
                except AttributeError:
                    # hscroll not available
                    msg_parts.append(f"horizontally by {scroll_x} (not supported)")
        finally:
            # Release held keys
            self._release_keys(hold_keys)

        msg = "Scrolled " + " and ".join(msg_parts)
        if x is not None and y is not None:
            msg += f" at ({x}, {y})"
        if hold_keys:
            msg += f" while holding {hold_keys}"

        result = ContentResult(output=msg)

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            screenshot = await self.screenshot()
            if screenshot:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=screenshot
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
433
+
434
async def move(
    self,
    x: int | None = None,
    y: int | None = None,
    offset_x: int | None = None,
    offset_y: int | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Move the mouse cursor to an absolute position or by a relative offset.

    Args:
        x: Absolute X coordinate. Used only when ``y`` is also given.
        y: Absolute Y coordinate. Used only when ``x`` is also given.
        offset_x: Relative X offset; a missing value counts as 0 when
            ``offset_y`` is given.
        offset_y: Relative Y offset; a missing value counts as 0 when
            ``offset_x`` is given.
        take_screenshot: Attach a screenshot to the result afterwards.

    Returns:
        ContentResult describing the move, a "No move coordinates specified"
        result when neither form is usable, or a result carrying ``error``
        if any step raised.
    """
    try:
        wants_absolute = x is not None and y is not None
        wants_relative = offset_x is not None or offset_y is not None
        if not wants_absolute and not wants_relative:
            return ContentResult(output="No move coordinates specified")

        # An absolute target takes precedence over offsets when both are supplied.
        if wants_absolute:
            self.pyautogui.moveTo(x, y, duration=0.1)
            summary = f"Moved mouse to ({x}, {y})"
        else:
            dx = offset_x or 0
            dy = offset_y or 0
            self.pyautogui.moveRel(xOffset=dx, yOffset=dy, duration=0.1)
            summary = f"Moved mouse by offset ({dx}, {dy})"

        result = ContentResult(output=summary)
        if not take_screenshot:
            return result

        await asyncio.sleep(self._screenshot_delay)
        image = await self.screenshot()
        if image:
            result = ContentResult(output=result.output, error=result.error, base64_image=image)
        return result
    except Exception as e:
        return ContentResult(error=str(e))
468
+
469
async def drag(
    self,
    path: list[tuple[int, int]],
    pattern: list[int] | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Drag along a path.

    Args:
        path: At least two (x, y) points. The cursor moves to the first
            point, then drags with the left button through the rest.
        pattern: Per-segment durations in milliseconds for paths with more
            than two points; segments without an entry take 0.1 s. A
            two-point drag always uses a fixed 0.5 s duration.
        hold_keys: Keys held down for the duration of the drag.
        take_screenshot: Attach a screenshot to the result afterwards.

    Returns:
        ContentResult describing the drag, or carrying ``error`` if the
        path is too short or any step raised.
    """
    if len(path) < 2:
        return ContentResult(error="Drag path must have at least 2 points")

    try:
        try:
            # Pressing the modifiers inside the try guarantees that a failure
            # part-way through still releases whatever was already held.
            self._hold_keys_context(hold_keys)

            # Move to start
            start_x, start_y = path[0]
            self.pyautogui.moveTo(start_x, start_y)

            if len(path) == 2:
                # Simple drag
                end_x, end_y = path[1]
                self.pyautogui.dragTo(end_x, end_y, duration=0.5, button="left")
                message = f"Dragged from ({start_x}, {start_y}) to ({end_x}, {end_y})"
            else:
                # Multi-point drag
                self.pyautogui.mouseDown(button="left")
                try:
                    for index, (x, y) in enumerate(path[1:]):
                        duration = 0.1
                        if pattern and index < len(pattern):
                            duration = pattern[index] / 1000.0  # Convert ms to seconds
                        self.pyautogui.moveTo(x, y, duration=duration)
                finally:
                    # Always let go of the button, even if a move fails, so
                    # the mouse is not left stuck in a pressed state.
                    self.pyautogui.mouseUp(button="left")
                message = f"Dragged along {len(path)} points"

            if hold_keys:
                message = f"{message} while holding {hold_keys}"
        finally:
            # Release held keys
            self._release_keys(hold_keys)

        result = ContentResult(output=message)

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            screenshot = await self.screenshot()
            if screenshot:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=screenshot
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
526
+
527
async def mouse_down(
    self,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Press a mouse button and leave it held.

    Args:
        button: Mouse button. "back" and "forward" are not supported by
            PyAutoGUI and fall back to "left" and "right" respectively.
        take_screenshot: Attach a screenshot to the result afterwards.

    Returns:
        ContentResult naming the requested button, or carrying ``error`` if
        any step raised.
    """
    try:
        # Unsupported buttons are translated to the nearest supported one.
        translation = dict(
            left="left", right="right", middle="middle", back="left", forward="right"
        )
        self.pyautogui.mouseDown(button=translation.get(button, "left"))

        result = ContentResult(output=f"Mouse down: {button} button")
        if not take_screenshot:
            return result

        await asyncio.sleep(self._screenshot_delay)
        image = await self.screenshot()
        if image:
            result = ContentResult(output=result.output, error=result.error, base64_image=image)
        return result
    except Exception as e:
        return ContentResult(error=str(e))
558
+
559
async def mouse_up(
    self,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    take_screenshot: bool = True,
) -> ContentResult:
    """
    Release a held mouse button.

    Args:
        button: Mouse button. "back" and "forward" are not supported by
            PyAutoGUI and fall back to "left" and "right" respectively.
        take_screenshot: Attach a screenshot to the result afterwards.

    Returns:
        ContentResult naming the requested button, or carrying ``error`` if
        any step raised.
    """
    try:
        # Unsupported buttons are translated to the nearest supported one.
        translation = dict(
            left="left", right="right", middle="middle", back="left", forward="right"
        )
        self.pyautogui.mouseUp(button=translation.get(button, "left"))

        result = ContentResult(output=f"Mouse up: {button} button")
        if not take_screenshot:
            return result

        await asyncio.sleep(self._screenshot_delay)
        image = await self.screenshot()
        if image:
            result = ContentResult(output=result.output, error=result.error, base64_image=image)
        return result
    except Exception as e:
        return ContentResult(error=str(e))
590
+
591
async def hold_key(
    self, key: str, duration: float, take_screenshot: bool = True
) -> ContentResult:
    """
    Hold a key for a specified duration.

    Args:
        key: CLA key name; mapped to its PyAutoGUI name before use.
        duration: How long to keep the key down, in seconds.
        take_screenshot: Attach a screenshot to the result afterwards. Unlike
            the other actions, no extra settling delay is inserted first.

    Returns:
        ContentResult describing the hold, or carrying ``error`` if any
        step raised.
    """
    try:
        # Map CLA key to PyAutoGUI key
        mapped_key = self._map_key(key)
        self.pyautogui.keyDown(mapped_key)
        try:
            await asyncio.sleep(duration)
        finally:
            # Release even if the wait is cancelled or raises; otherwise the
            # key would stay stuck down (cancellation is not an ``Exception``
            # and bypasses the handler below).
            self.pyautogui.keyUp(mapped_key)

        result = ContentResult(output=f"Held key '{key}' for {duration} seconds")

        if take_screenshot:
            screenshot = await self.screenshot()
            if screenshot:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=screenshot
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
614
+
615
async def position(self) -> ContentResult:
    """Report the current cursor position, or an error result if the query fails."""
    try:
        cursor_x, cursor_y = self.pyautogui.position()
    except Exception as e:
        return ContentResult(error=str(e))
    return ContentResult(output=f"Mouse position: ({cursor_x}, {cursor_y})")