hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (130) hide show
  1. hud/__init__.py +22 -22
  2. hud/agents/__init__.py +13 -15
  3. hud/agents/base.py +599 -599
  4. hud/agents/claude.py +373 -373
  5. hud/agents/langchain.py +261 -250
  6. hud/agents/misc/__init__.py +7 -7
  7. hud/agents/misc/response_agent.py +82 -80
  8. hud/agents/openai.py +352 -352
  9. hud/agents/openai_chat_generic.py +154 -154
  10. hud/agents/tests/__init__.py +1 -1
  11. hud/agents/tests/test_base.py +742 -742
  12. hud/agents/tests/test_claude.py +324 -324
  13. hud/agents/tests/test_client.py +363 -363
  14. hud/agents/tests/test_openai.py +237 -237
  15. hud/cli/__init__.py +617 -617
  16. hud/cli/__main__.py +8 -8
  17. hud/cli/analyze.py +371 -371
  18. hud/cli/analyze_metadata.py +230 -230
  19. hud/cli/build.py +498 -427
  20. hud/cli/clone.py +185 -185
  21. hud/cli/cursor.py +92 -92
  22. hud/cli/debug.py +392 -392
  23. hud/cli/docker_utils.py +83 -83
  24. hud/cli/init.py +280 -281
  25. hud/cli/interactive.py +353 -353
  26. hud/cli/mcp_server.py +764 -756
  27. hud/cli/pull.py +330 -336
  28. hud/cli/push.py +404 -370
  29. hud/cli/remote_runner.py +311 -311
  30. hud/cli/runner.py +160 -160
  31. hud/cli/tests/__init__.py +3 -3
  32. hud/cli/tests/test_analyze.py +284 -284
  33. hud/cli/tests/test_cli_init.py +265 -265
  34. hud/cli/tests/test_cli_main.py +27 -27
  35. hud/cli/tests/test_clone.py +142 -142
  36. hud/cli/tests/test_cursor.py +253 -253
  37. hud/cli/tests/test_debug.py +453 -453
  38. hud/cli/tests/test_mcp_server.py +139 -139
  39. hud/cli/tests/test_utils.py +388 -388
  40. hud/cli/utils.py +263 -263
  41. hud/clients/README.md +143 -143
  42. hud/clients/__init__.py +16 -16
  43. hud/clients/base.py +378 -379
  44. hud/clients/fastmcp.py +222 -222
  45. hud/clients/mcp_use.py +298 -278
  46. hud/clients/tests/__init__.py +1 -1
  47. hud/clients/tests/test_client_integration.py +111 -111
  48. hud/clients/tests/test_fastmcp.py +342 -342
  49. hud/clients/tests/test_protocol.py +188 -188
  50. hud/clients/utils/__init__.py +1 -1
  51. hud/clients/utils/retry_transport.py +160 -160
  52. hud/datasets.py +327 -322
  53. hud/misc/__init__.py +1 -1
  54. hud/misc/claude_plays_pokemon.py +292 -292
  55. hud/otel/__init__.py +35 -35
  56. hud/otel/collector.py +142 -142
  57. hud/otel/config.py +164 -164
  58. hud/otel/context.py +536 -536
  59. hud/otel/exporters.py +366 -366
  60. hud/otel/instrumentation.py +97 -97
  61. hud/otel/processors.py +118 -118
  62. hud/otel/tests/__init__.py +1 -1
  63. hud/otel/tests/test_processors.py +197 -197
  64. hud/server/__init__.py +5 -5
  65. hud/server/context.py +114 -114
  66. hud/server/helper/__init__.py +5 -5
  67. hud/server/low_level.py +132 -132
  68. hud/server/server.py +170 -166
  69. hud/server/tests/__init__.py +3 -3
  70. hud/settings.py +73 -73
  71. hud/shared/__init__.py +5 -5
  72. hud/shared/exceptions.py +180 -180
  73. hud/shared/requests.py +264 -264
  74. hud/shared/tests/test_exceptions.py +157 -157
  75. hud/shared/tests/test_requests.py +275 -275
  76. hud/telemetry/__init__.py +25 -25
  77. hud/telemetry/instrument.py +379 -379
  78. hud/telemetry/job.py +309 -309
  79. hud/telemetry/replay.py +74 -74
  80. hud/telemetry/trace.py +83 -83
  81. hud/tools/__init__.py +33 -33
  82. hud/tools/base.py +365 -365
  83. hud/tools/bash.py +161 -161
  84. hud/tools/computer/__init__.py +15 -15
  85. hud/tools/computer/anthropic.py +437 -437
  86. hud/tools/computer/hud.py +376 -376
  87. hud/tools/computer/openai.py +295 -295
  88. hud/tools/computer/settings.py +82 -82
  89. hud/tools/edit.py +314 -314
  90. hud/tools/executors/__init__.py +30 -30
  91. hud/tools/executors/base.py +539 -539
  92. hud/tools/executors/pyautogui.py +621 -621
  93. hud/tools/executors/tests/__init__.py +1 -1
  94. hud/tools/executors/tests/test_base_executor.py +338 -338
  95. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  96. hud/tools/executors/xdo.py +511 -511
  97. hud/tools/playwright.py +412 -412
  98. hud/tools/tests/__init__.py +3 -3
  99. hud/tools/tests/test_base.py +282 -282
  100. hud/tools/tests/test_bash.py +158 -158
  101. hud/tools/tests/test_bash_extended.py +197 -197
  102. hud/tools/tests/test_computer.py +425 -425
  103. hud/tools/tests/test_computer_actions.py +34 -34
  104. hud/tools/tests/test_edit.py +259 -259
  105. hud/tools/tests/test_init.py +27 -27
  106. hud/tools/tests/test_playwright_tool.py +183 -183
  107. hud/tools/tests/test_tools.py +145 -145
  108. hud/tools/tests/test_utils.py +156 -156
  109. hud/tools/types.py +72 -72
  110. hud/tools/utils.py +50 -50
  111. hud/types.py +136 -136
  112. hud/utils/__init__.py +10 -10
  113. hud/utils/async_utils.py +65 -65
  114. hud/utils/design.py +236 -168
  115. hud/utils/mcp.py +55 -55
  116. hud/utils/progress.py +149 -149
  117. hud/utils/telemetry.py +66 -66
  118. hud/utils/tests/test_async_utils.py +173 -173
  119. hud/utils/tests/test_init.py +17 -17
  120. hud/utils/tests/test_progress.py +261 -261
  121. hud/utils/tests/test_telemetry.py +82 -82
  122. hud/utils/tests/test_version.py +8 -8
  123. hud/version.py +7 -7
  124. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
  125. hud_python-0.4.3.dist-info/RECORD +131 -0
  126. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
  127. hud/agents/art.py +0 -101
  128. hud_python-0.4.1.dist-info/RECORD +0 -132
  129. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
  130. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
@@ -1,621 +1,621 @@
1
- from __future__ import annotations
2
-
3
- import asyncio
4
- import base64
5
- import logging
6
- import os
7
- from io import BytesIO
8
- from typing import Any, Literal
9
-
10
- from hud.tools.types import ContentResult
11
-
12
- from .base import BaseExecutor
13
-
14
- logger = logging.getLogger(__name__)
15
-
16
- # Lazy loading for pyautogui
17
- _pyautogui = None
18
- _pyautogui_available = None
19
-
20
-
21
- def _get_pyautogui() -> Any | None:
22
- """Lazily import and return pyautogui module."""
23
- global _pyautogui, _pyautogui_available
24
-
25
- if _pyautogui_available is False:
26
- return None
27
-
28
- if _pyautogui is None:
29
- # Set display if not already set
30
- if "DISPLAY" not in os.environ:
31
- try:
32
- from hud.tools.computer import computer_settings
33
-
34
- os.environ["DISPLAY"] = str(computer_settings.DISPLAY_NUM)
35
- except (ImportError, AttributeError):
36
- os.environ["DISPLAY"] = ":0"
37
-
38
- try:
39
- import pyautogui # type: ignore[import-not-found]
40
-
41
- _pyautogui = pyautogui
42
- _pyautogui_available = True
43
-
44
- # Configure PyAutoGUI settings
45
- _pyautogui.FAILSAFE = False # Disable fail-safe feature
46
- _pyautogui.PAUSE = 0.1 # Small pause between actions
47
- except ImportError:
48
- _pyautogui_available = False
49
- logger.warning("PyAutoGUI is not available")
50
- return None
51
- except Exception as e:
52
- _pyautogui_available = False
53
- logger.warning("Failed to initialize PyAutoGUI: %s", e)
54
- return None
55
-
56
- return _pyautogui
57
-
58
-
59
- # Map CLA standard keys to PyAutoGUI keys (only where they differ)
60
- CLA_TO_PYAUTOGUI = {
61
- # Most keys are the same in PyAutoGUI, only map the differences
62
- "escape": "esc",
63
- "enter": "return",
64
- "pageup": "pgup",
65
- "pagedown": "pgdn",
66
- "printscreen": "prtscr",
67
- "prtsc": "prtscr",
68
- "super": "win",
69
- "command": "cmd",
70
- }
71
-
72
-
73
- class PyAutoGUIExecutor(BaseExecutor):
74
- """
75
- Cross-platform executor using PyAutoGUI.
76
- Works on Windows, macOS, and Linux.
77
-
78
- This executor should only be instantiated when PyAutoGUI is available and functional.
79
- """
80
-
81
- def __init__(self, display_num: int | None = None) -> None:
82
- """
83
- Initialize the executor.
84
-
85
- Args:
86
- display_num: X display number (used only on Linux, ignored on Windows/macOS)
87
- """
88
- super().__init__(display_num)
89
- self._pyautogui = None
90
- logger.info("PyAutoGUIExecutor initialized")
91
-
92
- @property
93
- def pyautogui(self) -> Any:
94
- """Get the pyautogui module, importing it lazily if needed."""
95
- if self._pyautogui is None:
96
- self._pyautogui = _get_pyautogui()
97
- if self._pyautogui is None:
98
- raise RuntimeError("PyAutoGUI is not available")
99
- return self._pyautogui
100
-
101
- def _map_key(self, key: str) -> str:
102
- """Map CLA standard key to PyAutoGUI key."""
103
- return CLA_TO_PYAUTOGUI.get(key.lower(), key.lower())
104
-
105
- def _map_keys(self, keys: list[str]) -> list[str]:
106
- """Map CLA standard keys to PyAutoGUI keys."""
107
- mapped_keys = []
108
- for key in keys:
109
- # Handle key combinations like "ctrl+a"
110
- if "+" in key:
111
- parts = key.split("+")
112
- mapped_parts = [self._map_key(part) for part in parts]
113
- mapped_keys.append("+".join(mapped_parts))
114
- else:
115
- mapped_keys.append(self._map_key(key))
116
- return mapped_keys
117
-
118
- @classmethod
119
- def is_available(cls) -> bool:
120
- """
121
- Check if PyAutoGUI is available and functional.
122
-
123
- Returns:
124
- True if PyAutoGUI is available and functional, False otherwise
125
- """
126
- pyautogui = _get_pyautogui()
127
- if not pyautogui:
128
- return False
129
-
130
- try:
131
- # Try to get screen size as a simple test
132
- pyautogui.size()
133
- return True
134
- except Exception:
135
- return False
136
-
137
- async def screenshot(self) -> str | None:
138
- """
139
- Take a screenshot and return base64 encoded image.
140
-
141
- Returns:
142
- Base64 encoded PNG image or None if failed
143
- """
144
- try:
145
- # Take screenshot using PyAutoGUI
146
- screenshot = self.pyautogui.screenshot()
147
-
148
- # Convert to base64
149
- buffer = BytesIO()
150
- screenshot.save(buffer, format="PNG")
151
- image_data = buffer.getvalue()
152
- return base64.b64encode(image_data).decode()
153
- except Exception as e:
154
- logger.error("Failed to take screenshot: %s", e)
155
- return None
156
-
157
- # ===== Helper Methods =====
158
-
159
- def _hold_keys_context(self, keys: list[str] | None) -> None:
160
- """
161
- Press and hold keys.
162
-
163
- Args:
164
- keys: List of keys to hold
165
- """
166
- if keys:
167
- for key in keys:
168
- self.pyautogui.keyDown(key)
169
-
170
- def _release_keys(self, keys: list[str] | None) -> None:
171
- """Release held keys."""
172
- if keys:
173
- for key in reversed(keys): # Release in reverse order
174
- self.pyautogui.keyUp(key)
175
-
176
- # ===== CLA Action Implementations =====
177
-
178
- async def click(
179
- self,
180
- x: int | None = None,
181
- y: int | None = None,
182
- button: Literal["left", "right", "middle", "back", "forward"] = "left",
183
- pattern: list[int] | None = None,
184
- hold_keys: list[str] | None = None,
185
- take_screenshot: bool = True,
186
- ) -> ContentResult:
187
- """Click at specified coordinates or current position."""
188
- try:
189
- # Map button names (PyAutoGUI doesn't support back/forward)
190
- button_map = {
191
- "left": "left",
192
- "right": "right",
193
- "middle": "middle",
194
- "back": "left",
195
- "forward": "right",
196
- } # Fallback for unsupported
197
- button_name = button_map.get(button, "left")
198
-
199
- # Hold keys if specified
200
- self._hold_keys_context(hold_keys)
201
-
202
- try:
203
- # Handle multi-clicks based on pattern
204
- if pattern:
205
- clicks = len(pattern) + 1
206
- interval = pattern[0] / 1000.0 if pattern else 0.1 # Convert ms to seconds
207
-
208
- if x is not None and y is not None:
209
- self.pyautogui.click(
210
- x=x, y=y, clicks=clicks, interval=interval, button=button_name
211
- )
212
- else:
213
- self.pyautogui.click(clicks=clicks, interval=interval, button=button_name)
214
- else:
215
- # Single click
216
- if x is not None and y is not None:
217
- self.pyautogui.click(x=x, y=y, button=button_name)
218
- else:
219
- self.pyautogui.click(button=button_name)
220
- finally:
221
- # Release held keys
222
- self._release_keys(hold_keys)
223
-
224
- result = ContentResult(
225
- output=f"Clicked {button} button at ({x}, {y})" if x else f"Clicked {button} button"
226
- )
227
-
228
- if take_screenshot:
229
- await asyncio.sleep(self._screenshot_delay)
230
- screenshot = await self.screenshot()
231
- if screenshot:
232
- result = ContentResult(
233
- output=result.output, error=result.error, base64_image=screenshot
234
- )
235
-
236
- return result
237
- except Exception as e:
238
- return ContentResult(error=str(e))
239
-
240
- async def write(
241
- self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
242
- ) -> ContentResult:
243
- """Type text with specified delay between keystrokes."""
244
- try:
245
- # Convert delay from milliseconds to seconds for PyAutoGUI
246
- interval = delay / 1000.0
247
- self.pyautogui.typewrite(text, interval=interval)
248
-
249
- if enter_after:
250
- self.pyautogui.press("enter")
251
-
252
- result = ContentResult(
253
- output=f"Typed: '{text}'" + (" and pressed Enter" if enter_after else "")
254
- )
255
-
256
- if take_screenshot:
257
- await asyncio.sleep(self._screenshot_delay)
258
- screenshot = await self.screenshot()
259
- if screenshot:
260
- result = ContentResult(
261
- output=result.output, error=result.error, base64_image=screenshot
262
- )
263
-
264
- return result
265
- except Exception as e:
266
- return ContentResult(error=str(e))
267
-
268
- async def key(self, key_sequence: str, take_screenshot: bool = True) -> ContentResult:
269
- """Press a key or key combination."""
270
- try:
271
- # Handle key combinations (e.g., "ctrl+c")
272
- if "+" in key_sequence:
273
- keys = key_sequence.split("+")
274
- self.pyautogui.hotkey(*keys)
275
- result = ContentResult(output=f"Pressed hotkey: {key_sequence}")
276
- else:
277
- # Map common key names from xdotool to PyAutoGUI
278
- key = key_sequence.lower()
279
- self.pyautogui.press(CLA_TO_PYAUTOGUI.get(key, key))
280
- result = ContentResult(output=f"Pressed key: {key_sequence}")
281
-
282
- if take_screenshot:
283
- await asyncio.sleep(self._screenshot_delay)
284
- screenshot = await self.screenshot()
285
- if screenshot:
286
- result = ContentResult(
287
- output=result.output, error=result.error, base64_image=screenshot
288
- )
289
-
290
- return result
291
- except Exception as e:
292
- return ContentResult(error=str(e))
293
-
294
- async def press(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
295
- """Press a key combination (hotkey)."""
296
- try:
297
- # Map CLA keys to PyAutoGUI keys
298
- mapped_keys = self._map_keys(keys)
299
-
300
- # Handle single key or combination
301
- if len(mapped_keys) == 1 and "+" not in mapped_keys[0]:
302
- self.pyautogui.press(mapped_keys[0])
303
- result = ContentResult(output=f"Pressed key: {keys[0]}")
304
- else:
305
- # For combinations, use hotkey
306
- hotkey_parts = []
307
- for key in mapped_keys:
308
- if "+" in key:
309
- hotkey_parts.extend(key.split("+"))
310
- else:
311
- hotkey_parts.append(key)
312
- self.pyautogui.hotkey(*hotkey_parts)
313
- result = ContentResult(output=f"Pressed hotkey: {'+'.join(keys)}")
314
-
315
- if take_screenshot:
316
- await asyncio.sleep(self._screenshot_delay)
317
- screenshot = await self.screenshot()
318
- if screenshot:
319
- result = ContentResult(
320
- output=result.output, error=result.error, base64_image=screenshot
321
- )
322
-
323
- return result
324
- except Exception as e:
325
- return ContentResult(error=str(e))
326
-
327
- async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
328
- """Press and hold keys."""
329
- try:
330
- # Map CLA keys to PyAutoGUI keys
331
- mapped_keys = self._map_keys(keys)
332
- for key in mapped_keys:
333
- self.pyautogui.keyDown(key)
334
-
335
- result = ContentResult(output=f"Keys down: {', '.join(keys)}")
336
-
337
- if take_screenshot:
338
- await asyncio.sleep(self._screenshot_delay)
339
- screenshot = await self.screenshot()
340
- if screenshot:
341
- result = ContentResult(
342
- output=result.output, error=result.error, base64_image=screenshot
343
- )
344
-
345
- return result
346
- except Exception as e:
347
- return ContentResult(error=str(e))
348
-
349
- async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
350
- """Release held keys."""
351
- try:
352
- # Map CLA keys to PyAutoGUI keys
353
- mapped_keys = self._map_keys(keys)
354
- for key in reversed(mapped_keys): # Release in reverse order
355
- self.pyautogui.keyUp(key)
356
-
357
- result = ContentResult(output=f"Keys up: {', '.join(keys)}")
358
-
359
- if take_screenshot:
360
- await asyncio.sleep(self._screenshot_delay)
361
- screenshot = await self.screenshot()
362
- if screenshot:
363
- result = ContentResult(
364
- output=result.output, error=result.error, base64_image=screenshot
365
- )
366
-
367
- return result
368
- except Exception as e:
369
- return ContentResult(error=str(e))
370
-
371
- async def scroll(
372
- self,
373
- x: int | None = None,
374
- y: int | None = None,
375
- scroll_x: int | None = None,
376
- scroll_y: int | None = None,
377
- hold_keys: list[str] | None = None,
378
- take_screenshot: bool = True,
379
- ) -> ContentResult:
380
- """Scroll at specified position."""
381
- try:
382
- # Move to position if specified
383
- if x is not None and y is not None:
384
- self.pyautogui.moveTo(x, y)
385
-
386
- # Hold keys if specified
387
- self._hold_keys_context(hold_keys)
388
-
389
- try:
390
- msg_parts = []
391
-
392
- # Perform vertical scroll
393
- if scroll_y and scroll_y != 0:
394
- # PyAutoGUI: positive = up, negative = down (opposite of our convention)
395
- self.pyautogui.scroll(-scroll_y)
396
- msg_parts.append(f"vertically by {scroll_y}")
397
-
398
- # Perform horizontal scroll (if supported)
399
- if scroll_x and scroll_x != 0:
400
- # PyAutoGUI horizontal scroll might not work on all platforms
401
- try:
402
- self.pyautogui.hscroll(scroll_x)
403
- msg_parts.append(f"horizontally by {scroll_x}")
404
- except AttributeError:
405
- # hscroll not available
406
- msg_parts.append(f"horizontally by {scroll_x} (not supported)")
407
-
408
- if not msg_parts:
409
- return ContentResult(output="No scroll amount specified")
410
-
411
- msg = "Scrolled " + " and ".join(msg_parts)
412
- if x is not None and y is not None:
413
- msg += f" at ({x}, {y})"
414
- if hold_keys:
415
- msg += f" while holding {hold_keys}"
416
- finally:
417
- # Release held keys
418
- self._release_keys(hold_keys)
419
-
420
- result = ContentResult(output=msg)
421
-
422
- if take_screenshot:
423
- await asyncio.sleep(self._screenshot_delay)
424
- screenshot = await self.screenshot()
425
- if screenshot:
426
- result = ContentResult(
427
- output=result.output, error=result.error, base64_image=screenshot
428
- )
429
-
430
- return result
431
- except Exception as e:
432
- return ContentResult(error=str(e))
433
-
434
- async def move(
435
- self,
436
- x: int | None = None,
437
- y: int | None = None,
438
- offset_x: int | None = None,
439
- offset_y: int | None = None,
440
- take_screenshot: bool = True,
441
- ) -> ContentResult:
442
- """Move mouse cursor."""
443
- try:
444
- if x is not None and y is not None:
445
- # Absolute move
446
- self.pyautogui.moveTo(x, y, duration=0.1)
447
- result = ContentResult(output=f"Moved mouse to ({x}, {y})")
448
- elif offset_x is not None or offset_y is not None:
449
- # Relative move
450
- offset_x = offset_x or 0
451
- offset_y = offset_y or 0
452
- self.pyautogui.moveRel(xOffset=offset_x, yOffset=offset_y, duration=0.1)
453
- result = ContentResult(output=f"Moved mouse by offset ({offset_x}, {offset_y})")
454
- else:
455
- return ContentResult(output="No move coordinates specified")
456
-
457
- if take_screenshot:
458
- await asyncio.sleep(self._screenshot_delay)
459
- screenshot = await self.screenshot()
460
- if screenshot:
461
- result = ContentResult(
462
- output=result.output, error=result.error, base64_image=screenshot
463
- )
464
-
465
- return result
466
- except Exception as e:
467
- return ContentResult(error=str(e))
468
-
469
- async def drag(
470
- self,
471
- path: list[tuple[int, int]],
472
- pattern: list[int] | None = None,
473
- hold_keys: list[str] | None = None,
474
- take_screenshot: bool = True,
475
- ) -> ContentResult:
476
- """Drag along a path."""
477
- if len(path) < 2:
478
- return ContentResult(error="Drag path must have at least 2 points")
479
-
480
- try:
481
- # Hold keys if specified
482
- self._hold_keys_context(hold_keys)
483
-
484
- try:
485
- # Move to start
486
- start_x, start_y = path[0]
487
- self.pyautogui.moveTo(start_x, start_y)
488
-
489
- # Handle multi-point drag
490
- if len(path) == 2:
491
- # Simple drag
492
- end_x, end_y = path[1]
493
- self.pyautogui.dragTo(end_x, end_y, duration=0.5, button="left")
494
- result = ContentResult(
495
- output=f"Dragged from ({start_x}, {start_y}) to ({end_x}, {end_y})"
496
- )
497
- else:
498
- # Multi-point drag
499
- self.pyautogui.mouseDown(button="left")
500
- for i, (x, y) in enumerate(path[1:], 1):
501
- duration = 0.1
502
- if pattern and i - 1 < len(pattern):
503
- duration = pattern[i - 1] / 1000.0 # Convert ms to seconds
504
- self.pyautogui.moveTo(x, y, duration=duration)
505
- self.pyautogui.mouseUp(button="left")
506
-
507
- result = ContentResult(output=f"Dragged along {len(path)} points")
508
-
509
- if hold_keys:
510
- result = ContentResult(output=f"{result.output} while holding {hold_keys}")
511
- finally:
512
- # Release held keys
513
- self._release_keys(hold_keys)
514
-
515
- if take_screenshot:
516
- await asyncio.sleep(self._screenshot_delay)
517
- screenshot = await self.screenshot()
518
- if screenshot:
519
- result = ContentResult(
520
- output=result.output, error=result.error, base64_image=screenshot
521
- )
522
-
523
- return result
524
- except Exception as e:
525
- return ContentResult(error=str(e))
526
-
527
- async def mouse_down(
528
- self,
529
- button: Literal["left", "right", "middle", "back", "forward"] = "left",
530
- take_screenshot: bool = True,
531
- ) -> ContentResult:
532
- """Press and hold a mouse button."""
533
- try:
534
- # Map button names (PyAutoGUI doesn't support back/forward)
535
- button_map = {
536
- "left": "left",
537
- "right": "right",
538
- "middle": "middle",
539
- "back": "left",
540
- "forward": "right",
541
- } # Fallback for unsupported
542
- button_name = button_map.get(button, "left")
543
-
544
- self.pyautogui.mouseDown(button=button_name)
545
- result = ContentResult(output=f"Mouse down: {button} button")
546
-
547
- if take_screenshot:
548
- await asyncio.sleep(self._screenshot_delay)
549
- screenshot = await self.screenshot()
550
- if screenshot:
551
- result = ContentResult(
552
- output=result.output, error=result.error, base64_image=screenshot
553
- )
554
-
555
- return result
556
- except Exception as e:
557
- return ContentResult(error=str(e))
558
-
559
- async def mouse_up(
560
- self,
561
- button: Literal["left", "right", "middle", "back", "forward"] = "left",
562
- take_screenshot: bool = True,
563
- ) -> ContentResult:
564
- """Release a mouse button."""
565
- try:
566
- # Map button names (PyAutoGUI doesn't support back/forward)
567
- button_map = {
568
- "left": "left",
569
- "right": "right",
570
- "middle": "middle",
571
- "back": "left",
572
- "forward": "right",
573
- } # Fallback for unsupported
574
- button_name = button_map.get(button, "left")
575
-
576
- self.pyautogui.mouseUp(button=button_name)
577
- result = ContentResult(output=f"Mouse up: {button} button")
578
-
579
- if take_screenshot:
580
- await asyncio.sleep(self._screenshot_delay)
581
- screenshot = await self.screenshot()
582
- if screenshot:
583
- result = ContentResult(
584
- output=result.output, error=result.error, base64_image=screenshot
585
- )
586
-
587
- return result
588
- except Exception as e:
589
- return ContentResult(error=str(e))
590
-
591
- async def hold_key(
592
- self, key: str, duration: float, take_screenshot: bool = True
593
- ) -> ContentResult:
594
- """Hold a key for a specified duration."""
595
- try:
596
- # Map CLA key to PyAutoGUI key
597
- mapped_key = self._map_key(key)
598
- self.pyautogui.keyDown(mapped_key)
599
- await asyncio.sleep(duration)
600
- self.pyautogui.keyUp(mapped_key)
601
-
602
- result = ContentResult(output=f"Held key '{key}' for {duration} seconds")
603
-
604
- if take_screenshot:
605
- screenshot = await self.screenshot()
606
- if screenshot:
607
- result = ContentResult(
608
- output=result.output, error=result.error, base64_image=screenshot
609
- )
610
-
611
- return result
612
- except Exception as e:
613
- return ContentResult(error=str(e))
614
-
615
- async def position(self) -> ContentResult:
616
- """Get current cursor position."""
617
- try:
618
- x, y = self.pyautogui.position()
619
- return ContentResult(output=f"Mouse position: ({x}, {y})")
620
- except Exception as e:
621
- return ContentResult(error=str(e))
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import base64
5
+ import logging
6
+ import os
7
+ from io import BytesIO
8
+ from typing import Any, Literal
9
+
10
+ from hud.tools.types import ContentResult
11
+
12
+ from .base import BaseExecutor
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Lazy loading for pyautogui
17
+ _pyautogui = None
18
+ _pyautogui_available = None
19
+
20
+
21
+ def _get_pyautogui() -> Any | None:
22
+ """Lazily import and return pyautogui module."""
23
+ global _pyautogui, _pyautogui_available
24
+
25
+ if _pyautogui_available is False:
26
+ return None
27
+
28
+ if _pyautogui is None:
29
+ # Set display if not already set
30
+ if "DISPLAY" not in os.environ:
31
+ try:
32
+ from hud.tools.computer import computer_settings
33
+
34
+ os.environ["DISPLAY"] = str(computer_settings.DISPLAY_NUM)
35
+ except (ImportError, AttributeError):
36
+ os.environ["DISPLAY"] = ":0"
37
+
38
+ try:
39
+ import pyautogui # type: ignore[import-not-found]
40
+
41
+ _pyautogui = pyautogui
42
+ _pyautogui_available = True
43
+
44
+ # Configure PyAutoGUI settings
45
+ _pyautogui.FAILSAFE = False # Disable fail-safe feature
46
+ _pyautogui.PAUSE = 0.1 # Small pause between actions
47
+ except ImportError:
48
+ _pyautogui_available = False
49
+ logger.warning("PyAutoGUI is not available")
50
+ return None
51
+ except Exception as e:
52
+ _pyautogui_available = False
53
+ logger.warning("Failed to initialize PyAutoGUI: %s", e)
54
+ return None
55
+
56
+ return _pyautogui
57
+
58
+
59
+ # Map CLA standard keys to PyAutoGUI keys (only where they differ)
60
+ CLA_TO_PYAUTOGUI = {
61
+ # Most keys are the same in PyAutoGUI, only map the differences
62
+ "escape": "esc",
63
+ "enter": "return",
64
+ "pageup": "pgup",
65
+ "pagedown": "pgdn",
66
+ "printscreen": "prtscr",
67
+ "prtsc": "prtscr",
68
+ "super": "win",
69
+ "command": "cmd",
70
+ }
71
+
72
+
73
+ class PyAutoGUIExecutor(BaseExecutor):
74
+ """
75
+ Cross-platform executor using PyAutoGUI.
76
+ Works on Windows, macOS, and Linux.
77
+
78
+ This executor should only be instantiated when PyAutoGUI is available and functional.
79
+ """
80
+
81
+ def __init__(self, display_num: int | None = None) -> None:
82
+ """
83
+ Initialize the executor.
84
+
85
+ Args:
86
+ display_num: X display number (used only on Linux, ignored on Windows/macOS)
87
+ """
88
+ super().__init__(display_num)
89
+ self._pyautogui = None
90
+ logger.info("PyAutoGUIExecutor initialized")
91
+
92
+ @property
93
+ def pyautogui(self) -> Any:
94
+ """Get the pyautogui module, importing it lazily if needed."""
95
+ if self._pyautogui is None:
96
+ self._pyautogui = _get_pyautogui()
97
+ if self._pyautogui is None:
98
+ raise RuntimeError("PyAutoGUI is not available")
99
+ return self._pyautogui
100
+
101
+ def _map_key(self, key: str) -> str:
102
+ """Map CLA standard key to PyAutoGUI key."""
103
+ return CLA_TO_PYAUTOGUI.get(key.lower(), key.lower())
104
+
105
+ def _map_keys(self, keys: list[str]) -> list[str]:
106
+ """Map CLA standard keys to PyAutoGUI keys."""
107
+ mapped_keys = []
108
+ for key in keys:
109
+ # Handle key combinations like "ctrl+a"
110
+ if "+" in key:
111
+ parts = key.split("+")
112
+ mapped_parts = [self._map_key(part) for part in parts]
113
+ mapped_keys.append("+".join(mapped_parts))
114
+ else:
115
+ mapped_keys.append(self._map_key(key))
116
+ return mapped_keys
117
+
118
+ @classmethod
119
+ def is_available(cls) -> bool:
120
+ """
121
+ Check if PyAutoGUI is available and functional.
122
+
123
+ Returns:
124
+ True if PyAutoGUI is available and functional, False otherwise
125
+ """
126
+ pyautogui = _get_pyautogui()
127
+ if not pyautogui:
128
+ return False
129
+
130
+ try:
131
+ # Try to get screen size as a simple test
132
+ pyautogui.size()
133
+ return True
134
+ except Exception:
135
+ return False
136
+
137
+ async def screenshot(self) -> str | None:
138
+ """
139
+ Take a screenshot and return base64 encoded image.
140
+
141
+ Returns:
142
+ Base64 encoded PNG image or None if failed
143
+ """
144
+ try:
145
+ # Take screenshot using PyAutoGUI
146
+ screenshot = self.pyautogui.screenshot()
147
+
148
+ # Convert to base64
149
+ buffer = BytesIO()
150
+ screenshot.save(buffer, format="PNG")
151
+ image_data = buffer.getvalue()
152
+ return base64.b64encode(image_data).decode()
153
+ except Exception as e:
154
+ logger.error("Failed to take screenshot: %s", e)
155
+ return None
156
+
157
+ # ===== Helper Methods =====
158
+
159
+ def _hold_keys_context(self, keys: list[str] | None) -> None:
160
+ """
161
+ Press and hold keys.
162
+
163
+ Args:
164
+ keys: List of keys to hold
165
+ """
166
+ if keys:
167
+ for key in keys:
168
+ self.pyautogui.keyDown(key)
169
+
170
+ def _release_keys(self, keys: list[str] | None) -> None:
171
+ """Release held keys."""
172
+ if keys:
173
+ for key in reversed(keys): # Release in reverse order
174
+ self.pyautogui.keyUp(key)
175
+
176
+ # ===== CLA Action Implementations =====
177
+
178
async def click(
    self,
    x: int | None = None,
    y: int | None = None,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    pattern: list[int] | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """Click at specified coordinates or current position.

    Args:
        x: Target x coordinate. Used only when ``y`` is also given;
            otherwise the click happens at the current cursor position.
        y: Target y coordinate (see ``x``).
        button: Button to click. ``"back"`` and ``"forward"`` fall back to
            ``"left"`` and ``"right"`` respectively.
        pattern: Delays in milliseconds between clicks. ``n`` entries give
            ``n + 1`` clicks; only the first entry is used, as a uniform
            interval between all clicks.
        hold_keys: Keys held down for the duration of the click.
        take_screenshot: Attach a screenshot taken after the click.

    Returns:
        A ContentResult describing the click, or one carrying ``error``
        if anything raised.
    """
    try:
        # Map button names (PyAutoGUI doesn't support back/forward)
        button_map = {
            "left": "left",
            "right": "right",
            "middle": "middle",
            "back": "left",
            "forward": "right",
        }  # Fallback for unsupported
        button_name = button_map.get(button, "left")

        # Coordinates only count when both are present; 0 is a valid value.
        has_target = x is not None and y is not None

        click_kwargs: dict = {"button": button_name}
        if has_target:
            click_kwargs.update(x=x, y=y)
        if pattern:
            # Multi-click: convert the first delay from ms to seconds.
            click_kwargs.update(clicks=len(pattern) + 1, interval=pattern[0] / 1000.0)

        try:
            # Holding inside the try guarantees the release below runs even
            # if pressing one of the keys fails partway through.
            self._hold_keys_context(hold_keys)
            self.pyautogui.click(**click_kwargs)
        finally:
            # Release held keys
            self._release_keys(hold_keys)

        result = ContentResult(
            output=(
                f"Clicked {button} button at ({x}, {y})"
                if has_target
                else f"Clicked {button} button"
            )
        )

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            screenshot = await self.screenshot()
            if screenshot:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=screenshot
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
239
+
240
async def write(
    self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
) -> ContentResult:
    """Type ``text`` keystroke by keystroke, optionally followed by Enter.

    ``delay`` is the pause between keystrokes in milliseconds. Any
    exception is reported through the returned result's ``error``.
    """
    try:
        # PyAutoGUI takes the inter-key pause in seconds.
        self.pyautogui.typewrite(text, interval=delay / 1000.0)

        summary = f"Typed: '{text}'"
        if enter_after:
            self.pyautogui.press("enter")
            summary += " and pressed Enter"

        result = ContentResult(output=summary)

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
267
+
268
async def key(self, key_sequence: str, take_screenshot: bool = True) -> ContentResult:
    """Press a key or key combination.

    Args:
        key_sequence: A single key name (e.g. ``"Return"``) or a
            ``+``-joined combination (e.g. ``"ctrl+c"``). A bare ``"+"``
            is the plus key itself, and a trailing ``"++"`` (as in
            ``"ctrl++"``) ends the combination with the plus key.
        take_screenshot: Attach a screenshot taken after the key press.

    Returns:
        A ContentResult describing the key press, or one carrying
        ``error`` if anything raised.
    """
    try:
        if "+" in key_sequence and key_sequence.strip() != "+":
            # Handle key combinations (e.g., "ctrl+c")
            combo = key_sequence
            ends_with_plus_key = combo.endswith("++")
            if ends_with_plus_key:
                combo = combo[:-2]

            # Drop empty segments; trim stray whitespace ("ctrl + c") but
            # keep a segment that is only whitespace (the space key).
            parts = [part.strip() or part for part in combo.split("+") if part]
            if ends_with_plus_key:
                parts.append("+")

            # Apply the same name mapping as the single-key path. The lookup
            # is lower-cased; unmapped names are passed through untouched.
            keys = [CLA_TO_PYAUTOGUI.get(part.lower(), part) for part in parts]
            self.pyautogui.hotkey(*keys)
            result = ContentResult(output=f"Pressed hotkey: {key_sequence}")
        else:
            # Map common key names from xdotool to PyAutoGUI
            key = key_sequence.lower()
            self.pyautogui.press(CLA_TO_PYAUTOGUI.get(key, key))
            result = ContentResult(output=f"Pressed key: {key_sequence}")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            screenshot = await self.screenshot()
            if screenshot:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=screenshot
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
293
+
294
async def press(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
    """Press one key, or several keys together as a hotkey.

    Key names are translated with ``self._map_keys`` first. Any exception
    is reported through the returned result's ``error``.
    """
    try:
        translated = self._map_keys(keys)

        is_single_key = len(translated) == 1 and "+" not in translated[0]
        if is_single_key:
            self.pyautogui.press(translated[0])
            result = ContentResult(output=f"Pressed key: {keys[0]}")
        else:
            # A translated entry may itself be a "+"-joined combo; flatten
            # everything into one list of individual key names.
            chord = [piece for entry in translated for piece in entry.split("+")]
            self.pyautogui.hotkey(*chord)
            result = ContentResult(output=f"Pressed hotkey: {'+'.join(keys)}")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
326
+
327
async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
    """Push the given keys down and leave them held.

    Key names are translated with ``self._map_keys`` and pressed in the
    order given. Any exception is reported through the result's ``error``.
    """
    try:
        for name in self._map_keys(keys):
            self.pyautogui.keyDown(name)

        result = ContentResult(output=f"Keys down: {', '.join(keys)}")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
348
+
349
async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
    """Let go of the given keys, last one first.

    Key names are translated with ``self._map_keys``. Any exception is
    reported through the returned result's ``error``.
    """
    try:
        translated = self._map_keys(keys)
        # Undo presses in the opposite order to the one they were made in.
        for name in reversed(translated):
            self.pyautogui.keyUp(name)

        result = ContentResult(output=f"Keys up: {', '.join(keys)}")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
370
+
371
async def scroll(
    self,
    x: int | None = None,
    y: int | None = None,
    scroll_x: int | None = None,
    scroll_y: int | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """Scroll vertically and/or horizontally, optionally at a given point.

    The cursor is moved first when both ``x`` and ``y`` are given.
    ``hold_keys`` are held while scrolling and always released afterwards.
    Any exception is reported through the returned result's ``error``.
    """
    try:
        at_point = x is not None and y is not None
        if at_point:
            self.pyautogui.moveTo(x, y)

        self._hold_keys_context(hold_keys)

        try:
            described = []

            if scroll_y:
                # Our convention is the inverse of PyAutoGUI's (positive = up).
                self.pyautogui.scroll(-scroll_y)
                described.append(f"vertically by {scroll_y}")

            if scroll_x:
                # hscroll may be missing on some platforms.
                try:
                    self.pyautogui.hscroll(scroll_x)
                except AttributeError:
                    described.append(f"horizontally by {scroll_x} (not supported)")
                else:
                    described.append(f"horizontally by {scroll_x}")

            if not described:
                return ContentResult(output="No scroll amount specified")

            msg = "Scrolled " + " and ".join(described)
            if at_point:
                msg += f" at ({x}, {y})"
            if hold_keys:
                msg += f" while holding {hold_keys}"
        finally:
            self._release_keys(hold_keys)

        result = ContentResult(output=msg)

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
433
+
434
async def move(
    self,
    x: int | None = None,
    y: int | None = None,
    offset_x: int | None = None,
    offset_y: int | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """Move the cursor to an absolute point or by a relative offset.

    An absolute target (both ``x`` and ``y``) takes precedence over the
    offsets; a missing offset component counts as 0. With neither form
    given, nothing moves and no screenshot is taken.
    """
    try:
        absolute = x is not None and y is not None
        relative = offset_x is not None or offset_y is not None

        if not (absolute or relative):
            return ContentResult(output="No move coordinates specified")

        if absolute:
            self.pyautogui.moveTo(x, y, duration=0.1)
            result = ContentResult(output=f"Moved mouse to ({x}, {y})")
        else:
            offset_x = offset_x or 0
            offset_y = offset_y or 0
            self.pyautogui.moveRel(xOffset=offset_x, yOffset=offset_y, duration=0.1)
            result = ContentResult(output=f"Moved mouse by offset ({offset_x}, {offset_y})")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
468
+
469
async def drag(
    self,
    path: list[tuple[int, int]],
    pattern: list[int] | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ContentResult:
    """Drag along a path.

    Args:
        path: Points to drag through; at least two are required. The
            cursor is first moved to ``path[0]``.
        pattern: Per-segment move durations in milliseconds, used only for
            paths with more than two points. Segments without an entry
            take 0.1 s. A two-point drag always takes 0.5 s.
        hold_keys: Keys held down for the duration of the drag.
        take_screenshot: Attach a screenshot taken after the drag.

    Returns:
        A ContentResult describing the drag, or one carrying ``error`` if
        the path is too short or anything raised.
    """
    if len(path) < 2:
        return ContentResult(error="Drag path must have at least 2 points")

    try:
        try:
            # Holding inside the try guarantees the release below runs even
            # if pressing one of the keys fails partway through.
            self._hold_keys_context(hold_keys)

            # Move to start
            start_x, start_y = path[0]
            self.pyautogui.moveTo(start_x, start_y)

            if len(path) == 2:
                # Simple drag
                end_x, end_y = path[1]
                self.pyautogui.dragTo(end_x, end_y, duration=0.5, button="left")
                result = ContentResult(
                    output=f"Dragged from ({start_x}, {start_y}) to ({end_x}, {end_y})"
                )
            else:
                # Multi-point drag
                self.pyautogui.mouseDown(button="left")
                try:
                    for i, (x, y) in enumerate(path[1:]):
                        duration = 0.1
                        if pattern and i < len(pattern):
                            duration = pattern[i] / 1000.0  # Convert ms to seconds
                        self.pyautogui.moveTo(x, y, duration=duration)
                finally:
                    # Always let go of the button, even if a move failed;
                    # otherwise the left button would stay pressed.
                    self.pyautogui.mouseUp(button="left")

                result = ContentResult(output=f"Dragged along {len(path)} points")

            if hold_keys:
                result = ContentResult(output=f"{result.output} while holding {hold_keys}")
        finally:
            # Release held keys
            self._release_keys(hold_keys)

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            screenshot = await self.screenshot()
            if screenshot:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=screenshot
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
526
+
527
async def mouse_down(
    self,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    take_screenshot: bool = True,
) -> ContentResult:
    """Push a mouse button down and leave it held.

    PyAutoGUI has no back/forward buttons, so ``"back"`` is sent as
    ``"left"`` and ``"forward"`` as ``"right"``; anything unrecognised is
    sent as ``"left"``.
    """
    try:
        substitutes = {"back": "left", "forward": "right"}
        if button in ("left", "right", "middle"):
            actual_button = button
        else:
            actual_button = substitutes.get(button, "left")

        self.pyautogui.mouseDown(button=actual_button)
        result = ContentResult(output=f"Mouse down: {button} button")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
558
+
559
async def mouse_up(
    self,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    take_screenshot: bool = True,
) -> ContentResult:
    """Let go of a held mouse button.

    PyAutoGUI has no back/forward buttons, so ``"back"`` is sent as
    ``"left"`` and ``"forward"`` as ``"right"``; anything unrecognised is
    sent as ``"left"``.
    """
    try:
        substitutes = {"back": "left", "forward": "right"}
        if button in ("left", "right", "middle"):
            actual_button = button
        else:
            actual_button = substitutes.get(button, "left")

        self.pyautogui.mouseUp(button=actual_button)
        result = ContentResult(output=f"Mouse up: {button} button")

        if take_screenshot:
            await asyncio.sleep(self._screenshot_delay)
            image = await self.screenshot()
            if image:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=image
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
590
+
591
async def hold_key(
    self, key: str, duration: float, take_screenshot: bool = True
) -> ContentResult:
    """Hold a key for a specified duration.

    Args:
        key: Key name; translated with ``self._map_key`` before use.
        duration: How long to keep the key down, in seconds.
        take_screenshot: Attach a screenshot taken after the key is
            released.

    Returns:
        A ContentResult describing the hold, or one carrying ``error`` if
        anything raised.
    """
    try:
        # Map CLA key to PyAutoGUI key
        mapped_key = self._map_key(key)
        self.pyautogui.keyDown(mapped_key)
        try:
            await asyncio.sleep(duration)
        finally:
            # Release even if the sleep is cancelled: CancelledError is not
            # an Exception subclass, so the outer handler would not see it
            # and the key would otherwise stay held.
            self.pyautogui.keyUp(mapped_key)

        result = ContentResult(output=f"Held key '{key}' for {duration} seconds")

        if take_screenshot:
            screenshot = await self.screenshot()
            if screenshot:
                result = ContentResult(
                    output=result.output, error=result.error, base64_image=screenshot
                )

        return result
    except Exception as e:
        return ContentResult(error=str(e))
614
+
615
async def position(self) -> ContentResult:
    """Report where the mouse cursor currently is.

    Any exception is reported through the returned result's ``error``.
    """
    try:
        cursor = self.pyautogui.position()
        cursor_x, cursor_y = cursor
        return ContentResult(output=f"Mouse position: ({cursor_x}, {cursor_y})")
    except Exception as e:
        return ContentResult(error=str(e))