hud-python 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (192)
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +15 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +370 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +379 -0
  45. hud/clients/fastmcp.py +222 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -420
  87. hud/tools/computer/hud.py +376 -334
  88. hud/tools/computer/openai.py +295 -292
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.1.dist-info/METADATA +476 -0
  126. hud_python-0.4.1.dist-info/RECORD +132 -0
  127. hud_python-0.4.1.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.5.dist-info → hud_python-0.4.1.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.5.dist-info/METADATA +0 -284
  190. hud_python-0.3.5.dist-info/RECORD +0 -120
  191. /hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.5.dist-info → hud_python-0.4.1.dist-info}/WHEEL +0 -0
@@ -1,532 +1,539 @@
1
- from __future__ import annotations
2
-
3
- import asyncio
4
- import logging
5
- from typing import Literal, TypeAlias
6
-
7
- from hud.tools.base import ToolResult
8
-
9
- logger = logging.getLogger(__name__)
10
-
11
-
12
- class BaseExecutor:
13
- """
14
- Base executor that provides simulation implementations for all CLA (Common Language Actions).
15
-
16
- This class:
17
- 1. Defines all action methods that HudComputer expects
18
- 2. Provides simulation implementations for environments without display
19
- 3. Serves as the base class for platform-specific executors (XDO, PyAutoGUI)
20
-
21
- When used directly, it simulates all actions. Subclasses provide real implementations.
22
- """
23
-
24
- def __init__(self, display_num: int | None = None) -> None:
25
- """
26
- Initialize the base executor.
27
-
28
- Args:
29
- display_num: X display number (for Linux/X11 systems)
30
- """
31
- self.display_num = display_num
32
- self._screenshot_delay = 0.5
33
- logger.info("BaseExecutor initialized")
34
-
35
- # ===== Core CLA Actions =====
36
-
37
- async def click(
38
- self,
39
- x: int | None = None,
40
- y: int | None = None,
41
- button: Literal["left", "right", "middle", "back", "forward"] = "left",
42
- pattern: list[int] | None = None,
43
- hold_keys: list[str] | None = None,
44
- take_screenshot: bool = True,
45
- ) -> ToolResult:
46
- """
47
- Click at specified coordinates.
48
-
49
- Args:
50
- x, y: Coordinates to click at (None = current position)
51
- button: Mouse button to use
52
- pattern: List of delays for multi-clicks (e.g., [100] for double-click)
53
- hold_keys: Keys to hold during click
54
- take_screenshot: Whether to capture screenshot after action
55
- """
56
- msg = f"[SIMULATED] Click at ({x}, {y}) with {button} button"
57
- if pattern:
58
- msg += f" (multi-click pattern: {pattern})"
59
- if hold_keys:
60
- msg += f" while holding {hold_keys}"
61
-
62
- screenshot = await self.screenshot() if take_screenshot else None
63
- return ToolResult(output=msg, base64_image=screenshot)
64
-
65
- async def type(
66
- self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
67
- ) -> ToolResult:
68
- """
69
- Type text using keyboard.
70
-
71
- Args:
72
- text: Text to type
73
- enter_after: Whether to press Enter after typing
74
- delay: Delay between keystrokes in milliseconds
75
- take_screenshot: Whether to capture screenshot after action
76
- """
77
- msg = f"[SIMULATED] Type '{text}'"
78
- if enter_after:
79
- msg += " followed by Enter"
80
-
81
- screenshot = await self.screenshot() if take_screenshot else None
82
- return ToolResult(output=msg, base64_image=screenshot)
83
-
84
- async def press(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
85
- """
86
- Press a key combination (hotkey).
87
-
88
- Args:
89
- keys: List of keys to press together (e.g., ["ctrl", "c"])
90
- take_screenshot: Whether to capture screenshot after action
91
- """
92
- key_combo = "+".join(keys)
93
- msg = f"[SIMULATED] Press key combination: {key_combo}"
94
-
95
- screenshot = await self.screenshot() if take_screenshot else None
96
- return ToolResult(output=msg, base64_image=screenshot)
97
-
98
- async def key(self, key_sequence: str, take_screenshot: bool = True) -> ToolResult:
99
- """
100
- Press a single key or key combination.
101
-
102
- Args:
103
- key_sequence: Key or combination like "Return" or "ctrl+a"
104
- take_screenshot: Whether to capture screenshot after action
105
- """
106
- msg = f"[SIMULATED] Press key: {key_sequence}"
107
-
108
- screenshot = await self.screenshot() if take_screenshot else None
109
- return ToolResult(output=msg, base64_image=screenshot)
110
-
111
- async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
112
- """
113
- Press and hold keys.
114
-
115
- Args:
116
- keys: Keys to press and hold
117
- take_screenshot: Whether to capture screenshot after action
118
- """
119
- msg = f"[SIMULATED] Key down: {', '.join(keys)}"
120
-
121
- screenshot = await self.screenshot() if take_screenshot else None
122
- return ToolResult(output=msg, base64_image=screenshot)
123
-
124
- async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
125
- """
126
- Release held keys.
127
-
128
- Args:
129
- keys: Keys to release
130
- take_screenshot: Whether to capture screenshot after action
131
- """
132
- msg = f"[SIMULATED] Key up: {', '.join(keys)}"
133
-
134
- screenshot = await self.screenshot() if take_screenshot else None
135
- return ToolResult(output=msg, base64_image=screenshot)
136
-
137
- async def scroll(
138
- self,
139
- x: int | None = None,
140
- y: int | None = None,
141
- scroll_x: int | None = None,
142
- scroll_y: int | None = None,
143
- hold_keys: list[str] | None = None,
144
- take_screenshot: bool = True,
145
- ) -> ToolResult:
146
- """
147
- Scroll at specified position.
148
-
149
- Args:
150
- x, y: Position to scroll at (None = current position)
151
- scroll_x: Horizontal scroll amount (positive = right)
152
- scroll_y: Vertical scroll amount (positive = down)
153
- hold_keys: Keys to hold during scroll
154
- take_screenshot: Whether to capture screenshot after action
155
- """
156
- msg = "[SIMULATED] Scroll"
157
- if x is not None and y is not None:
158
- msg += f" at ({x}, {y})"
159
- if scroll_x:
160
- msg += f" horizontally by {scroll_x}"
161
- if scroll_y:
162
- msg += f" vertically by {scroll_y}"
163
- if hold_keys:
164
- msg += f" while holding {hold_keys}"
165
-
166
- screenshot = await self.screenshot() if take_screenshot else None
167
- return ToolResult(output=msg, base64_image=screenshot)
168
-
169
- async def move(
170
- self,
171
- x: int | None = None,
172
- y: int | None = None,
173
- offset_x: int | None = None,
174
- offset_y: int | None = None,
175
- take_screenshot: bool = True,
176
- ) -> ToolResult:
177
- """
178
- Move mouse cursor.
179
-
180
- Args:
181
- x, y: Absolute coordinates to move to
182
- offset_x, offset_y: Relative offset from current position
183
- take_screenshot: Whether to capture screenshot after action
184
- """
185
- if x is not None and y is not None:
186
- msg = f"[SIMULATED] Move mouse to ({x}, {y})"
187
- elif offset_x is not None or offset_y is not None:
188
- msg = f"[SIMULATED] Move mouse by offset ({offset_x or 0}, {offset_y or 0})"
189
- else:
190
- msg = "[SIMULATED] Move mouse (no coordinates specified)"
191
-
192
- screenshot = await self.screenshot() if take_screenshot else None
193
- return ToolResult(output=msg, base64_image=screenshot)
194
-
195
- async def drag(
196
- self,
197
- path: list[tuple[int, int]],
198
- pattern: list[int] | None = None,
199
- hold_keys: list[str] | None = None,
200
- take_screenshot: bool = True,
201
- ) -> ToolResult:
202
- """
203
- Drag along a path.
204
-
205
- Args:
206
- path: List of (x, y) coordinates defining the drag path
207
- pattern: Delays between path points in milliseconds
208
- hold_keys: Keys to hold during drag
209
- take_screenshot: Whether to capture screenshot after action
210
- """
211
- if len(path) < 2:
212
- return ToolResult(error="Drag path must have at least 2 points")
213
-
214
- start = path[0]
215
- end = path[-1]
216
- msg = f"[SIMULATED] Drag from {start} to {end}"
217
- if len(path) > 2:
218
- msg += f" via {len(path) - 2} intermediate points"
219
- if hold_keys:
220
- msg += f" while holding {hold_keys}"
221
-
222
- screenshot = await self.screenshot() if take_screenshot else None
223
- return ToolResult(output=msg, base64_image=screenshot)
224
-
225
- async def mouse_down(
226
- self,
227
- button: Literal["left", "right", "middle", "back", "forward"] = "left",
228
- take_screenshot: bool = True,
229
- ) -> ToolResult:
230
- """
231
- Press and hold a mouse button.
232
-
233
- Args:
234
- button: Mouse button to press
235
- take_screenshot: Whether to capture screenshot after action
236
- """
237
- msg = f"[SIMULATED] Mouse down: {button} button"
238
-
239
- screenshot = await self.screenshot() if take_screenshot else None
240
- return ToolResult(output=msg, base64_image=screenshot)
241
-
242
- async def mouse_up(
243
- self,
244
- button: Literal["left", "right", "middle", "back", "forward"] = "left",
245
- take_screenshot: bool = True,
246
- ) -> ToolResult:
247
- """
248
- Release a mouse button.
249
-
250
- Args:
251
- button: Mouse button to release
252
- take_screenshot: Whether to capture screenshot after action
253
- """
254
- msg = f"[SIMULATED] Mouse up: {button} button"
255
-
256
- screenshot = await self.screenshot() if take_screenshot else None
257
- return ToolResult(output=msg, base64_image=screenshot)
258
-
259
- async def hold_key(self, key: str, duration: float, take_screenshot: bool = True) -> ToolResult:
260
- """
261
- Hold a key for a specified duration.
262
-
263
- Args:
264
- key: The key to hold
265
- duration: Duration in seconds
266
- take_screenshot: Whether to capture screenshot after action
267
- """
268
- msg = f"[SIMULATED] Hold key '{key}' for {duration} seconds"
269
- await asyncio.sleep(duration) # Simulate the wait
270
-
271
- screenshot = await self.screenshot() if take_screenshot else None
272
- return ToolResult(output=msg, base64_image=screenshot)
273
-
274
- # ===== Utility Actions =====
275
-
276
- async def wait(self, time: int) -> ToolResult:
277
- """
278
- Wait for specified time.
279
-
280
- Args:
281
- time: Time to wait in milliseconds
282
- """
283
- duration_seconds = time / 1000.0
284
- await asyncio.sleep(duration_seconds)
285
- return ToolResult(output=f"Waited {time}ms")
286
-
287
- async def screenshot(self) -> str | None:
288
- """
289
- Take a screenshot and return base64 encoded image.
290
-
291
- Returns:
292
- Base64 encoded PNG image or None if failed
293
- """
294
- logger.info("[SIMULATION] Taking screenshot")
295
- return "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==" # noqa: E501
296
-
297
- async def position(self) -> ToolResult:
298
- """
299
- Get current cursor position.
300
-
301
- Returns:
302
- ToolResult with position information
303
- """
304
- return ToolResult(output="[SIMULATED] Mouse position: (0, 0)")
305
-
306
- # ===== Legacy/Compatibility Methods =====
307
-
308
- async def execute(self, command: str, take_screenshot: bool = True) -> ToolResult:
309
- """
310
- Execute a raw command (for backwards compatibility).
311
-
312
- Args:
313
- command: Command to execute
314
- take_screenshot: Whether to capture screenshot after action
315
- """
316
- msg = f"[SIMULATED] Execute: {command}"
317
- screenshot = await self.screenshot() if take_screenshot else None
318
- return ToolResult(output=msg, base64_image=screenshot)
319
-
320
- # Compatibility aliases
321
- async def type_text(
322
- self, text: str, delay: int = 12, take_screenshot: bool = True
323
- ) -> ToolResult:
324
- """Alias for type() to maintain compatibility."""
325
- return await self.type(
326
- text, enter_after=False, delay=delay, take_screenshot=take_screenshot
327
- )
328
-
329
- async def mouse_move(self, x: int, y: int, take_screenshot: bool = True) -> ToolResult:
330
- """Alias for move() to maintain compatibility."""
331
- return await self.move(x=x, y=y, take_screenshot=take_screenshot)
332
-
333
-
334
- CLAKey: TypeAlias = Literal[
335
- # Control keys
336
- "backspace",
337
- "tab",
338
- "enter",
339
- "shift",
340
- "shiftleft",
341
- "shiftright",
342
- "ctrl",
343
- "ctrlleft",
344
- "ctrlright",
345
- "alt",
346
- "altleft",
347
- "altright",
348
- "pause",
349
- "capslock",
350
- "esc",
351
- "escape",
352
- "space",
353
- "pageup",
354
- "pagedown",
355
- "end",
356
- "home",
357
- "left",
358
- "up",
359
- "right",
360
- "down",
361
- "select",
362
- "print",
363
- "execute",
364
- "printscreen",
365
- "prtsc",
366
- "insert",
367
- "delete",
368
- "help",
369
- "sleep",
370
- # Special keys
371
- "numlock",
372
- "scrolllock",
373
- "clear",
374
- "separator",
375
- "modechange",
376
- "apps",
377
- "browserback",
378
- "browserfavorites",
379
- "browserforward",
380
- "browserhome",
381
- "browserrefresh",
382
- "browsersearch",
383
- "browserstop",
384
- "launchapp1",
385
- "launchapp2",
386
- "launchmail",
387
- "launchmediaselect",
388
- "playpause",
389
- "start",
390
- "stop",
391
- "prevtrack",
392
- "nexttrack",
393
- "volumemute",
394
- "volumeup",
395
- "volumedown",
396
- "zoom",
397
- # Modifier keys
398
- "win",
399
- "winleft",
400
- "winright",
401
- "command",
402
- "option",
403
- "optionleft",
404
- "optionright",
405
- "fn",
406
- # Numpad keys
407
- "num0",
408
- "num1",
409
- "num2",
410
- "num3",
411
- "num4",
412
- "num5",
413
- "num6",
414
- "num7",
415
- "num8",
416
- "num9",
417
- "multiply",
418
- "add",
419
- "subtract",
420
- "decimal",
421
- "divide",
422
- # Function keys
423
- "f1",
424
- "f2",
425
- "f3",
426
- "f4",
427
- "f5",
428
- "f6",
429
- "f7",
430
- "f8",
431
- "f9",
432
- "f10",
433
- "f11",
434
- "f12",
435
- "f13",
436
- "f14",
437
- "f15",
438
- "f16",
439
- "f17",
440
- "f18",
441
- "f19",
442
- "f20",
443
- "f21",
444
- "f22",
445
- "f23",
446
- "f24",
447
- # Language-specific keys
448
- "hanguel",
449
- "hangul",
450
- "hanja",
451
- "kana",
452
- "kanji",
453
- "junja",
454
- "convert",
455
- "nonconvert",
456
- "yen",
457
- # Characters
458
- "\t",
459
- "\n",
460
- "\r",
461
- " ",
462
- "!",
463
- '"',
464
- "#",
465
- "$",
466
- "%",
467
- "&",
468
- "'",
469
- "(",
470
- ")",
471
- "*",
472
- "+",
473
- ",",
474
- "-",
475
- ".",
476
- "/",
477
- "0",
478
- "1",
479
- "2",
480
- "3",
481
- "4",
482
- "5",
483
- "6",
484
- "7",
485
- "8",
486
- "9",
487
- ":",
488
- ";",
489
- "<",
490
- "=",
491
- ">",
492
- "?",
493
- "@",
494
- "[",
495
- "\\",
496
- "]",
497
- "^",
498
- "_",
499
- "`",
500
- "a",
501
- "b",
502
- "c",
503
- "d",
504
- "e",
505
- "f",
506
- "g",
507
- "h",
508
- "i",
509
- "j",
510
- "k",
511
- "l",
512
- "m",
513
- "n",
514
- "o",
515
- "p",
516
- "q",
517
- "r",
518
- "s",
519
- "t",
520
- "u",
521
- "v",
522
- "w",
523
- "x",
524
- "y",
525
- "z",
526
- "{",
527
- "|",
528
- "}",
529
- "~",
530
- ]
531
-
532
- CLAButton: TypeAlias = Literal["left", "right", "middle", "back", "forward"]
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+ from typing import Literal, TypeAlias
6
+
7
+ from hud.tools.types import ContentResult
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ class BaseExecutor:
13
+ """
14
+ Base executor that provides simulation implementations for all CLA (Common Language Actions).
15
+
16
+ This class:
17
+ 1. Defines all action methods that HudComputer expects
18
+ 2. Provides simulation implementations for environments without display
19
+ 3. Serves as the base class for platform-specific executors (XDO, PyAutoGUI)
20
+
21
+ When used directly, it simulates all actions. Subclasses provide real implementations.
22
+ """
23
+
24
+ def __init__(self, display_num: int | None = None) -> None:
25
+ """
26
+ Initialize the base executor.
27
+
28
+ Args:
29
+ display_num: X display number (for Linux/X11 systems)
30
+ """
31
+ if display_num is None:
32
+ from hud.tools.computer.settings import computer_settings
33
+
34
+ self.display_num = computer_settings.DISPLAY_NUM
35
+ else:
36
+ self.display_num = display_num
37
+ self._screenshot_delay = 0.5
38
+ logger.info("BaseExecutor initialized")
39
+
40
+ # ===== Core CLA Actions =====
41
+
42
+ async def click(
43
+ self,
44
+ x: int | None = None,
45
+ y: int | None = None,
46
+ button: Literal["left", "right", "middle", "back", "forward"] = "left",
47
+ pattern: list[int] | None = None,
48
+ hold_keys: list[str] | None = None,
49
+ take_screenshot: bool = True,
50
+ ) -> ContentResult:
51
+ """
52
+ Click at specified coordinates.
53
+
54
+ Args:
55
+ x, y: Coordinates to click at (None = current position)
56
+ button: Mouse button to use
57
+ pattern: List of delays for multi-clicks (e.g., [100] for double-click)
58
+ hold_keys: Keys to hold during click
59
+ take_screenshot: Whether to capture screenshot after action
60
+ """
61
+ msg = f"[SIMULATED] Click at ({x}, {y}) with {button} button"
62
+ if pattern:
63
+ msg += f" (multi-click pattern: {pattern})"
64
+ if hold_keys:
65
+ msg += f" while holding {hold_keys}"
66
+
67
+ screenshot = await self.screenshot() if take_screenshot else None
68
+ return ContentResult(output=msg, base64_image=screenshot)
69
+
70
+ async def write(
71
+ self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
72
+ ) -> ContentResult:
73
+ """
74
+ Type text using keyboard.
75
+
76
+ Args:
77
+ text: Text to type
78
+ enter_after: Whether to press Enter after typing
79
+ delay: Delay between keystrokes in milliseconds
80
+ take_screenshot: Whether to capture screenshot after action
81
+ """
82
+ msg = f"[SIMULATED] Type '{text}'"
83
+ if enter_after:
84
+ msg += " followed by Enter"
85
+
86
+ screenshot = await self.screenshot() if take_screenshot else None
87
+ return ContentResult(output=msg, base64_image=screenshot)
88
+
89
+ async def press(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
90
+ """
91
+ Press a key combination (hotkey).
92
+
93
+ Args:
94
+ keys: List of keys to press together (e.g., ["ctrl", "c"])
95
+ take_screenshot: Whether to capture screenshot after action
96
+ """
97
+ key_combo = "+".join(keys)
98
+ msg = f"[SIMULATED] Press key combination: {key_combo}"
99
+
100
+ screenshot = await self.screenshot() if take_screenshot else None
101
+ return ContentResult(output=msg, base64_image=screenshot)
102
+
103
+ async def key(self, key_sequence: str, take_screenshot: bool = True) -> ContentResult:
104
+ """
105
+ Press a single key or key combination.
106
+
107
+ Args:
108
+ key_sequence: Key or combination like "Return" or "ctrl+a"
109
+ take_screenshot: Whether to capture screenshot after action
110
+ """
111
+ msg = f"[SIMULATED] Press key: {key_sequence}"
112
+
113
+ screenshot = await self.screenshot() if take_screenshot else None
114
+ return ContentResult(output=msg, base64_image=screenshot)
115
+
116
+ async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
117
+ """
118
+ Press and hold keys.
119
+
120
+ Args:
121
+ keys: Keys to press and hold
122
+ take_screenshot: Whether to capture screenshot after action
123
+ """
124
+ msg = f"[SIMULATED] Key down: {', '.join(keys)}"
125
+
126
+ screenshot = await self.screenshot() if take_screenshot else None
127
+ return ContentResult(output=msg, base64_image=screenshot)
128
+
129
+ async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
130
+ """
131
+ Release held keys.
132
+
133
+ Args:
134
+ keys: Keys to release
135
+ take_screenshot: Whether to capture screenshot after action
136
+ """
137
+ msg = f"[SIMULATED] Key up: {', '.join(keys)}"
138
+
139
+ screenshot = await self.screenshot() if take_screenshot else None
140
+ return ContentResult(output=msg, base64_image=screenshot)
141
+
142
+ async def scroll(
143
+ self,
144
+ x: int | None = None,
145
+ y: int | None = None,
146
+ scroll_x: int | None = None,
147
+ scroll_y: int | None = None,
148
+ hold_keys: list[str] | None = None,
149
+ take_screenshot: bool = True,
150
+ ) -> ContentResult:
151
+ """
152
+ Scroll at specified position.
153
+
154
+ Args:
155
+ x, y: Position to scroll at (None = current position)
156
+ scroll_x: Horizontal scroll amount (positive = right)
157
+ scroll_y: Vertical scroll amount (positive = down)
158
+ hold_keys: Keys to hold during scroll
159
+ take_screenshot: Whether to capture screenshot after action
160
+ """
161
+ msg = "[SIMULATED] Scroll"
162
+ if x is not None and y is not None:
163
+ msg += f" at ({x}, {y})"
164
+ if scroll_x:
165
+ msg += f" horizontally by {scroll_x}"
166
+ if scroll_y:
167
+ msg += f" vertically by {scroll_y}"
168
+ if hold_keys:
169
+ msg += f" while holding {hold_keys}"
170
+
171
+ screenshot = await self.screenshot() if take_screenshot else None
172
+ return ContentResult(output=msg, base64_image=screenshot)
173
+
174
+ async def move(
175
+ self,
176
+ x: int | None = None,
177
+ y: int | None = None,
178
+ offset_x: int | None = None,
179
+ offset_y: int | None = None,
180
+ take_screenshot: bool = True,
181
+ ) -> ContentResult:
182
+ """
183
+ Move mouse cursor.
184
+
185
+ Args:
186
+ x, y: Absolute coordinates to move to
187
+ offset_x, offset_y: Relative offset from current position
188
+ take_screenshot: Whether to capture screenshot after action
189
+ """
190
+ if x is not None and y is not None:
191
+ msg = f"[SIMULATED] Move mouse to ({x}, {y})"
192
+ elif offset_x is not None or offset_y is not None:
193
+ msg = f"[SIMULATED] Move mouse by offset ({offset_x or 0}, {offset_y or 0})"
194
+ else:
195
+ msg = "[SIMULATED] Move mouse (no coordinates specified)"
196
+
197
+ screenshot = await self.screenshot() if take_screenshot else None
198
+ return ContentResult(output=msg, base64_image=screenshot)
199
+
200
+ async def drag(
201
+ self,
202
+ path: list[tuple[int, int]],
203
+ pattern: list[int] | None = None,
204
+ hold_keys: list[str] | None = None,
205
+ take_screenshot: bool = True,
206
+ ) -> ContentResult:
207
+ """
208
+ Drag along a path.
209
+
210
+ Args:
211
+ path: List of (x, y) coordinates defining the drag path
212
+ pattern: Delays between path points in milliseconds
213
+ hold_keys: Keys to hold during drag
214
+ take_screenshot: Whether to capture screenshot after action
215
+ """
216
+ if len(path) < 2:
217
+ return ContentResult(error="Drag path must have at least 2 points")
218
+
219
+ start = path[0]
220
+ end = path[-1]
221
+ msg = f"[SIMULATED] Drag from {start} to {end}"
222
+ if len(path) > 2:
223
+ msg += f" via {len(path) - 2} intermediate points"
224
+ if hold_keys:
225
+ msg += f" while holding {hold_keys}"
226
+
227
+ screenshot = await self.screenshot() if take_screenshot else None
228
+ return ContentResult(output=msg, base64_image=screenshot)
229
+
230
+ async def mouse_down(
231
+ self,
232
+ button: Literal["left", "right", "middle", "back", "forward"] = "left",
233
+ take_screenshot: bool = True,
234
+ ) -> ContentResult:
235
+ """
236
+ Press and hold a mouse button.
237
+
238
+ Args:
239
+ button: Mouse button to press
240
+ take_screenshot: Whether to capture screenshot after action
241
+ """
242
+ msg = f"[SIMULATED] Mouse down: {button} button"
243
+
244
+ screenshot = await self.screenshot() if take_screenshot else None
245
+ return ContentResult(output=msg, base64_image=screenshot)
246
+
247
+ async def mouse_up(
248
+ self,
249
+ button: Literal["left", "right", "middle", "back", "forward"] = "left",
250
+ take_screenshot: bool = True,
251
+ ) -> ContentResult:
252
+ """
253
+ Release a mouse button.
254
+
255
+ Args:
256
+ button: Mouse button to release
257
+ take_screenshot: Whether to capture screenshot after action
258
+ """
259
+ msg = f"[SIMULATED] Mouse up: {button} button"
260
+
261
+ screenshot = await self.screenshot() if take_screenshot else None
262
+ return ContentResult(output=msg, base64_image=screenshot)
263
+
264
async def hold_key(
    self, key: str, duration: float, take_screenshot: bool = True
) -> ContentResult:
    """
    Simulate holding a key down for a period of time.

    The hold is imitated by sleeping on the event loop for `duration`
    seconds; no key event is produced by this method itself.

    Args:
        key: Name of the key reported as held
        duration: How long to sleep, in seconds
        take_screenshot: When true, attach the value of self.screenshot()

    Returns:
        ContentResult with the "[SIMULATED]" message and the optional screenshot
    """
    description = f"[SIMULATED] Hold key '{key}' for {duration} seconds"

    # The sleep stands in for the time the key would be held.
    await asyncio.sleep(duration)

    image = None
    if take_screenshot:
        image = await self.screenshot()
    return ContentResult(output=description, base64_image=image)
280
+
281
+ # ===== Utility Actions =====
282
+
283
async def wait(self, time: int) -> ContentResult:
    """
    Pause for the requested amount of time.

    Args:
        time: Length of the pause in milliseconds

    Returns:
        ContentResult whose output reports the requested milliseconds
    """
    # asyncio.sleep takes seconds, so convert from milliseconds first.
    await asyncio.sleep(time / 1000.0)
    return ContentResult(output=f"Waited {time}ms")
293
+
294
async def screenshot(self) -> str | None:
    """
    Return a placeholder screenshot as base64 text.

    The call is logged, then the same hard-coded base64 string is
    returned every time. The return annotation allows None, but this
    implementation never returns it.

    Returns:
        The fixed base64 string embedded in this method
    """
    placeholder_image = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="  # noqa: E501
    logger.info("[SIMULATION] Taking screenshot")
    return placeholder_image
303
+
304
async def position(self) -> ContentResult:
    """
    Report the cursor position.

    The report is a fixed string; the coordinates are always (0, 0).

    Returns:
        ContentResult whose output is the fixed position message
    """
    fixed_report = "[SIMULATED] Mouse position: (0, 0)"
    return ContentResult(output=fixed_report)
312
+
313
+ # ===== Legacy/Compatibility Methods =====
314
+
315
async def execute(self, command: str, take_screenshot: bool = True) -> ContentResult:
    """
    Accept a raw command string (kept for backwards compatibility).

    The command is not run by this method; it is only echoed back inside
    a "[SIMULATED]" message.

    Args:
        command: Command text to echo in the result
        take_screenshot: When true, attach the value of self.screenshot()

    Returns:
        ContentResult with the message and the optional screenshot
    """
    description = f"[SIMULATED] Execute: {command}"
    image = None
    if take_screenshot:
        image = await self.screenshot()
    return ContentResult(output=description, base64_image=image)
326
+
327
+ # Compatibility aliases
328
async def type_text(
    self, text: str, delay: int = 12, take_screenshot: bool = True
) -> ContentResult:
    """
    Compatibility alias that forwards to write().

    The call is passed through with enter_after=False; `text`, `delay`
    and `take_screenshot` are handed over unchanged.
    """
    forwarded_options = {
        "enter_after": False,
        "delay": delay,
        "take_screenshot": take_screenshot,
    }
    return await self.write(text, **forwarded_options)
335
+
336
async def mouse_move(self, x: int, y: int, take_screenshot: bool = True) -> ContentResult:
    """
    Compatibility alias that forwards to move().

    `x`, `y` and `take_screenshot` are passed through as keyword arguments.
    """
    target = {"x": x, "y": y}
    return await self.move(**target, take_screenshot=take_screenshot)
339
+
340
+
341
# Static type for key names: a closed set of string literals, grouped below
# into named keys (control, special, modifier, numpad, function,
# language-specific) and single characters.
# Notes on the listing as written:
#   - Some keys appear under more than one spelling ("esc"/"escape",
#     "hanguel"/"hangul", "space"/" ", "tab"/"\t", "enter"/"\n").
#   - Only lowercase letters are listed; uppercase letters are not members.
# NOTE(review): the names look like the PyAutoGUI key-name list - confirm
# before relying on that correspondence.
CLAKey: TypeAlias = Literal[
    # Control keys
    "backspace",
    "tab",
    "enter",
    "shift",
    "shiftleft",
    "shiftright",
    "ctrl",
    "ctrlleft",
    "ctrlright",
    "alt",
    "altleft",
    "altright",
    "pause",
    "capslock",
    "esc",
    "escape",
    "space",
    "pageup",
    "pagedown",
    "end",
    "home",
    "left",
    "up",
    "right",
    "down",
    "select",
    "print",
    "execute",
    "printscreen",
    "prtsc",
    "insert",
    "delete",
    "help",
    "sleep",
    # Special keys
    "numlock",
    "scrolllock",
    "clear",
    "separator",
    "modechange",
    "apps",
    "browserback",
    "browserfavorites",
    "browserforward",
    "browserhome",
    "browserrefresh",
    "browsersearch",
    "browserstop",
    "launchapp1",
    "launchapp2",
    "launchmail",
    "launchmediaselect",
    "playpause",
    "start",
    "stop",
    "prevtrack",
    "nexttrack",
    "volumemute",
    "volumeup",
    "volumedown",
    "zoom",
    # Modifier keys
    "win",
    "winleft",
    "winright",
    "command",
    "option",
    "optionleft",
    "optionright",
    "fn",
    # Numpad keys
    "num0",
    "num1",
    "num2",
    "num3",
    "num4",
    "num5",
    "num6",
    "num7",
    "num8",
    "num9",
    "multiply",
    "add",
    "subtract",
    "decimal",
    "divide",
    # Function keys
    "f1",
    "f2",
    "f3",
    "f4",
    "f5",
    "f6",
    "f7",
    "f8",
    "f9",
    "f10",
    "f11",
    "f12",
    "f13",
    "f14",
    "f15",
    "f16",
    "f17",
    "f18",
    "f19",
    "f20",
    "f21",
    "f22",
    "f23",
    "f24",
    # Language-specific keys
    "hanguel",
    "hangul",
    "hanja",
    "kana",
    "kanji",
    "junja",
    "convert",
    "nonconvert",
    "yen",
    # Characters (whitespace first, then punctuation, digits and lowercase letters)
    "\t",
    "\n",
    "\r",
    " ",
    "!",
    '"',
    "#",
    "$",
    "%",
    "&",
    "'",
    "(",
    ")",
    "*",
    "+",
    ",",
    "-",
    ".",
    "/",
    "0",
    "1",
    "2",
    "3",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9",
    ":",
    ";",
    "<",
    "=",
    ">",
    "?",
    "@",
    "[",
    "\\",
    "]",
    "^",
    "_",
    "`",
    "a",
    "b",
    "c",
    "d",
    "e",
    "f",
    "g",
    "h",
    "i",
    "j",
    "k",
    "l",
    "m",
    "n",
    "o",
    "p",
    "q",
    "r",
    "s",
    "t",
    "u",
    "v",
    "w",
    "x",
    "y",
    "z",
    "{",
    "|",
    "}",
    "~",
]
538
+
539
# Static type for mouse button names. The five literals are the same ones
# spelled out inline on the `button` parameter of mouse_down() and mouse_up().
CLAButton: TypeAlias = Literal["left", "right", "middle", "back", "forward"]