hud-python 0.4.1 (py3-none-any.whl) → 0.4.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (130)
  1. hud/__init__.py +22 -22
  2. hud/agents/__init__.py +13 -15
  3. hud/agents/base.py +599 -599
  4. hud/agents/claude.py +373 -373
  5. hud/agents/langchain.py +261 -250
  6. hud/agents/misc/__init__.py +7 -7
  7. hud/agents/misc/response_agent.py +82 -80
  8. hud/agents/openai.py +352 -352
  9. hud/agents/openai_chat_generic.py +154 -154
  10. hud/agents/tests/__init__.py +1 -1
  11. hud/agents/tests/test_base.py +742 -742
  12. hud/agents/tests/test_claude.py +324 -324
  13. hud/agents/tests/test_client.py +363 -363
  14. hud/agents/tests/test_openai.py +237 -237
  15. hud/cli/__init__.py +617 -617
  16. hud/cli/__main__.py +8 -8
  17. hud/cli/analyze.py +371 -371
  18. hud/cli/analyze_metadata.py +230 -230
  19. hud/cli/build.py +498 -427
  20. hud/cli/clone.py +185 -185
  21. hud/cli/cursor.py +92 -92
  22. hud/cli/debug.py +392 -392
  23. hud/cli/docker_utils.py +83 -83
  24. hud/cli/init.py +280 -281
  25. hud/cli/interactive.py +353 -353
  26. hud/cli/mcp_server.py +764 -756
  27. hud/cli/pull.py +330 -336
  28. hud/cli/push.py +404 -370
  29. hud/cli/remote_runner.py +311 -311
  30. hud/cli/runner.py +160 -160
  31. hud/cli/tests/__init__.py +3 -3
  32. hud/cli/tests/test_analyze.py +284 -284
  33. hud/cli/tests/test_cli_init.py +265 -265
  34. hud/cli/tests/test_cli_main.py +27 -27
  35. hud/cli/tests/test_clone.py +142 -142
  36. hud/cli/tests/test_cursor.py +253 -253
  37. hud/cli/tests/test_debug.py +453 -453
  38. hud/cli/tests/test_mcp_server.py +139 -139
  39. hud/cli/tests/test_utils.py +388 -388
  40. hud/cli/utils.py +263 -263
  41. hud/clients/README.md +143 -143
  42. hud/clients/__init__.py +16 -16
  43. hud/clients/base.py +378 -379
  44. hud/clients/fastmcp.py +222 -222
  45. hud/clients/mcp_use.py +298 -278
  46. hud/clients/tests/__init__.py +1 -1
  47. hud/clients/tests/test_client_integration.py +111 -111
  48. hud/clients/tests/test_fastmcp.py +342 -342
  49. hud/clients/tests/test_protocol.py +188 -188
  50. hud/clients/utils/__init__.py +1 -1
  51. hud/clients/utils/retry_transport.py +160 -160
  52. hud/datasets.py +327 -322
  53. hud/misc/__init__.py +1 -1
  54. hud/misc/claude_plays_pokemon.py +292 -292
  55. hud/otel/__init__.py +35 -35
  56. hud/otel/collector.py +142 -142
  57. hud/otel/config.py +164 -164
  58. hud/otel/context.py +536 -536
  59. hud/otel/exporters.py +366 -366
  60. hud/otel/instrumentation.py +97 -97
  61. hud/otel/processors.py +118 -118
  62. hud/otel/tests/__init__.py +1 -1
  63. hud/otel/tests/test_processors.py +197 -197
  64. hud/server/__init__.py +5 -5
  65. hud/server/context.py +114 -114
  66. hud/server/helper/__init__.py +5 -5
  67. hud/server/low_level.py +132 -132
  68. hud/server/server.py +170 -166
  69. hud/server/tests/__init__.py +3 -3
  70. hud/settings.py +73 -73
  71. hud/shared/__init__.py +5 -5
  72. hud/shared/exceptions.py +180 -180
  73. hud/shared/requests.py +264 -264
  74. hud/shared/tests/test_exceptions.py +157 -157
  75. hud/shared/tests/test_requests.py +275 -275
  76. hud/telemetry/__init__.py +25 -25
  77. hud/telemetry/instrument.py +379 -379
  78. hud/telemetry/job.py +309 -309
  79. hud/telemetry/replay.py +74 -74
  80. hud/telemetry/trace.py +83 -83
  81. hud/tools/__init__.py +33 -33
  82. hud/tools/base.py +365 -365
  83. hud/tools/bash.py +161 -161
  84. hud/tools/computer/__init__.py +15 -15
  85. hud/tools/computer/anthropic.py +437 -437
  86. hud/tools/computer/hud.py +376 -376
  87. hud/tools/computer/openai.py +295 -295
  88. hud/tools/computer/settings.py +82 -82
  89. hud/tools/edit.py +314 -314
  90. hud/tools/executors/__init__.py +30 -30
  91. hud/tools/executors/base.py +539 -539
  92. hud/tools/executors/pyautogui.py +621 -621
  93. hud/tools/executors/tests/__init__.py +1 -1
  94. hud/tools/executors/tests/test_base_executor.py +338 -338
  95. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  96. hud/tools/executors/xdo.py +511 -511
  97. hud/tools/playwright.py +412 -412
  98. hud/tools/tests/__init__.py +3 -3
  99. hud/tools/tests/test_base.py +282 -282
  100. hud/tools/tests/test_bash.py +158 -158
  101. hud/tools/tests/test_bash_extended.py +197 -197
  102. hud/tools/tests/test_computer.py +425 -425
  103. hud/tools/tests/test_computer_actions.py +34 -34
  104. hud/tools/tests/test_edit.py +259 -259
  105. hud/tools/tests/test_init.py +27 -27
  106. hud/tools/tests/test_playwright_tool.py +183 -183
  107. hud/tools/tests/test_tools.py +145 -145
  108. hud/tools/tests/test_utils.py +156 -156
  109. hud/tools/types.py +72 -72
  110. hud/tools/utils.py +50 -50
  111. hud/types.py +136 -136
  112. hud/utils/__init__.py +10 -10
  113. hud/utils/async_utils.py +65 -65
  114. hud/utils/design.py +236 -168
  115. hud/utils/mcp.py +55 -55
  116. hud/utils/progress.py +149 -149
  117. hud/utils/telemetry.py +66 -66
  118. hud/utils/tests/test_async_utils.py +173 -173
  119. hud/utils/tests/test_init.py +17 -17
  120. hud/utils/tests/test_progress.py +261 -261
  121. hud/utils/tests/test_telemetry.py +82 -82
  122. hud/utils/tests/test_version.py +8 -8
  123. hud/version.py +7 -7
  124. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
  125. hud_python-0.4.3.dist-info/RECORD +131 -0
  126. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
  127. hud/agents/art.py +0 -101
  128. hud_python-0.4.1.dist-info/RECORD +0 -132
  129. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
  130. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/tools/executors/base.py
@@ -1,539 +1,539 @@
1
- from __future__ import annotations
2
-
3
- import asyncio
4
- import logging
5
- from typing import Literal, TypeAlias
6
-
7
- from hud.tools.types import ContentResult
8
-
9
- logger = logging.getLogger(__name__)
10
-
11
-
12
- class BaseExecutor:
13
- """
14
- Base executor that provides simulation implementations for all CLA (Common Language Actions).
15
-
16
- This class:
17
- 1. Defines all action methods that HudComputer expects
18
- 2. Provides simulation implementations for environments without display
19
- 3. Serves as the base class for platform-specific executors (XDO, PyAutoGUI)
20
-
21
- When used directly, it simulates all actions. Subclasses provide real implementations.
22
- """
23
-
24
- def __init__(self, display_num: int | None = None) -> None:
25
- """
26
- Initialize the base executor.
27
-
28
- Args:
29
- display_num: X display number (for Linux/X11 systems)
30
- """
31
- if display_num is None:
32
- from hud.tools.computer.settings import computer_settings
33
-
34
- self.display_num = computer_settings.DISPLAY_NUM
35
- else:
36
- self.display_num = display_num
37
- self._screenshot_delay = 0.5
38
- logger.info("BaseExecutor initialized")
39
-
40
- # ===== Core CLA Actions =====
41
-
42
- async def click(
43
- self,
44
- x: int | None = None,
45
- y: int | None = None,
46
- button: Literal["left", "right", "middle", "back", "forward"] = "left",
47
- pattern: list[int] | None = None,
48
- hold_keys: list[str] | None = None,
49
- take_screenshot: bool = True,
50
- ) -> ContentResult:
51
- """
52
- Click at specified coordinates.
53
-
54
- Args:
55
- x, y: Coordinates to click at (None = current position)
56
- button: Mouse button to use
57
- pattern: List of delays for multi-clicks (e.g., [100] for double-click)
58
- hold_keys: Keys to hold during click
59
- take_screenshot: Whether to capture screenshot after action
60
- """
61
- msg = f"[SIMULATED] Click at ({x}, {y}) with {button} button"
62
- if pattern:
63
- msg += f" (multi-click pattern: {pattern})"
64
- if hold_keys:
65
- msg += f" while holding {hold_keys}"
66
-
67
- screenshot = await self.screenshot() if take_screenshot else None
68
- return ContentResult(output=msg, base64_image=screenshot)
69
-
70
- async def write(
71
- self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
72
- ) -> ContentResult:
73
- """
74
- Type text using keyboard.
75
-
76
- Args:
77
- text: Text to type
78
- enter_after: Whether to press Enter after typing
79
- delay: Delay between keystrokes in milliseconds
80
- take_screenshot: Whether to capture screenshot after action
81
- """
82
- msg = f"[SIMULATED] Type '{text}'"
83
- if enter_after:
84
- msg += " followed by Enter"
85
-
86
- screenshot = await self.screenshot() if take_screenshot else None
87
- return ContentResult(output=msg, base64_image=screenshot)
88
-
89
- async def press(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
90
- """
91
- Press a key combination (hotkey).
92
-
93
- Args:
94
- keys: List of keys to press together (e.g., ["ctrl", "c"])
95
- take_screenshot: Whether to capture screenshot after action
96
- """
97
- key_combo = "+".join(keys)
98
- msg = f"[SIMULATED] Press key combination: {key_combo}"
99
-
100
- screenshot = await self.screenshot() if take_screenshot else None
101
- return ContentResult(output=msg, base64_image=screenshot)
102
-
103
- async def key(self, key_sequence: str, take_screenshot: bool = True) -> ContentResult:
104
- """
105
- Press a single key or key combination.
106
-
107
- Args:
108
- key_sequence: Key or combination like "Return" or "ctrl+a"
109
- take_screenshot: Whether to capture screenshot after action
110
- """
111
- msg = f"[SIMULATED] Press key: {key_sequence}"
112
-
113
- screenshot = await self.screenshot() if take_screenshot else None
114
- return ContentResult(output=msg, base64_image=screenshot)
115
-
116
- async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
117
- """
118
- Press and hold keys.
119
-
120
- Args:
121
- keys: Keys to press and hold
122
- take_screenshot: Whether to capture screenshot after action
123
- """
124
- msg = f"[SIMULATED] Key down: {', '.join(keys)}"
125
-
126
- screenshot = await self.screenshot() if take_screenshot else None
127
- return ContentResult(output=msg, base64_image=screenshot)
128
-
129
- async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
130
- """
131
- Release held keys.
132
-
133
- Args:
134
- keys: Keys to release
135
- take_screenshot: Whether to capture screenshot after action
136
- """
137
- msg = f"[SIMULATED] Key up: {', '.join(keys)}"
138
-
139
- screenshot = await self.screenshot() if take_screenshot else None
140
- return ContentResult(output=msg, base64_image=screenshot)
141
-
142
- async def scroll(
143
- self,
144
- x: int | None = None,
145
- y: int | None = None,
146
- scroll_x: int | None = None,
147
- scroll_y: int | None = None,
148
- hold_keys: list[str] | None = None,
149
- take_screenshot: bool = True,
150
- ) -> ContentResult:
151
- """
152
- Scroll at specified position.
153
-
154
- Args:
155
- x, y: Position to scroll at (None = current position)
156
- scroll_x: Horizontal scroll amount (positive = right)
157
- scroll_y: Vertical scroll amount (positive = down)
158
- hold_keys: Keys to hold during scroll
159
- take_screenshot: Whether to capture screenshot after action
160
- """
161
- msg = "[SIMULATED] Scroll"
162
- if x is not None and y is not None:
163
- msg += f" at ({x}, {y})"
164
- if scroll_x:
165
- msg += f" horizontally by {scroll_x}"
166
- if scroll_y:
167
- msg += f" vertically by {scroll_y}"
168
- if hold_keys:
169
- msg += f" while holding {hold_keys}"
170
-
171
- screenshot = await self.screenshot() if take_screenshot else None
172
- return ContentResult(output=msg, base64_image=screenshot)
173
-
174
- async def move(
175
- self,
176
- x: int | None = None,
177
- y: int | None = None,
178
- offset_x: int | None = None,
179
- offset_y: int | None = None,
180
- take_screenshot: bool = True,
181
- ) -> ContentResult:
182
- """
183
- Move mouse cursor.
184
-
185
- Args:
186
- x, y: Absolute coordinates to move to
187
- offset_x, offset_y: Relative offset from current position
188
- take_screenshot: Whether to capture screenshot after action
189
- """
190
- if x is not None and y is not None:
191
- msg = f"[SIMULATED] Move mouse to ({x}, {y})"
192
- elif offset_x is not None or offset_y is not None:
193
- msg = f"[SIMULATED] Move mouse by offset ({offset_x or 0}, {offset_y or 0})"
194
- else:
195
- msg = "[SIMULATED] Move mouse (no coordinates specified)"
196
-
197
- screenshot = await self.screenshot() if take_screenshot else None
198
- return ContentResult(output=msg, base64_image=screenshot)
199
-
200
- async def drag(
201
- self,
202
- path: list[tuple[int, int]],
203
- pattern: list[int] | None = None,
204
- hold_keys: list[str] | None = None,
205
- take_screenshot: bool = True,
206
- ) -> ContentResult:
207
- """
208
- Drag along a path.
209
-
210
- Args:
211
- path: List of (x, y) coordinates defining the drag path
212
- pattern: Delays between path points in milliseconds
213
- hold_keys: Keys to hold during drag
214
- take_screenshot: Whether to capture screenshot after action
215
- """
216
- if len(path) < 2:
217
- return ContentResult(error="Drag path must have at least 2 points")
218
-
219
- start = path[0]
220
- end = path[-1]
221
- msg = f"[SIMULATED] Drag from {start} to {end}"
222
- if len(path) > 2:
223
- msg += f" via {len(path) - 2} intermediate points"
224
- if hold_keys:
225
- msg += f" while holding {hold_keys}"
226
-
227
- screenshot = await self.screenshot() if take_screenshot else None
228
- return ContentResult(output=msg, base64_image=screenshot)
229
-
230
- async def mouse_down(
231
- self,
232
- button: Literal["left", "right", "middle", "back", "forward"] = "left",
233
- take_screenshot: bool = True,
234
- ) -> ContentResult:
235
- """
236
- Press and hold a mouse button.
237
-
238
- Args:
239
- button: Mouse button to press
240
- take_screenshot: Whether to capture screenshot after action
241
- """
242
- msg = f"[SIMULATED] Mouse down: {button} button"
243
-
244
- screenshot = await self.screenshot() if take_screenshot else None
245
- return ContentResult(output=msg, base64_image=screenshot)
246
-
247
- async def mouse_up(
248
- self,
249
- button: Literal["left", "right", "middle", "back", "forward"] = "left",
250
- take_screenshot: bool = True,
251
- ) -> ContentResult:
252
- """
253
- Release a mouse button.
254
-
255
- Args:
256
- button: Mouse button to release
257
- take_screenshot: Whether to capture screenshot after action
258
- """
259
- msg = f"[SIMULATED] Mouse up: {button} button"
260
-
261
- screenshot = await self.screenshot() if take_screenshot else None
262
- return ContentResult(output=msg, base64_image=screenshot)
263
-
264
- async def hold_key(
265
- self, key: str, duration: float, take_screenshot: bool = True
266
- ) -> ContentResult:
267
- """
268
- Hold a key for a specified duration.
269
-
270
- Args:
271
- key: The key to hold
272
- duration: Duration in seconds
273
- take_screenshot: Whether to capture screenshot after action
274
- """
275
- msg = f"[SIMULATED] Hold key '{key}' for {duration} seconds"
276
- await asyncio.sleep(duration) # Simulate the wait
277
-
278
- screenshot = await self.screenshot() if take_screenshot else None
279
- return ContentResult(output=msg, base64_image=screenshot)
280
-
281
- # ===== Utility Actions =====
282
-
283
- async def wait(self, time: int) -> ContentResult:
284
- """
285
- Wait for specified time.
286
-
287
- Args:
288
- time: Time to wait in milliseconds
289
- """
290
- duration_seconds = time / 1000.0
291
- await asyncio.sleep(duration_seconds)
292
- return ContentResult(output=f"Waited {time}ms")
293
-
294
- async def screenshot(self) -> str | None:
295
- """
296
- Take a screenshot and return base64 encoded image.
297
-
298
- Returns:
299
- Base64 encoded PNG image or None if failed
300
- """
301
- logger.info("[SIMULATION] Taking screenshot")
302
- return "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==" # noqa: E501
303
-
304
- async def position(self) -> ContentResult:
305
- """
306
- Get current cursor position.
307
-
308
- Returns:
309
- ToolResult with position information
310
- """
311
- return ContentResult(output="[SIMULATED] Mouse position: (0, 0)")
312
-
313
- # ===== Legacy/Compatibility Methods =====
314
-
315
- async def execute(self, command: str, take_screenshot: bool = True) -> ContentResult:
316
- """
317
- Execute a raw command (for backwards compatibility).
318
-
319
- Args:
320
- command: Command to execute
321
- take_screenshot: Whether to capture screenshot after action
322
- """
323
- msg = f"[SIMULATED] Execute: {command}"
324
- screenshot = await self.screenshot() if take_screenshot else None
325
- return ContentResult(output=msg, base64_image=screenshot)
326
-
327
- # Compatibility aliases
328
- async def type_text(
329
- self, text: str, delay: int = 12, take_screenshot: bool = True
330
- ) -> ContentResult:
331
- """Alias for type() to maintain compatibility."""
332
- return await self.write(
333
- text, enter_after=False, delay=delay, take_screenshot=take_screenshot
334
- )
335
-
336
- async def mouse_move(self, x: int, y: int, take_screenshot: bool = True) -> ContentResult:
337
- """Alias for move() to maintain compatibility."""
338
- return await self.move(x=x, y=y, take_screenshot=take_screenshot)
339
-
340
-
341
- CLAKey: TypeAlias = Literal[
342
- # Control keys
343
- "backspace",
344
- "tab",
345
- "enter",
346
- "shift",
347
- "shiftleft",
348
- "shiftright",
349
- "ctrl",
350
- "ctrlleft",
351
- "ctrlright",
352
- "alt",
353
- "altleft",
354
- "altright",
355
- "pause",
356
- "capslock",
357
- "esc",
358
- "escape",
359
- "space",
360
- "pageup",
361
- "pagedown",
362
- "end",
363
- "home",
364
- "left",
365
- "up",
366
- "right",
367
- "down",
368
- "select",
369
- "print",
370
- "execute",
371
- "printscreen",
372
- "prtsc",
373
- "insert",
374
- "delete",
375
- "help",
376
- "sleep",
377
- # Special keys
378
- "numlock",
379
- "scrolllock",
380
- "clear",
381
- "separator",
382
- "modechange",
383
- "apps",
384
- "browserback",
385
- "browserfavorites",
386
- "browserforward",
387
- "browserhome",
388
- "browserrefresh",
389
- "browsersearch",
390
- "browserstop",
391
- "launchapp1",
392
- "launchapp2",
393
- "launchmail",
394
- "launchmediaselect",
395
- "playpause",
396
- "start",
397
- "stop",
398
- "prevtrack",
399
- "nexttrack",
400
- "volumemute",
401
- "volumeup",
402
- "volumedown",
403
- "zoom",
404
- # Modifier keys
405
- "win",
406
- "winleft",
407
- "winright",
408
- "command",
409
- "option",
410
- "optionleft",
411
- "optionright",
412
- "fn",
413
- # Numpad keys
414
- "num0",
415
- "num1",
416
- "num2",
417
- "num3",
418
- "num4",
419
- "num5",
420
- "num6",
421
- "num7",
422
- "num8",
423
- "num9",
424
- "multiply",
425
- "add",
426
- "subtract",
427
- "decimal",
428
- "divide",
429
- # Function keys
430
- "f1",
431
- "f2",
432
- "f3",
433
- "f4",
434
- "f5",
435
- "f6",
436
- "f7",
437
- "f8",
438
- "f9",
439
- "f10",
440
- "f11",
441
- "f12",
442
- "f13",
443
- "f14",
444
- "f15",
445
- "f16",
446
- "f17",
447
- "f18",
448
- "f19",
449
- "f20",
450
- "f21",
451
- "f22",
452
- "f23",
453
- "f24",
454
- # Language-specific keys
455
- "hanguel",
456
- "hangul",
457
- "hanja",
458
- "kana",
459
- "kanji",
460
- "junja",
461
- "convert",
462
- "nonconvert",
463
- "yen",
464
- # Characters
465
- "\t",
466
- "\n",
467
- "\r",
468
- " ",
469
- "!",
470
- '"',
471
- "#",
472
- "$",
473
- "%",
474
- "&",
475
- "'",
476
- "(",
477
- ")",
478
- "*",
479
- "+",
480
- ",",
481
- "-",
482
- ".",
483
- "/",
484
- "0",
485
- "1",
486
- "2",
487
- "3",
488
- "4",
489
- "5",
490
- "6",
491
- "7",
492
- "8",
493
- "9",
494
- ":",
495
- ";",
496
- "<",
497
- "=",
498
- ">",
499
- "?",
500
- "@",
501
- "[",
502
- "\\",
503
- "]",
504
- "^",
505
- "_",
506
- "`",
507
- "a",
508
- "b",
509
- "c",
510
- "d",
511
- "e",
512
- "f",
513
- "g",
514
- "h",
515
- "i",
516
- "j",
517
- "k",
518
- "l",
519
- "m",
520
- "n",
521
- "o",
522
- "p",
523
- "q",
524
- "r",
525
- "s",
526
- "t",
527
- "u",
528
- "v",
529
- "w",
530
- "x",
531
- "y",
532
- "z",
533
- "{",
534
- "|",
535
- "}",
536
- "~",
537
- ]
538
-
539
- CLAButton: TypeAlias = Literal["left", "right", "middle", "back", "forward"]
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+ from typing import Literal, TypeAlias
6
+
7
+ from hud.tools.types import ContentResult
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ class BaseExecutor:
13
+ """
14
+ Base executor that provides simulation implementations for all CLA (Common Language Actions).
15
+
16
+ This class:
17
+ 1. Defines all action methods that HudComputer expects
18
+ 2. Provides simulation implementations for environments without display
19
+ 3. Serves as the base class for platform-specific executors (XDO, PyAutoGUI)
20
+
21
+ When used directly, it simulates all actions. Subclasses provide real implementations.
22
+ """
23
+
24
+ def __init__(self, display_num: int | None = None) -> None:
25
+ """
26
+ Initialize the base executor.
27
+
28
+ Args:
29
+ display_num: X display number (for Linux/X11 systems)
30
+ """
31
+ if display_num is None:
32
+ from hud.tools.computer.settings import computer_settings
33
+
34
+ self.display_num = computer_settings.DISPLAY_NUM
35
+ else:
36
+ self.display_num = display_num
37
+ self._screenshot_delay = 0.5
38
+ logger.info("BaseExecutor initialized")
39
+
40
+ # ===== Core CLA Actions =====
41
+
42
+ async def click(
43
+ self,
44
+ x: int | None = None,
45
+ y: int | None = None,
46
+ button: Literal["left", "right", "middle", "back", "forward"] = "left",
47
+ pattern: list[int] | None = None,
48
+ hold_keys: list[str] | None = None,
49
+ take_screenshot: bool = True,
50
+ ) -> ContentResult:
51
+ """
52
+ Click at specified coordinates.
53
+
54
+ Args:
55
+ x, y: Coordinates to click at (None = current position)
56
+ button: Mouse button to use
57
+ pattern: List of delays for multi-clicks (e.g., [100] for double-click)
58
+ hold_keys: Keys to hold during click
59
+ take_screenshot: Whether to capture screenshot after action
60
+ """
61
+ msg = f"[SIMULATED] Click at ({x}, {y}) with {button} button"
62
+ if pattern:
63
+ msg += f" (multi-click pattern: {pattern})"
64
+ if hold_keys:
65
+ msg += f" while holding {hold_keys}"
66
+
67
+ screenshot = await self.screenshot() if take_screenshot else None
68
+ return ContentResult(output=msg, base64_image=screenshot)
69
+
70
+ async def write(
71
+ self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
72
+ ) -> ContentResult:
73
+ """
74
+ Type text using keyboard.
75
+
76
+ Args:
77
+ text: Text to type
78
+ enter_after: Whether to press Enter after typing
79
+ delay: Delay between keystrokes in milliseconds
80
+ take_screenshot: Whether to capture screenshot after action
81
+ """
82
+ msg = f"[SIMULATED] Type '{text}'"
83
+ if enter_after:
84
+ msg += " followed by Enter"
85
+
86
+ screenshot = await self.screenshot() if take_screenshot else None
87
+ return ContentResult(output=msg, base64_image=screenshot)
88
+
89
+ async def press(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
90
+ """
91
+ Press a key combination (hotkey).
92
+
93
+ Args:
94
+ keys: List of keys to press together (e.g., ["ctrl", "c"])
95
+ take_screenshot: Whether to capture screenshot after action
96
+ """
97
+ key_combo = "+".join(keys)
98
+ msg = f"[SIMULATED] Press key combination: {key_combo}"
99
+
100
+ screenshot = await self.screenshot() if take_screenshot else None
101
+ return ContentResult(output=msg, base64_image=screenshot)
102
+
103
+ async def key(self, key_sequence: str, take_screenshot: bool = True) -> ContentResult:
104
+ """
105
+ Press a single key or key combination.
106
+
107
+ Args:
108
+ key_sequence: Key or combination like "Return" or "ctrl+a"
109
+ take_screenshot: Whether to capture screenshot after action
110
+ """
111
+ msg = f"[SIMULATED] Press key: {key_sequence}"
112
+
113
+ screenshot = await self.screenshot() if take_screenshot else None
114
+ return ContentResult(output=msg, base64_image=screenshot)
115
+
116
+ async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
117
+ """
118
+ Press and hold keys.
119
+
120
+ Args:
121
+ keys: Keys to press and hold
122
+ take_screenshot: Whether to capture screenshot after action
123
+ """
124
+ msg = f"[SIMULATED] Key down: {', '.join(keys)}"
125
+
126
+ screenshot = await self.screenshot() if take_screenshot else None
127
+ return ContentResult(output=msg, base64_image=screenshot)
128
+
129
+ async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ContentResult:
130
+ """
131
+ Release held keys.
132
+
133
+ Args:
134
+ keys: Keys to release
135
+ take_screenshot: Whether to capture screenshot after action
136
+ """
137
+ msg = f"[SIMULATED] Key up: {', '.join(keys)}"
138
+
139
+ screenshot = await self.screenshot() if take_screenshot else None
140
+ return ContentResult(output=msg, base64_image=screenshot)
141
+
142
+ async def scroll(
143
+ self,
144
+ x: int | None = None,
145
+ y: int | None = None,
146
+ scroll_x: int | None = None,
147
+ scroll_y: int | None = None,
148
+ hold_keys: list[str] | None = None,
149
+ take_screenshot: bool = True,
150
+ ) -> ContentResult:
151
+ """
152
+ Scroll at specified position.
153
+
154
+ Args:
155
+ x, y: Position to scroll at (None = current position)
156
+ scroll_x: Horizontal scroll amount (positive = right)
157
+ scroll_y: Vertical scroll amount (positive = down)
158
+ hold_keys: Keys to hold during scroll
159
+ take_screenshot: Whether to capture screenshot after action
160
+ """
161
+ msg = "[SIMULATED] Scroll"
162
+ if x is not None and y is not None:
163
+ msg += f" at ({x}, {y})"
164
+ if scroll_x:
165
+ msg += f" horizontally by {scroll_x}"
166
+ if scroll_y:
167
+ msg += f" vertically by {scroll_y}"
168
+ if hold_keys:
169
+ msg += f" while holding {hold_keys}"
170
+
171
+ screenshot = await self.screenshot() if take_screenshot else None
172
+ return ContentResult(output=msg, base64_image=screenshot)
173
+
174
+ async def move(
175
+ self,
176
+ x: int | None = None,
177
+ y: int | None = None,
178
+ offset_x: int | None = None,
179
+ offset_y: int | None = None,
180
+ take_screenshot: bool = True,
181
+ ) -> ContentResult:
182
+ """
183
+ Move mouse cursor.
184
+
185
+ Args:
186
+ x, y: Absolute coordinates to move to
187
+ offset_x, offset_y: Relative offset from current position
188
+ take_screenshot: Whether to capture screenshot after action
189
+ """
190
+ if x is not None and y is not None:
191
+ msg = f"[SIMULATED] Move mouse to ({x}, {y})"
192
+ elif offset_x is not None or offset_y is not None:
193
+ msg = f"[SIMULATED] Move mouse by offset ({offset_x or 0}, {offset_y or 0})"
194
+ else:
195
+ msg = "[SIMULATED] Move mouse (no coordinates specified)"
196
+
197
+ screenshot = await self.screenshot() if take_screenshot else None
198
+ return ContentResult(output=msg, base64_image=screenshot)
199
+
200
+ async def drag(
201
+ self,
202
+ path: list[tuple[int, int]],
203
+ pattern: list[int] | None = None,
204
+ hold_keys: list[str] | None = None,
205
+ take_screenshot: bool = True,
206
+ ) -> ContentResult:
207
+ """
208
+ Drag along a path.
209
+
210
+ Args:
211
+ path: List of (x, y) coordinates defining the drag path
212
+ pattern: Delays between path points in milliseconds
213
+ hold_keys: Keys to hold during drag
214
+ take_screenshot: Whether to capture screenshot after action
215
+ """
216
+ if len(path) < 2:
217
+ return ContentResult(error="Drag path must have at least 2 points")
218
+
219
+ start = path[0]
220
+ end = path[-1]
221
+ msg = f"[SIMULATED] Drag from {start} to {end}"
222
+ if len(path) > 2:
223
+ msg += f" via {len(path) - 2} intermediate points"
224
+ if hold_keys:
225
+ msg += f" while holding {hold_keys}"
226
+
227
+ screenshot = await self.screenshot() if take_screenshot else None
228
+ return ContentResult(output=msg, base64_image=screenshot)
229
+
230
+ async def mouse_down(
231
+ self,
232
+ button: Literal["left", "right", "middle", "back", "forward"] = "left",
233
+ take_screenshot: bool = True,
234
+ ) -> ContentResult:
235
+ """
236
+ Press and hold a mouse button.
237
+
238
+ Args:
239
+ button: Mouse button to press
240
+ take_screenshot: Whether to capture screenshot after action
241
+ """
242
+ msg = f"[SIMULATED] Mouse down: {button} button"
243
+
244
+ screenshot = await self.screenshot() if take_screenshot else None
245
+ return ContentResult(output=msg, base64_image=screenshot)
246
+
247
+ async def mouse_up(
248
+ self,
249
+ button: Literal["left", "right", "middle", "back", "forward"] = "left",
250
+ take_screenshot: bool = True,
251
+ ) -> ContentResult:
252
+ """
253
+ Release a mouse button.
254
+
255
+ Args:
256
+ button: Mouse button to release
257
+ take_screenshot: Whether to capture screenshot after action
258
+ """
259
+ msg = f"[SIMULATED] Mouse up: {button} button"
260
+
261
+ screenshot = await self.screenshot() if take_screenshot else None
262
+ return ContentResult(output=msg, base64_image=screenshot)
263
+
264
+ async def hold_key(
265
+ self, key: str, duration: float, take_screenshot: bool = True
266
+ ) -> ContentResult:
267
+ """
268
+ Hold a key for a specified duration.
269
+
270
+ Args:
271
+ key: The key to hold
272
+ duration: Duration in seconds
273
+ take_screenshot: Whether to capture screenshot after action
274
+ """
275
+ msg = f"[SIMULATED] Hold key '{key}' for {duration} seconds"
276
+ await asyncio.sleep(duration) # Simulate the wait
277
+
278
+ screenshot = await self.screenshot() if take_screenshot else None
279
+ return ContentResult(output=msg, base64_image=screenshot)
280
+
281
+ # ===== Utility Actions =====
282
+
283
+ async def wait(self, time: int) -> ContentResult:
284
+ """
285
+ Wait for specified time.
286
+
287
+ Args:
288
+ time: Time to wait in milliseconds
289
+ """
290
+ duration_seconds = time / 1000.0
291
+ await asyncio.sleep(duration_seconds)
292
+ return ContentResult(output=f"Waited {time}ms")
293
+
294
+ async def screenshot(self) -> str | None:
295
+ """
296
+ Take a screenshot and return base64 encoded image.
297
+
298
+ Returns:
299
+ Base64 encoded PNG image or None if failed
300
+ """
301
+ logger.info("[SIMULATION] Taking screenshot")
302
+ return "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==" # noqa: E501
303
+
304
+ async def position(self) -> ContentResult:
305
+ """
306
+ Get current cursor position.
307
+
308
+ Returns:
309
+ ToolResult with position information
310
+ """
311
+ return ContentResult(output="[SIMULATED] Mouse position: (0, 0)")
312
+
313
+ # ===== Legacy/Compatibility Methods =====
314
+
315
+ async def execute(self, command: str, take_screenshot: bool = True) -> ContentResult:
316
+ """
317
+ Execute a raw command (for backwards compatibility).
318
+
319
+ Args:
320
+ command: Command to execute
321
+ take_screenshot: Whether to capture screenshot after action
322
+ """
323
+ msg = f"[SIMULATED] Execute: {command}"
324
+ screenshot = await self.screenshot() if take_screenshot else None
325
+ return ContentResult(output=msg, base64_image=screenshot)
326
+
327
+ # Compatibility aliases
328
+ async def type_text(
329
+ self, text: str, delay: int = 12, take_screenshot: bool = True
330
+ ) -> ContentResult:
331
+ """Alias for type() to maintain compatibility."""
332
+ return await self.write(
333
+ text, enter_after=False, delay=delay, take_screenshot=take_screenshot
334
+ )
335
+
336
+ async def mouse_move(self, x: int, y: int, take_screenshot: bool = True) -> ContentResult:
337
+ """Alias for move() to maintain compatibility."""
338
+ return await self.move(x=x, y=y, take_screenshot=take_screenshot)
339
+
340
+
341
+ CLAKey: TypeAlias = Literal[
342
+ # Control keys
343
+ "backspace",
344
+ "tab",
345
+ "enter",
346
+ "shift",
347
+ "shiftleft",
348
+ "shiftright",
349
+ "ctrl",
350
+ "ctrlleft",
351
+ "ctrlright",
352
+ "alt",
353
+ "altleft",
354
+ "altright",
355
+ "pause",
356
+ "capslock",
357
+ "esc",
358
+ "escape",
359
+ "space",
360
+ "pageup",
361
+ "pagedown",
362
+ "end",
363
+ "home",
364
+ "left",
365
+ "up",
366
+ "right",
367
+ "down",
368
+ "select",
369
+ "print",
370
+ "execute",
371
+ "printscreen",
372
+ "prtsc",
373
+ "insert",
374
+ "delete",
375
+ "help",
376
+ "sleep",
377
+ # Special keys
378
+ "numlock",
379
+ "scrolllock",
380
+ "clear",
381
+ "separator",
382
+ "modechange",
383
+ "apps",
384
+ "browserback",
385
+ "browserfavorites",
386
+ "browserforward",
387
+ "browserhome",
388
+ "browserrefresh",
389
+ "browsersearch",
390
+ "browserstop",
391
+ "launchapp1",
392
+ "launchapp2",
393
+ "launchmail",
394
+ "launchmediaselect",
395
+ "playpause",
396
+ "start",
397
+ "stop",
398
+ "prevtrack",
399
+ "nexttrack",
400
+ "volumemute",
401
+ "volumeup",
402
+ "volumedown",
403
+ "zoom",
404
+ # Modifier keys
405
+ "win",
406
+ "winleft",
407
+ "winright",
408
+ "command",
409
+ "option",
410
+ "optionleft",
411
+ "optionright",
412
+ "fn",
413
+ # Numpad keys
414
+ "num0",
415
+ "num1",
416
+ "num2",
417
+ "num3",
418
+ "num4",
419
+ "num5",
420
+ "num6",
421
+ "num7",
422
+ "num8",
423
+ "num9",
424
+ "multiply",
425
+ "add",
426
+ "subtract",
427
+ "decimal",
428
+ "divide",
429
+ # Function keys
430
+ "f1",
431
+ "f2",
432
+ "f3",
433
+ "f4",
434
+ "f5",
435
+ "f6",
436
+ "f7",
437
+ "f8",
438
+ "f9",
439
+ "f10",
440
+ "f11",
441
+ "f12",
442
+ "f13",
443
+ "f14",
444
+ "f15",
445
+ "f16",
446
+ "f17",
447
+ "f18",
448
+ "f19",
449
+ "f20",
450
+ "f21",
451
+ "f22",
452
+ "f23",
453
+ "f24",
454
+ # Language-specific keys
455
+ "hanguel",
456
+ "hangul",
457
+ "hanja",
458
+ "kana",
459
+ "kanji",
460
+ "junja",
461
+ "convert",
462
+ "nonconvert",
463
+ "yen",
464
+ # Characters
465
+ "\t",
466
+ "\n",
467
+ "\r",
468
+ " ",
469
+ "!",
470
+ '"',
471
+ "#",
472
+ "$",
473
+ "%",
474
+ "&",
475
+ "'",
476
+ "(",
477
+ ")",
478
+ "*",
479
+ "+",
480
+ ",",
481
+ "-",
482
+ ".",
483
+ "/",
484
+ "0",
485
+ "1",
486
+ "2",
487
+ "3",
488
+ "4",
489
+ "5",
490
+ "6",
491
+ "7",
492
+ "8",
493
+ "9",
494
+ ":",
495
+ ";",
496
+ "<",
497
+ "=",
498
+ ">",
499
+ "?",
500
+ "@",
501
+ "[",
502
+ "\\",
503
+ "]",
504
+ "^",
505
+ "_",
506
+ "`",
507
+ "a",
508
+ "b",
509
+ "c",
510
+ "d",
511
+ "e",
512
+ "f",
513
+ "g",
514
+ "h",
515
+ "i",
516
+ "j",
517
+ "k",
518
+ "l",
519
+ "m",
520
+ "n",
521
+ "o",
522
+ "p",
523
+ "q",
524
+ "r",
525
+ "s",
526
+ "t",
527
+ "u",
528
+ "v",
529
+ "w",
530
+ "x",
531
+ "y",
532
+ "z",
533
+ "{",
534
+ "|",
535
+ "}",
536
+ "~",
537
+ ]
538
+
539
+ CLAButton: TypeAlias = Literal["left", "right", "middle", "back", "forward"]