hud-python 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (86) hide show
  1. hud/__init__.py +20 -8
  2. hud/adapters/common/adapter.py +14 -3
  3. hud/adapters/common/tests/test_adapter.py +16 -4
  4. hud/datasets.py +188 -0
  5. hud/env/docker_client.py +15 -3
  6. hud/env/environment.py +10 -7
  7. hud/env/local_docker_client.py +29 -7
  8. hud/env/remote_client.py +1 -1
  9. hud/env/remote_docker_client.py +2 -2
  10. hud/exceptions.py +2 -1
  11. hud/gym.py +0 -9
  12. hud/mcp/__init__.py +17 -0
  13. hud/mcp/base.py +631 -0
  14. hud/mcp/claude.py +321 -0
  15. hud/mcp/client.py +312 -0
  16. hud/mcp/langchain.py +250 -0
  17. hud/mcp/openai.py +334 -0
  18. hud/mcp/tests/__init__.py +1 -0
  19. hud/mcp/tests/test_base.py +512 -0
  20. hud/mcp/tests/test_claude.py +294 -0
  21. hud/mcp/tests/test_client.py +324 -0
  22. hud/mcp/tests/test_openai.py +238 -0
  23. hud/settings.py +20 -2
  24. hud/task.py +5 -88
  25. hud/taskset.py +2 -23
  26. hud/telemetry/__init__.py +16 -7
  27. hud/telemetry/_trace.py +246 -72
  28. hud/telemetry/context.py +88 -27
  29. hud/telemetry/exporter.py +171 -11
  30. hud/telemetry/instrumentation/mcp.py +174 -410
  31. hud/telemetry/job.py +141 -0
  32. hud/telemetry/mcp_models.py +13 -74
  33. hud/telemetry/tests/test_context.py +9 -6
  34. hud/telemetry/tests/test_trace.py +120 -78
  35. hud/tools/__init__.py +34 -0
  36. hud/tools/base.py +65 -0
  37. hud/tools/bash.py +137 -0
  38. hud/tools/computer/__init__.py +13 -0
  39. hud/tools/computer/anthropic.py +411 -0
  40. hud/tools/computer/hud.py +315 -0
  41. hud/tools/computer/openai.py +283 -0
  42. hud/tools/edit.py +290 -0
  43. hud/tools/executors/__init__.py +30 -0
  44. hud/tools/executors/base.py +331 -0
  45. hud/tools/executors/pyautogui.py +619 -0
  46. hud/tools/executors/tests/__init__.py +1 -0
  47. hud/tools/executors/tests/test_base_executor.py +338 -0
  48. hud/tools/executors/tests/test_pyautogui_executor.py +165 -0
  49. hud/tools/executors/xdo.py +503 -0
  50. hud/tools/helper/README.md +56 -0
  51. hud/tools/helper/__init__.py +9 -0
  52. hud/tools/helper/mcp_server.py +78 -0
  53. hud/tools/helper/server_initialization.py +115 -0
  54. hud/tools/helper/utils.py +58 -0
  55. hud/tools/playwright_tool.py +379 -0
  56. hud/tools/tests/__init__.py +3 -0
  57. hud/tools/tests/test_bash.py +152 -0
  58. hud/tools/tests/test_computer.py +52 -0
  59. hud/tools/tests/test_computer_actions.py +34 -0
  60. hud/tools/tests/test_edit.py +240 -0
  61. hud/tools/tests/test_init.py +27 -0
  62. hud/tools/tests/test_playwright_tool.py +183 -0
  63. hud/tools/tests/test_tools.py +157 -0
  64. hud/tools/tests/test_utils.py +156 -0
  65. hud/tools/utils.py +50 -0
  66. hud/trajectory.py +5 -1
  67. hud/types.py +10 -1
  68. hud/utils/tests/test_init.py +21 -0
  69. hud/utils/tests/test_version.py +1 -1
  70. hud/version.py +1 -1
  71. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/METADATA +27 -18
  72. hud_python-0.3.1.dist-info/RECORD +119 -0
  73. hud/evaluators/__init__.py +0 -9
  74. hud/evaluators/base.py +0 -32
  75. hud/evaluators/inspect.py +0 -24
  76. hud/evaluators/judge.py +0 -189
  77. hud/evaluators/match.py +0 -156
  78. hud/evaluators/remote.py +0 -65
  79. hud/evaluators/tests/__init__.py +0 -0
  80. hud/evaluators/tests/test_inspect.py +0 -12
  81. hud/evaluators/tests/test_judge.py +0 -231
  82. hud/evaluators/tests/test_match.py +0 -115
  83. hud/evaluators/tests/test_remote.py +0 -98
  84. hud_python-0.2.10.dist-info/RECORD +0 -85
  85. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/WHEEL +0 -0
  86. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/licenses/LICENSE +0 -0
hud/tools/edit.py ADDED
@@ -0,0 +1,290 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import defaultdict
4
+ from pathlib import Path
5
+ from typing import Any, Literal, get_args
6
+
7
+ from .base import CLIResult, ToolError, ToolResult
8
+ from .utils import maybe_truncate, run
9
+
10
# The closed set of command names the edit tool dispatches on; EditTool.__call__
# raises a ToolError for any value outside this set.
Command = Literal[
    "view",
    "create",
    "str_replace",
    "insert",
    "undo_edit",
]
# Number of context lines shown before and after an edit in the snippets that
# `str_replace` and `insert` return.
SNIPPET_LINES: int = 4
18
+
19
+
20
class EditTool:
    """
    A filesystem editor tool that allows the agent to view, create, and edit files.
    The tool parameters are defined by Anthropic and are not editable.

    Before every successful edit the previous file content is pushed onto an
    in-memory, per-path history stack so that ``undo_edit`` can restore it.
    """

    # Maps each path to the stack of contents recorded for it (most recent last).
    _file_history: dict[Path, list[str]]

    def __init__(self) -> None:
        self._file_history = defaultdict(list)

    async def __call__(
        self,
        *,
        command: Command,
        path: str,
        file_text: str | None = None,
        view_range: list[int] | None = None,
        old_str: str | None = None,
        new_str: str | None = None,
        insert_line: int | None = None,
        **kwargs: Any,
    ) -> CLIResult:
        """
        Validate ``path`` for ``command`` and dispatch to the matching handler.

        Args:
            command: One of the names in ``Command``.
            path: Absolute path of the file or directory to operate on.
            file_text: Full content of the new file (required for ``create``).
            view_range: Optional ``[first, last]`` 1-based line range for ``view``;
                ``last`` may be ``-1`` to mean "through the end of the file".
            old_str: Text to replace (required for ``str_replace``).
            new_str: Replacement text for ``str_replace`` (``None`` means empty)
                or the text to insert (required for ``insert``).
            insert_line: Line after which to insert (required for ``insert``).
            **kwargs: Accepted and ignored.

        Raises:
            ToolError: If the path is invalid for the command, a required
                parameter is missing, or the command is not recognized.
        """
        _path = Path(path)
        self.validate_path(command, _path)
        if command == "view":
            return await self.view(_path, view_range)
        elif command == "create":
            if file_text is None:
                raise ToolError("Parameter `file_text` is required for command: create")
            await self.write_file(_path, file_text)
            self._file_history[_path].append(file_text)
            return ToolResult(output=f"File created successfully at: {_path}")
        elif command == "str_replace":
            if old_str is None:
                raise ToolError("Parameter `old_str` is required for command: str_replace")
            return await self.str_replace(_path, old_str, new_str)
        elif command == "insert":
            if insert_line is None:
                raise ToolError("Parameter `insert_line` is required for command: insert")
            if new_str is None:
                raise ToolError("Parameter `new_str` is required for command: insert")
            return await self.insert(_path, insert_line, new_str)
        elif command == "undo_edit":
            return await self.undo_edit(_path)
        # This class does not define `name`; fall back to the class name so that an
        # unknown command is reported as a ToolError instead of an AttributeError.
        tool_name = getattr(self, "name", type(self).__name__)
        raise ToolError(
            f"Unrecognized command {command}. The allowed commands for the {tool_name} tool are: "
            f"{', '.join(get_args(Command))}"
        )

    def validate_path(self, command: str, path: Path) -> None:
        """
        Check that the path/command combination is valid.

        Raises:
            ToolError: If ``path`` is relative, does not exist (for anything but
                ``create``), already exists (for ``create``), or is a directory
                (for anything but ``view``).
        """
        # Check if its an absolute path
        if not path.is_absolute():
            # Anchor the relative path at the filesystem root so the hint actually
            # differs from what the caller supplied.
            suggested_path = Path("/") / path
            raise ToolError(
                f"The path {path} is not an absolute path, it should start with `/`. "
                f"Maybe you meant {suggested_path}?"
            )
        # Check if path exists
        if not path.exists() and command != "create":
            raise ToolError(f"The path {path} does not exist. Please provide a valid path.")
        if path.exists() and command == "create":
            raise ToolError(
                f"File already exists at: {path}. Cannot overwrite files using command `create`."
            )
        # Check if the path points to a directory
        if path.is_dir() and command != "view":
            raise ToolError(
                f"The path {path} is a dir and only the `view` command can be used on dirs."
            )

    async def view(self, path: Path, view_range: list[int] | None = None) -> CLIResult:
        """
        Implement the view command.

        For a directory, list its entries up to two levels deep (hidden items
        excluded) using ``find``. For a file, return its numbered content,
        optionally restricted to ``view_range``.

        Raises:
            ToolError: If ``view_range`` is given for a directory, is not a list of
                two integers, or falls outside the file's line range.
        """
        if path.is_dir():
            if view_range:
                raise ToolError(
                    "The `view_range` parameter is not allowed when `path` points to a directory."
                )

            import shlex

            # Quote the path so it is passed to the shell as a single literal argument.
            safe_path = shlex.quote(str(path))
            _, stdout, stderr = await run(rf"find {safe_path} -maxdepth 2 -not -path '*/\.*'")
            if not stderr:
                stdout = (
                    f"Here's the files and directories up to 2 levels deep in {path}, "
                    f"excluding hidden items:\n{stdout}\n"
                )
            return CLIResult(output=stdout, error=stderr)

        file_content = await self.read_file(path)
        init_line = 1
        if view_range:
            if len(view_range) != 2 or not all(isinstance(i, int) for i in view_range):
                raise ToolError("Invalid `view_range`. It should be a list of two integers.")
            file_lines = file_content.split("\n")
            n_lines_file = len(file_lines)
            init_line, final_line = view_range
            if init_line < 1 or init_line > n_lines_file:
                raise ToolError(
                    f"Invalid `view_range`: {view_range}. Its first element `{init_line}` "
                    f"should be within the range of lines of the file: {[1, n_lines_file]}"
                )
            if final_line > n_lines_file:
                raise ToolError(
                    f"Invalid `view_range`: {view_range}. Its second element `{final_line}` "
                    f"should be smaller than the number of lines in the file: `{n_lines_file}`"
                )
            if final_line != -1 and final_line < init_line:
                raise ToolError(
                    f"Invalid `view_range`: {view_range}. Its second element `{final_line}` "
                    f"should be larger or equal than its first `{init_line}`"
                )

            # A final line of -1 means "through the end of the file".
            if final_line == -1:
                file_content = "\n".join(file_lines[init_line - 1 :])
            else:
                file_content = "\n".join(file_lines[init_line - 1 : final_line])

        return CLIResult(output=self._make_output(file_content, str(path), init_line=init_line))

    async def str_replace(self, path: Path, old_str: str, new_str: str | None) -> CLIResult:
        """
        Implement the str_replace command, which replaces old_str with new_str in the file content.

        Tabs in the file content and in both strings are expanded before matching.
        ``old_str`` must occur exactly once; ``new_str=None`` deletes it.

        Raises:
            ToolError: If ``old_str`` does not occur, or occurs more than once.
        """
        # Read the file content
        file_content = (await self.read_file(path)).expandtabs()
        old_str = old_str.expandtabs()
        new_str = new_str.expandtabs() if new_str is not None else ""

        # Check if old_str is unique in the file
        occurrences = file_content.count(old_str)
        if occurrences == 0:
            raise ToolError(
                f"No replacement was performed, old_str `{old_str}` did not appear verbatim in "
                f"{path}."
            )
        elif occurrences > 1:
            file_content_lines = file_content.split("\n")
            lines = [idx + 1 for idx, line in enumerate(file_content_lines) if old_str in line]
            raise ToolError(
                f"No replacement was performed. Multiple occurrences of old_str `{old_str}` "
                f"in lines {lines}. Please ensure it is unique"
            )

        # Replace old_str with new_str
        new_file_content = file_content.replace(old_str, new_str)

        # Write the new content to the file
        await self.write_file(path, new_file_content)

        # Save the content to history
        self._file_history[path].append(file_content)

        # Create a snippet of the edited section
        # (0-based index of the line on which the replaced text started)
        replacement_line = file_content.split(old_str)[0].count("\n")
        start_line = max(0, replacement_line - SNIPPET_LINES)
        end_line = replacement_line + SNIPPET_LINES + new_str.count("\n")
        snippet = "\n".join(new_file_content.split("\n")[start_line : end_line + 1])

        # Prepare the success message
        success_msg = f"The file {path} has been edited. "
        success_msg += self._make_output(snippet, f"a snippet of {path}", start_line + 1)
        success_msg += (
            "Review the changes and make sure they are as expected. "
            "Edit the file again if necessary."
        )

        return CLIResult(output=success_msg)

    async def insert(self, path: Path, insert_line: int, new_str: str) -> CLIResult:
        """
        Implement the insert command, which inserts new_str at the specified line in the file.

        ``insert_line`` counts existing lines: ``0`` inserts before the first line
        and ``n`` inserts after line ``n``. Tabs are expanded in both the file
        content and ``new_str``.

        Raises:
            ToolError: If ``insert_line`` is negative or beyond the number of lines.
        """
        file_text = (await self.read_file(path)).expandtabs()
        new_str = new_str.expandtabs()
        file_text_lines = file_text.split("\n")
        n_lines_file = len(file_text_lines)

        if insert_line < 0 or insert_line > n_lines_file:
            raise ToolError(
                f"Invalid `insert_line` parameter: {insert_line}. It should be within the range "
                f"of lines of the file: {[0, n_lines_file]}"
            )

        new_str_lines = new_str.split("\n")
        new_file_text_lines = (
            file_text_lines[:insert_line] + new_str_lines + file_text_lines[insert_line:]
        )
        # Context shown to the caller: SNIPPET_LINES before, the new text, SNIPPET_LINES after.
        snippet_lines = (
            file_text_lines[max(0, insert_line - SNIPPET_LINES) : insert_line]
            + new_str_lines
            + file_text_lines[insert_line : insert_line + SNIPPET_LINES]
        )

        new_file_text = "\n".join(new_file_text_lines)
        snippet = "\n".join(snippet_lines)

        await self.write_file(path, new_file_text)
        self._file_history[path].append(file_text)

        success_msg = f"The file {path} has been edited. "
        success_msg += self._make_output(
            snippet,
            "a snippet of the edited file",
            max(1, insert_line - SNIPPET_LINES + 1),
        )
        success_msg += (
            "Review the changes and make sure they are as expected (correct indentation, "
            "no duplicate lines, etc). Edit the file again if necessary."
        )
        return CLIResult(output=success_msg)

    async def undo_edit(self, path: Path) -> CLIResult:
        """
        Implement the undo_edit command.

        Pops the most recent history entry for ``path`` and writes it back.

        Raises:
            ToolError: If no history has been recorded for ``path``.
        """
        if not self._file_history[path]:
            raise ToolError(f"No edit history found for {path}.")

        old_text = self._file_history[path].pop()
        await self.write_file(path, old_text)

        return CLIResult(
            output=f"Last edit to {path} undone successfully. "
            f"{self._make_output(old_text, str(path))}"
        )

    async def read_file(self, path: Path) -> str:
        """Read the content of a file from a given path; raise a ToolError if an error occurs."""
        try:
            # Read bytes and decode explicitly so the text (including line endings)
            # is returned exactly as stored, without going through a shell command.
            return path.read_bytes().decode("utf-8")
        except (OSError, UnicodeError) as e:
            raise ToolError(f"Ran into {e} while trying to read {path}") from None

    async def write_file(self, path: Path, file: str) -> None:
        """Write the content of a file to a given path; raise a ToolError if an error occurs."""
        try:
            # Write the exact bytes. Embedding the content in a shell here-document
            # would let a line equal to the terminator end the document early (the
            # remainder would then run as shell commands) and would always append
            # a trailing newline.
            path.write_bytes(file.encode("utf-8"))
        except (OSError, UnicodeError) as e:
            raise ToolError(f"Ran into {e} while trying to write to {path}") from None

    def _make_output(
        self,
        file_content: str,
        file_descriptor: str,
        init_line: int = 1,
        expand_tabs: bool = True,
    ) -> str:
        """
        Generate output for the CLI based on the content of a file.

        Args:
            file_content: Text to display; passed through ``maybe_truncate`` first.
            file_descriptor: Label used in the header line of the output.
            init_line: Number given to the first displayed line.
            expand_tabs: Whether to expand tabs in the content before numbering.

        Returns:
            A header followed by the content with each line prefixed by its
            right-aligned line number and a tab.
        """
        file_content = maybe_truncate(file_content)
        if expand_tabs:
            file_content = file_content.expandtabs()
        file_content = "\n".join(
            [f"{i + init_line:6}\t{line}" for i, line in enumerate(file_content.split("\n"))]
        )
        return (
            f"Here's the result of running `cat -n` on {file_descriptor}:\n" + file_content + "\n"
        )
@@ -0,0 +1,30 @@
1
+ """Executors for running system commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING, Any
6
+
7
+ from .base import BaseExecutor
8
+
9
+ if TYPE_CHECKING:
10
+ from .pyautogui import PyAutoGUIExecutor
11
+ from .xdo import XDOExecutor
12
+
13
# Public names of this package. Only BaseExecutor is imported eagerly; the other
# two are resolved on first attribute access by the module-level __getattr__.
__all__ = [
    "BaseExecutor",
    "PyAutoGUIExecutor",
    "XDOExecutor",
]
18
+
19
+
20
def __getattr__(name: str) -> Any:
    """
    Resolve executor classes the first time they are requested.

    Importing the concrete executors is deferred until one of them is actually
    accessed on this module, so pyautogui is not imported unless needed.

    Args:
        name: Attribute being looked up on the module.

    Returns:
        The requested executor class.

    Raises:
        AttributeError: If ``name`` is not one of the lazily provided executors.
    """
    if name == "XDOExecutor":
        from .xdo import XDOExecutor

        return XDOExecutor

    if name == "PyAutoGUIExecutor":
        from .pyautogui import PyAutoGUIExecutor

        return PyAutoGUIExecutor

    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
@@ -0,0 +1,331 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+ from typing import Literal
6
+
7
+ from hud.tools.base import ToolResult
8
+
9
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
10
+
11
+
12
class BaseExecutor:
    """
    Simulated implementation of every CLA (Common Language Actions) operation.

    The class plays three roles:
    1. It declares the full set of action methods that HudComputer expects.
    2. It supplies simulated behaviour for environments that have no display.
    3. It is the parent class of the platform-specific executors (XDO, PyAutoGUI).

    Used on its own, every action is only simulated: the method describes what it
    would have done in the result's output text. Subclasses supply real behaviour.
    """

    def __init__(self, display_num: int | None = None) -> None:
        """
        Set up the executor.

        Args:
            display_num: X display number (for Linux/X11 systems)
        """
        self.display_num = display_num
        self._screenshot_delay = 0.5
        logger.info("BaseExecutor initialized")

    async def _simulated(self, message: str, take_screenshot: bool) -> ToolResult:
        """Wrap ``message`` in a ToolResult, attaching a screenshot when requested."""
        image = None
        if take_screenshot:
            image = await self.screenshot()
        return ToolResult(output=message, base64_image=image)

    # ===== Core CLA Actions =====

    async def click(
        self,
        x: int | None = None,
        y: int | None = None,
        button: Literal["left", "right", "middle", "back", "forward"] = "left",
        pattern: list[int] | None = None,
        hold_keys: list[str] | None = None,
        take_screenshot: bool = True,
    ) -> ToolResult:
        """
        Simulate a mouse click.

        Args:
            x, y: Coordinates to click at (None = current position)
            button: Mouse button to use
            pattern: List of delays for multi-clicks (e.g., [100] for double-click)
            hold_keys: Keys to hold during click
            take_screenshot: Whether to capture screenshot after action
        """
        parts = [f"[SIMULATED] Click at ({x}, {y}) with {button} button"]
        if pattern:
            parts.append(f" (multi-click pattern: {pattern})")
        if hold_keys:
            parts.append(f" while holding {hold_keys}")
        return await self._simulated("".join(parts), take_screenshot)

    async def type(
        self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
    ) -> ToolResult:
        """
        Simulate typing text on the keyboard.

        Args:
            text: Text to type
            enter_after: Whether to press Enter after typing
            delay: Delay between keystrokes in milliseconds
            take_screenshot: Whether to capture screenshot after action
        """
        suffix = " followed by Enter" if enter_after else ""
        return await self._simulated(f"[SIMULATED] Type '{text}'" + suffix, take_screenshot)

    async def press(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
        """
        Simulate pressing a key combination (hotkey).

        Args:
            keys: List of keys to press together (e.g., ["ctrl", "c"])
            take_screenshot: Whether to capture screenshot after action
        """
        combo = "+".join(keys)
        return await self._simulated(
            f"[SIMULATED] Press key combination: {combo}", take_screenshot
        )

    async def key(self, key_sequence: str, take_screenshot: bool = True) -> ToolResult:
        """
        Simulate pressing a single key or key combination.

        Args:
            key_sequence: Key or combination like "Return" or "ctrl+a"
            take_screenshot: Whether to capture screenshot after action
        """
        return await self._simulated(f"[SIMULATED] Press key: {key_sequence}", take_screenshot)

    async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
        """
        Simulate pressing and holding keys.

        Args:
            keys: Keys to press and hold
            take_screenshot: Whether to capture screenshot after action
        """
        joined = ", ".join(keys)
        return await self._simulated(f"[SIMULATED] Key down: {joined}", take_screenshot)

    async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
        """
        Simulate releasing held keys.

        Args:
            keys: Keys to release
            take_screenshot: Whether to capture screenshot after action
        """
        joined = ", ".join(keys)
        return await self._simulated(f"[SIMULATED] Key up: {joined}", take_screenshot)

    async def scroll(
        self,
        x: int | None = None,
        y: int | None = None,
        scroll_x: int | None = None,
        scroll_y: int | None = None,
        hold_keys: list[str] | None = None,
        take_screenshot: bool = True,
    ) -> ToolResult:
        """
        Simulate scrolling.

        Args:
            x, y: Position to scroll at (None = current position)
            scroll_x: Horizontal scroll amount (positive = right)
            scroll_y: Vertical scroll amount (positive = down)
            hold_keys: Keys to hold during scroll
            take_screenshot: Whether to capture screenshot after action
        """
        parts = ["[SIMULATED] Scroll"]
        # The position is reported only when both coordinates were supplied.
        if x is not None and y is not None:
            parts.append(f" at ({x}, {y})")
        if scroll_x:
            parts.append(f" horizontally by {scroll_x}")
        if scroll_y:
            parts.append(f" vertically by {scroll_y}")
        if hold_keys:
            parts.append(f" while holding {hold_keys}")
        return await self._simulated("".join(parts), take_screenshot)

    async def move(
        self,
        x: int | None = None,
        y: int | None = None,
        offset_x: int | None = None,
        offset_y: int | None = None,
        take_screenshot: bool = True,
    ) -> ToolResult:
        """
        Simulate moving the mouse cursor.

        Absolute coordinates take precedence over offsets when both are given.

        Args:
            x, y: Absolute coordinates to move to
            offset_x, offset_y: Relative offset from current position
            take_screenshot: Whether to capture screenshot after action
        """
        if x is not None and y is not None:
            description = f"[SIMULATED] Move mouse to ({x}, {y})"
        elif offset_x is None and offset_y is None:
            description = "[SIMULATED] Move mouse (no coordinates specified)"
        else:
            # A missing offset component is reported as 0.
            description = f"[SIMULATED] Move mouse by offset ({offset_x or 0}, {offset_y or 0})"
        return await self._simulated(description, take_screenshot)

    async def drag(
        self,
        path: list[tuple[int, int]],
        pattern: list[int] | None = None,
        hold_keys: list[str] | None = None,
        take_screenshot: bool = True,
    ) -> ToolResult:
        """
        Simulate dragging along a path.

        Returns an error result (no screenshot) when the path has fewer than two
        points.

        Args:
            path: List of (x, y) coordinates defining the drag path
            pattern: Delays between path points in milliseconds
            hold_keys: Keys to hold during drag
            take_screenshot: Whether to capture screenshot after action
        """
        if len(path) < 2:
            return ToolResult(error="Drag path must have at least 2 points")

        start, end = path[0], path[-1]
        parts = [f"[SIMULATED] Drag from {start} to {end}"]
        if len(path) > 2:
            parts.append(f" via {len(path) - 2} intermediate points")
        if hold_keys:
            parts.append(f" while holding {hold_keys}")
        return await self._simulated("".join(parts), take_screenshot)

    async def mouse_down(
        self,
        button: Literal["left", "right", "middle", "back", "forward"] = "left",
        take_screenshot: bool = True,
    ) -> ToolResult:
        """
        Simulate pressing and holding a mouse button.

        Args:
            button: Mouse button to press
            take_screenshot: Whether to capture screenshot after action
        """
        return await self._simulated(f"[SIMULATED] Mouse down: {button} button", take_screenshot)

    async def mouse_up(
        self,
        button: Literal["left", "right", "middle", "back", "forward"] = "left",
        take_screenshot: bool = True,
    ) -> ToolResult:
        """
        Simulate releasing a mouse button.

        Args:
            button: Mouse button to release
            take_screenshot: Whether to capture screenshot after action
        """
        return await self._simulated(f"[SIMULATED] Mouse up: {button} button", take_screenshot)

    async def hold_key(self, key: str, duration: float, take_screenshot: bool = True) -> ToolResult:
        """
        Simulate holding a key for a given duration.

        Args:
            key: The key to hold
            duration: Duration in seconds
            take_screenshot: Whether to capture screenshot after action
        """
        description = f"[SIMULATED] Hold key '{key}' for {duration} seconds"
        # Sleep for the requested duration so the simulated hold takes real time.
        await asyncio.sleep(duration)
        return await self._simulated(description, take_screenshot)

    # ===== Utility Actions =====

    async def wait(self, time: int) -> ToolResult:
        """
        Sleep for the given amount of time.

        Args:
            time: Time to wait in milliseconds
        """
        await asyncio.sleep(time / 1000.0)
        return ToolResult(output=f"Waited {time}ms")

    async def screenshot(self) -> str | None:
        """
        Return a base64 encoded screenshot.

        The simulated implementation logs the request and returns a fixed
        base64 string instead of capturing anything.

        Returns:
            Base64 encoded PNG image or None if failed
        """
        logger.info("[SIMULATION] Taking screenshot")
        placeholder_png = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="  # noqa: E501
        return placeholder_png

    async def position(self) -> ToolResult:
        """
        Report the current cursor position.

        Returns:
            ToolResult with position information (always (0, 0) when simulated)
        """
        return ToolResult(output="[SIMULATED] Mouse position: (0, 0)")

    # ===== Legacy/Compatibility Methods =====

    async def execute(self, command: str, take_screenshot: bool = True) -> ToolResult:
        """
        Simulate executing a raw command (kept for backwards compatibility).

        Args:
            command: Command to execute
            take_screenshot: Whether to capture screenshot after action
        """
        return await self._simulated(f"[SIMULATED] Execute: {command}", take_screenshot)

    # Compatibility aliases
    async def type_text(
        self, text: str, delay: int = 12, take_screenshot: bool = True
    ) -> ToolResult:
        """Compatibility alias that forwards to type() without pressing Enter."""
        result = await self.type(
            text, enter_after=False, delay=delay, take_screenshot=take_screenshot
        )
        return result

    async def mouse_move(self, x: int, y: int, take_screenshot: bool = True) -> ToolResult:
        """Compatibility alias that forwards to move() with absolute coordinates."""
        result = await self.move(x=x, y=y, take_screenshot=take_screenshot)
        return result