hud-python 0.2.10__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +14 -5
- hud/env/docker_client.py +1 -1
- hud/env/environment.py +10 -7
- hud/env/local_docker_client.py +1 -1
- hud/env/remote_client.py +1 -1
- hud/env/remote_docker_client.py +2 -2
- hud/exceptions.py +2 -1
- hud/mcp_agent/__init__.py +15 -0
- hud/mcp_agent/base.py +723 -0
- hud/mcp_agent/claude.py +316 -0
- hud/mcp_agent/langchain.py +231 -0
- hud/mcp_agent/openai.py +318 -0
- hud/mcp_agent/tests/__init__.py +1 -0
- hud/mcp_agent/tests/test_base.py +437 -0
- hud/settings.py +14 -2
- hud/task.py +4 -0
- hud/telemetry/__init__.py +11 -7
- hud/telemetry/_trace.py +82 -71
- hud/telemetry/context.py +9 -27
- hud/telemetry/exporter.py +6 -5
- hud/telemetry/instrumentation/mcp.py +174 -410
- hud/telemetry/mcp_models.py +13 -74
- hud/telemetry/tests/test_context.py +9 -6
- hud/telemetry/tests/test_trace.py +92 -61
- hud/tools/__init__.py +21 -0
- hud/tools/base.py +65 -0
- hud/tools/bash.py +137 -0
- hud/tools/computer/__init__.py +13 -0
- hud/tools/computer/anthropic.py +411 -0
- hud/tools/computer/hud.py +315 -0
- hud/tools/computer/openai.py +283 -0
- hud/tools/edit.py +290 -0
- hud/tools/executors/__init__.py +13 -0
- hud/tools/executors/base.py +331 -0
- hud/tools/executors/pyautogui.py +585 -0
- hud/tools/executors/tests/__init__.py +1 -0
- hud/tools/executors/tests/test_base_executor.py +338 -0
- hud/tools/executors/tests/test_pyautogui_executor.py +162 -0
- hud/tools/executors/xdo.py +503 -0
- hud/tools/helper/README.md +56 -0
- hud/tools/helper/__init__.py +9 -0
- hud/tools/helper/mcp_server.py +78 -0
- hud/tools/helper/server_initialization.py +115 -0
- hud/tools/helper/utils.py +58 -0
- hud/tools/playwright_tool.py +373 -0
- hud/tools/tests/__init__.py +3 -0
- hud/tools/tests/test_bash.py +152 -0
- hud/tools/tests/test_computer.py +52 -0
- hud/tools/tests/test_computer_actions.py +34 -0
- hud/tools/tests/test_edit.py +233 -0
- hud/tools/tests/test_init.py +27 -0
- hud/tools/tests/test_playwright_tool.py +183 -0
- hud/tools/tests/test_tools.py +154 -0
- hud/tools/tests/test_utils.py +156 -0
- hud/tools/utils.py +50 -0
- hud/types.py +10 -1
- hud/utils/tests/test_init.py +21 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/METADATA +9 -6
- hud_python-0.3.0.dist-info/RECORD +124 -0
- hud_python-0.2.10.dist-info/RECORD +0 -85
- {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/WHEEL +0 -0
- {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/licenses/LICENSE +0 -0
hud/tools/edit.py
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Literal, get_args
|
|
6
|
+
|
|
7
|
+
from .base import CLIResult, ToolError, ToolResult
|
|
8
|
+
from .utils import maybe_truncate, run
|
|
9
|
+
|
|
10
|
+
# Closed set of sub-commands the edit tool dispatches on in `EditTool.__call__`.
Command = Literal["view", "create", "str_replace", "insert", "undo_edit"]
# Number of context lines kept on each side of an edit when building result snippets.
SNIPPET_LINES: int = 4
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class EditTool:
    """
    A filesystem editor tool that allows the agent to view, create, and edit files.
    The tool parameters are defined by Anthropic and are not editable.

    `create`, `str_replace` and `insert` each record a snapshot of file text in a
    per-path history list; `undo_edit` pops the latest snapshot and writes it back.
    """

    _file_history: dict[Path, list[str]]

    def __init__(self) -> None:
        # Maps each touched path to the stack of snapshots consumed by `undo_edit`.
        self._file_history = defaultdict(list)

    async def __call__(
        self,
        *,
        command: Command,
        path: str,
        file_text: str | None = None,
        view_range: list[int] | None = None,
        old_str: str | None = None,
        new_str: str | None = None,
        insert_line: int | None = None,
        **kwargs: Any,
    ) -> CLIResult:
        """
        Validate `path` for `command` and dispatch to the matching handler.

        Args:
            command: One of the `Command` literals.
            path: Absolute path of the file or directory to operate on.
            file_text: Full content of the new file (required for `create`).
            view_range: Optional `[first, last]` line range for `view`.
            old_str: Text to replace (required for `str_replace`).
            new_str: Replacement text for `str_replace` (defaults to empty) or the
                text to insert (required for `insert`).
            insert_line: Line after which to insert (required for `insert`).
            **kwargs: Accepted and ignored.

        Raises:
            ToolError: On an invalid path/command combination, a missing required
                parameter, or an unrecognized command.
        """
        _path = Path(path)
        self.validate_path(command, _path)
        if command == "view":
            return await self.view(_path, view_range)
        elif command == "create":
            if file_text is None:
                raise ToolError("Parameter `file_text` is required for command: create")
            await self.write_file(_path, file_text)
            self._file_history[_path].append(file_text)
            return ToolResult(output=f"File created successfully at: {_path}")
        elif command == "str_replace":
            if old_str is None:
                raise ToolError("Parameter `old_str` is required for command: str_replace")
            return await self.str_replace(_path, old_str, new_str)
        elif command == "insert":
            if insert_line is None:
                raise ToolError("Parameter `insert_line` is required for command: insert")
            if new_str is None:
                raise ToolError("Parameter `new_str` is required for command: insert")
            return await self.insert(_path, insert_line, new_str)
        elif command == "undo_edit":
            return await self.undo_edit(_path)
        # This class defines no `name` attribute, so formatting `self.name` directly
        # would raise AttributeError here; fall back to the class name instead.
        tool_name = getattr(self, "name", type(self).__name__)
        raise ToolError(
            f"Unrecognized command {command}. The allowed commands for the {tool_name} tool are: "
            f"{', '.join(get_args(Command))}"
        )

    def validate_path(self, command: str, path: Path) -> None:
        """
        Check that the path/command combination is valid.

        Raises:
            ToolError: If `path` is relative, does not exist (for anything but
                `create`), already exists (for `create`), or is a directory while
                `command` is not `view`.
        """
        # Check if its an absolute path
        if not path.is_absolute():
            # Root the suggestion at `/`; joining onto Path("") would just echo
            # the same relative path back to the caller.
            suggested_path = Path("/") / path
            raise ToolError(
                f"The path {path} is not an absolute path, it should start with `/`. "
                f"Maybe you meant {suggested_path}?"
            )
        # Check if path exists
        if not path.exists() and command != "create":
            raise ToolError(f"The path {path} does not exist. Please provide a valid path.")
        if path.exists() and command == "create":
            raise ToolError(
                f"File already exists at: {path}. Cannot overwrite files using command `create`."
            )
        # Check if the path points to a directory
        if path.is_dir() and command != "view":
            raise ToolError(
                f"The path {path} is a dir and only the `view` command can be used on dirs."
            )

    async def view(self, path: Path, view_range: list[int] | None = None) -> CLIResult:
        """
        Implement the view command.

        For a directory, list entries up to two levels deep, excluding hidden items.
        For a file, return its numbered content, optionally limited to `view_range`.

        Args:
            path: File or directory to show.
            view_range: Optional `[first, last]` pair of 1-based, inclusive line
                numbers; `last == -1` means "through the end of the file".

        Raises:
            ToolError: If `view_range` is given for a directory or is malformed or
                out of bounds.
        """
        if path.is_dir():
            if view_range:
                raise ToolError(
                    "The `view_range` parameter is not allowed when `path` points to a directory."
                )

            import shlex

            safe_path = shlex.quote(str(path))
            _, stdout, stderr = await run(rf"find {safe_path} -maxdepth 2 -not -path '*/\.*'")
            if not stderr:
                stdout = (
                    f"Here's the files and directories up to 2 levels deep in {path}, "
                    f"excluding hidden items:\n{stdout}\n"
                )
            return CLIResult(output=stdout, error=stderr)

        file_content = await self.read_file(path)
        init_line = 1
        if view_range:
            if len(view_range) != 2 or not all(isinstance(i, int) for i in view_range):
                raise ToolError("Invalid `view_range`. It should be a list of two integers.")
            file_lines = file_content.split("\n")
            n_lines_file = len(file_lines)
            init_line, final_line = view_range
            if init_line < 1 or init_line > n_lines_file:
                raise ToolError(
                    f"Invalid `view_range`: {view_range}. Its first element `{init_line}` "
                    f"should be within the range of lines of the file: {[1, n_lines_file]}"
                )
            if final_line > n_lines_file:
                raise ToolError(
                    f"Invalid `view_range`: {view_range}. Its second element `{final_line}` "
                    f"should be smaller than the number of lines in the file: `{n_lines_file}`"
                )
            if final_line != -1 and final_line < init_line:
                raise ToolError(
                    f"Invalid `view_range`: {view_range}. Its second element `{final_line}` "
                    f"should be larger or equal than its first `{init_line}`"
                )

            if final_line == -1:
                file_content = "\n".join(file_lines[init_line - 1 :])
            else:
                file_content = "\n".join(file_lines[init_line - 1 : final_line])

        return CLIResult(output=self._make_output(file_content, str(path), init_line=init_line))

    async def str_replace(self, path: Path, old_str: str, new_str: str | None) -> CLIResult:
        """
        Implement the str_replace command, which replaces old_str with new_str in the file content.

        The file content, `old_str` and `new_str` are all tab-expanded before matching,
        so the file is written back with tabs converted to spaces.

        Args:
            path: File to edit.
            old_str: Text that must occur exactly once in the file.
            new_str: Replacement text; `None` is treated as an empty string.

        Raises:
            ToolError: If `old_str` does not occur, or occurs more than once.
        """
        # Read the file content
        file_content = (await self.read_file(path)).expandtabs()
        old_str = old_str.expandtabs()
        new_str = new_str.expandtabs() if new_str is not None else ""

        # Check if old_str is unique in the file
        occurrences = file_content.count(old_str)
        if occurrences == 0:
            raise ToolError(
                f"No replacement was performed, old_str `{old_str}` did not appear verbatim in "
                f"{path}."
            )
        elif occurrences > 1:
            # Locate matches by offset rather than per line so that a multi-line
            # old_str is reported too; each line number is listed once.
            step = max(len(old_str), 1)  # avoid looping forever on an empty old_str
            start_lines: dict[int, None] = {}
            offset = file_content.find(old_str)
            while offset != -1:
                start_lines[file_content.count("\n", 0, offset) + 1] = None
                offset = file_content.find(old_str, offset + step)
            lines = list(start_lines)
            raise ToolError(
                f"No replacement was performed. Multiple occurrences of old_str `{old_str}` "
                f"in lines {lines}. Please ensure it is unique"
            )

        # Replace old_str with new_str
        new_file_content = file_content.replace(old_str, new_str)

        # Write the new content to the file
        await self.write_file(path, new_file_content)

        # Save the content to history
        self._file_history[path].append(file_content)

        # Create a snippet of the edited section
        replacement_line = file_content.split(old_str)[0].count("\n")
        start_line = max(0, replacement_line - SNIPPET_LINES)
        end_line = replacement_line + SNIPPET_LINES + new_str.count("\n")
        snippet = "\n".join(new_file_content.split("\n")[start_line : end_line + 1])

        # Prepare the success message
        success_msg = f"The file {path} has been edited. "
        success_msg += self._make_output(snippet, f"a snippet of {path}", start_line + 1)
        success_msg += (
            "Review the changes and make sure they are as expected. "
            "Edit the file again if necessary."
        )

        return CLIResult(output=success_msg)

    async def insert(self, path: Path, insert_line: int, new_str: str) -> CLIResult:
        """
        Implement the insert command, which inserts new_str at the specified line in the file.

        Args:
            path: File to edit.
            insert_line: Number of existing lines to keep before the inserted text;
                `0` inserts at the very top of the file.
            new_str: Text to insert (may span several lines).

        Raises:
            ToolError: If `insert_line` is negative or exceeds the file's line count.
        """
        file_text = (await self.read_file(path)).expandtabs()
        new_str = new_str.expandtabs()
        file_text_lines = file_text.split("\n")
        n_lines_file = len(file_text_lines)

        if insert_line < 0 or insert_line > n_lines_file:
            raise ToolError(
                f"Invalid `insert_line` parameter: {insert_line}. It should be within the range "
                f"of lines of the file: {[0, n_lines_file]}"
            )

        new_str_lines = new_str.split("\n")
        new_file_text_lines = (
            file_text_lines[:insert_line] + new_str_lines + file_text_lines[insert_line:]
        )
        # Context shown to the caller: a few lines before, the insertion, a few after.
        snippet_lines = (
            file_text_lines[max(0, insert_line - SNIPPET_LINES) : insert_line]
            + new_str_lines
            + file_text_lines[insert_line : insert_line + SNIPPET_LINES]
        )

        new_file_text = "\n".join(new_file_text_lines)
        snippet = "\n".join(snippet_lines)

        await self.write_file(path, new_file_text)
        self._file_history[path].append(file_text)

        success_msg = f"The file {path} has been edited. "
        success_msg += self._make_output(
            snippet,
            "a snippet of the edited file",
            max(1, insert_line - SNIPPET_LINES + 1),
        )
        success_msg += (
            "Review the changes and make sure they are as expected (correct indentation, "
            "no duplicate lines, etc). Edit the file again if necessary."
        )
        return CLIResult(output=success_msg)

    async def undo_edit(self, path: Path) -> CLIResult:
        """
        Implement the undo_edit command.

        Pops the most recent snapshot recorded for `path` and writes it back.

        Raises:
            ToolError: If no snapshot has been recorded for `path`.
        """
        if not self._file_history[path]:
            raise ToolError(f"No edit history found for {path}.")

        old_text = self._file_history[path].pop()
        await self.write_file(path, old_text)

        return CLIResult(
            output=f"Last edit to {path} undone successfully. "
            f"{self._make_output(old_text, str(path))}"
        )

    async def read_file(self, path: Path) -> str:
        """Read the content of a file from a given path; raise a ToolError if an error occurs."""
        try:
            import shlex

            safe_path = shlex.quote(str(path))
            code, out, err = await run(f"cat {safe_path}")
            if code != 0:
                raise ToolError(f"Ran into {err} while trying to read {path}")
            return out
        except ToolError:
            # Already a well-formed tool error; do not wrap it a second time.
            raise
        except Exception as e:
            raise ToolError(f"Ran into {e} while trying to read {path}") from None

    @staticmethod
    def _heredoc_delimiter(content: str) -> str:
        """Return a heredoc terminator that does not occur as a whole line of `content`."""
        content_lines = set(content.split("\n"))
        delimiter = "EOF"
        while delimiter in content_lines:
            delimiter += "_"
        return delimiter

    async def write_file(self, path: Path, file: str) -> None:
        """
        Write the content of a file to a given path; raise a ToolError if an error occurs.

        The content is passed to `cat` through a quoted shell here-document, so the
        terminator is picked to never match a line of `file`. A fixed terminator
        would truncate the file at the first matching line and hand the remainder
        to the shell as commands.
        """
        try:
            import shlex

            safe_path = shlex.quote(str(path))
            delimiter = self._heredoc_delimiter(file)
            # NOTE(review): the here-document form appears to end the written
            # content with a newline even when `file` has none - confirm intended.
            command = f"cat > {safe_path} << '{delimiter}'\n{file}\n{delimiter}"
            code, _, err = await run(command)
            if code != 0:
                raise ToolError(f"Ran into {err} while trying to write to {path}")
        except ToolError:
            # Already a well-formed tool error; do not wrap it a second time.
            raise
        except Exception as e:
            raise ToolError(f"Ran into {e} while trying to write to {path}") from None

    def _make_output(
        self,
        file_content: str,
        file_descriptor: str,
        init_line: int = 1,
        expand_tabs: bool = True,
    ) -> str:
        """
        Generate output for the CLI based on the content of a file.

        Args:
            file_content: Text to display; passed through `maybe_truncate` first.
            file_descriptor: Human-readable name of what is being shown.
            init_line: Line number assigned to the first displayed line.
            expand_tabs: Whether to expand tabs in the content before numbering.

        Returns:
            A header line followed by the content, each line prefixed with its
            right-aligned line number and a tab.
        """
        file_content = maybe_truncate(file_content)
        if expand_tabs:
            file_content = file_content.expandtabs()
        file_content = "\n".join(
            [f"{i + init_line:6}\t{line}" for i, line in enumerate(file_content.split("\n"))]
        )
        return (
            f"Here's the result of running `cat -n` on {file_descriptor}:\n" + file_content + "\n"
        )
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Executors for running system commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .base import BaseExecutor
|
|
6
|
+
from .pyautogui import PyAutoGUIExecutor
|
|
7
|
+
from .xdo import XDOExecutor
|
|
8
|
+
|
|
9
|
+
# Public names re-exported by this package.
__all__ = ["BaseExecutor", "PyAutoGUIExecutor", "XDOExecutor"]
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
from hud.tools.base import ToolResult
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BaseExecutor:
    """
    Simulation-only executor for every CLA (Common Language Action).

    The class has three roles:
    1. It declares every action method that HudComputer expects.
    2. It supplies simulated implementations for environments without a display.
    3. It is the parent of the platform-specific executors (XDO, PyAutoGUI).

    Used directly, each action only builds a "[SIMULATED]" message; subclasses
    replace the methods with real implementations.
    """

    def __init__(self, display_num: int | None = None) -> None:
        """
        Set up the executor.

        Args:
            display_num: X display number (for Linux/X11 systems)
        """
        self.display_num = display_num
        self._screenshot_delay = 0.5
        logger.info("BaseExecutor initialized")

    async def _simulated_result(self, msg: str, take_screenshot: bool) -> ToolResult:
        """Wrap `msg` in a ToolResult, attaching a screenshot when requested."""
        image = await self.screenshot() if take_screenshot else None
        return ToolResult(output=msg, base64_image=image)

    # ===== Core CLA Actions =====

    async def click(
        self,
        x: int | None = None,
        y: int | None = None,
        button: Literal["left", "right", "middle", "back", "forward"] = "left",
        pattern: list[int] | None = None,
        hold_keys: list[str] | None = None,
        take_screenshot: bool = True,
    ) -> ToolResult:
        """
        Simulate a mouse click.

        Args:
            x, y: Where to click (None = current position)
            button: Which mouse button to use
            pattern: Delays for multi-clicks (e.g., [100] for double-click)
            hold_keys: Keys held down while clicking
            take_screenshot: Whether to attach a screenshot to the result
        """
        pieces = [f"[SIMULATED] Click at ({x}, {y}) with {button} button"]
        if pattern:
            pieces.append(f" (multi-click pattern: {pattern})")
        if hold_keys:
            pieces.append(f" while holding {hold_keys}")
        return await self._simulated_result("".join(pieces), take_screenshot)

    async def type(
        self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
    ) -> ToolResult:
        """
        Simulate typing text on the keyboard.

        Args:
            text: What to type
            enter_after: Whether Enter is pressed once the text is typed
            delay: Delay between keystrokes in milliseconds (unused by the simulation)
            take_screenshot: Whether to attach a screenshot to the result
        """
        suffix = " followed by Enter" if enter_after else ""
        return await self._simulated_result(f"[SIMULATED] Type '{text}'{suffix}", take_screenshot)

    async def press(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
        """
        Simulate pressing a hotkey.

        Args:
            keys: Keys pressed together (e.g., ["ctrl", "c"])
            take_screenshot: Whether to attach a screenshot to the result
        """
        combo = "+".join(keys)
        return await self._simulated_result(
            f"[SIMULATED] Press key combination: {combo}", take_screenshot
        )

    async def key(self, key_sequence: str, take_screenshot: bool = True) -> ToolResult:
        """
        Simulate pressing one key or key combination.

        Args:
            key_sequence: Key or combination such as "Return" or "ctrl+a"
            take_screenshot: Whether to attach a screenshot to the result
        """
        return await self._simulated_result(
            f"[SIMULATED] Press key: {key_sequence}", take_screenshot
        )

    async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
        """
        Simulate pressing keys and keeping them held.

        Args:
            keys: Keys to press and hold
            take_screenshot: Whether to attach a screenshot to the result
        """
        held = ", ".join(keys)
        return await self._simulated_result(f"[SIMULATED] Key down: {held}", take_screenshot)

    async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
        """
        Simulate releasing held keys.

        Args:
            keys: Keys to release
            take_screenshot: Whether to attach a screenshot to the result
        """
        released = ", ".join(keys)
        return await self._simulated_result(f"[SIMULATED] Key up: {released}", take_screenshot)

    async def scroll(
        self,
        x: int | None = None,
        y: int | None = None,
        scroll_x: int | None = None,
        scroll_y: int | None = None,
        hold_keys: list[str] | None = None,
        take_screenshot: bool = True,
    ) -> ToolResult:
        """
        Simulate scrolling.

        Args:
            x, y: Where to scroll (None = current position)
            scroll_x: Horizontal amount (positive = right)
            scroll_y: Vertical amount (positive = down)
            hold_keys: Keys held down while scrolling
            take_screenshot: Whether to attach a screenshot to the result
        """
        pieces = ["[SIMULATED] Scroll"]
        # The position is only reported when both coordinates are supplied.
        if not (x is None or y is None):
            pieces.append(f" at ({x}, {y})")
        if scroll_x:
            pieces.append(f" horizontally by {scroll_x}")
        if scroll_y:
            pieces.append(f" vertically by {scroll_y}")
        if hold_keys:
            pieces.append(f" while holding {hold_keys}")
        return await self._simulated_result("".join(pieces), take_screenshot)

    async def move(
        self,
        x: int | None = None,
        y: int | None = None,
        offset_x: int | None = None,
        offset_y: int | None = None,
        take_screenshot: bool = True,
    ) -> ToolResult:
        """
        Simulate moving the mouse cursor.

        Absolute coordinates win when both `x` and `y` are given; otherwise any
        supplied offset is used, with a missing component reported as 0.

        Args:
            x, y: Absolute target coordinates
            offset_x, offset_y: Offset relative to the current position
            take_screenshot: Whether to attach a screenshot to the result
        """
        has_absolute = x is not None and y is not None
        has_offset = offset_x is not None or offset_y is not None
        if has_absolute:
            msg = f"[SIMULATED] Move mouse to ({x}, {y})"
        elif has_offset:
            msg = f"[SIMULATED] Move mouse by offset ({offset_x or 0}, {offset_y or 0})"
        else:
            msg = "[SIMULATED] Move mouse (no coordinates specified)"
        return await self._simulated_result(msg, take_screenshot)

    async def drag(
        self,
        path: list[tuple[int, int]],
        pattern: list[int] | None = None,
        hold_keys: list[str] | None = None,
        take_screenshot: bool = True,
    ) -> ToolResult:
        """
        Simulate dragging along a path.

        Args:
            path: (x, y) points defining the drag; at least two are required
            pattern: Delays between path points in milliseconds (unused by the simulation)
            hold_keys: Keys held down while dragging
            take_screenshot: Whether to attach a screenshot to the result

        Returns:
            A result describing the drag, or an error result (without a
            screenshot) when `path` has fewer than two points.
        """
        point_count = len(path)
        if point_count < 2:
            return ToolResult(error="Drag path must have at least 2 points")

        first, last = path[0], path[-1]
        pieces = [f"[SIMULATED] Drag from {first} to {last}"]
        if point_count > 2:
            pieces.append(f" via {point_count - 2} intermediate points")
        if hold_keys:
            pieces.append(f" while holding {hold_keys}")
        return await self._simulated_result("".join(pieces), take_screenshot)

    async def mouse_down(
        self,
        button: Literal["left", "right", "middle", "back", "forward"] = "left",
        take_screenshot: bool = True,
    ) -> ToolResult:
        """
        Simulate pressing a mouse button and keeping it held.

        Args:
            button: Mouse button to press
            take_screenshot: Whether to attach a screenshot to the result
        """
        return await self._simulated_result(
            f"[SIMULATED] Mouse down: {button} button", take_screenshot
        )

    async def mouse_up(
        self,
        button: Literal["left", "right", "middle", "back", "forward"] = "left",
        take_screenshot: bool = True,
    ) -> ToolResult:
        """
        Simulate releasing a mouse button.

        Args:
            button: Mouse button to release
            take_screenshot: Whether to attach a screenshot to the result
        """
        return await self._simulated_result(
            f"[SIMULATED] Mouse up: {button} button", take_screenshot
        )

    async def hold_key(self, key: str, duration: float, take_screenshot: bool = True) -> ToolResult:
        """
        Simulate holding a key down for a while.

        The coroutine really sleeps for `duration` before producing its result.

        Args:
            key: Key to hold
            duration: How long to hold it, in seconds
            take_screenshot: Whether to attach a screenshot to the result
        """
        msg = f"[SIMULATED] Hold key '{key}' for {duration} seconds"
        await asyncio.sleep(duration)  # Simulate the wait
        return await self._simulated_result(msg, take_screenshot)

    # ===== Utility Actions =====

    async def wait(self, time: int) -> ToolResult:
        """
        Sleep for the given time.

        Args:
            time: How long to wait, in milliseconds
        """
        await asyncio.sleep(time / 1000.0)
        return ToolResult(output=f"Waited {time}ms")

    async def screenshot(self) -> str | None:
        """
        Return a screenshot as a base64 string.

        The simulation logs the request and returns a fixed base64 constant.

        Returns:
            Base64 encoded PNG image or None if failed
        """
        logger.info("[SIMULATION] Taking screenshot")
        placeholder = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="  # noqa: E501
        return placeholder

    async def position(self) -> ToolResult:
        """
        Report the cursor position.

        Returns:
            ToolResult whose output always names (0, 0) in the simulation
        """
        return ToolResult(output="[SIMULATED] Mouse position: (0, 0)")

    # ===== Legacy/Compatibility Methods =====

    async def execute(self, command: str, take_screenshot: bool = True) -> ToolResult:
        """
        Simulate running a raw command (kept for backwards compatibility).

        Args:
            command: Command text; it is only echoed in the message, never run
            take_screenshot: Whether to attach a screenshot to the result
        """
        return await self._simulated_result(f"[SIMULATED] Execute: {command}", take_screenshot)

    # Compatibility aliases
    async def type_text(
        self, text: str, delay: int = 12, take_screenshot: bool = True
    ) -> ToolResult:
        """Compatibility alias: forwards to type() with `enter_after=False`."""
        result = await self.type(
            text, enter_after=False, delay=delay, take_screenshot=take_screenshot
        )
        return result

    async def mouse_move(self, x: int, y: int, take_screenshot: bool = True) -> ToolResult:
        """Compatibility alias: forwards to move() with absolute coordinates."""
        result = await self.move(x=x, y=y, take_screenshot=take_screenshot)
        return result
|