hud-python 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +20 -8
- hud/adapters/common/adapter.py +14 -3
- hud/adapters/common/tests/test_adapter.py +16 -4
- hud/datasets.py +188 -0
- hud/env/docker_client.py +15 -3
- hud/env/environment.py +10 -7
- hud/env/local_docker_client.py +29 -7
- hud/env/remote_client.py +1 -1
- hud/env/remote_docker_client.py +2 -2
- hud/exceptions.py +2 -1
- hud/gym.py +0 -9
- hud/mcp/__init__.py +17 -0
- hud/mcp/base.py +631 -0
- hud/mcp/claude.py +321 -0
- hud/mcp/client.py +312 -0
- hud/mcp/langchain.py +250 -0
- hud/mcp/openai.py +334 -0
- hud/mcp/tests/__init__.py +1 -0
- hud/mcp/tests/test_base.py +512 -0
- hud/mcp/tests/test_claude.py +294 -0
- hud/mcp/tests/test_client.py +324 -0
- hud/mcp/tests/test_openai.py +238 -0
- hud/settings.py +20 -2
- hud/task.py +5 -88
- hud/taskset.py +2 -23
- hud/telemetry/__init__.py +16 -7
- hud/telemetry/_trace.py +246 -72
- hud/telemetry/context.py +88 -27
- hud/telemetry/exporter.py +171 -11
- hud/telemetry/instrumentation/mcp.py +174 -410
- hud/telemetry/job.py +141 -0
- hud/telemetry/mcp_models.py +13 -74
- hud/telemetry/tests/test_context.py +9 -6
- hud/telemetry/tests/test_trace.py +120 -78
- hud/tools/__init__.py +34 -0
- hud/tools/base.py +65 -0
- hud/tools/bash.py +137 -0
- hud/tools/computer/__init__.py +13 -0
- hud/tools/computer/anthropic.py +411 -0
- hud/tools/computer/hud.py +315 -0
- hud/tools/computer/openai.py +283 -0
- hud/tools/edit.py +290 -0
- hud/tools/executors/__init__.py +30 -0
- hud/tools/executors/base.py +331 -0
- hud/tools/executors/pyautogui.py +619 -0
- hud/tools/executors/tests/__init__.py +1 -0
- hud/tools/executors/tests/test_base_executor.py +338 -0
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -0
- hud/tools/executors/xdo.py +503 -0
- hud/tools/helper/README.md +56 -0
- hud/tools/helper/__init__.py +9 -0
- hud/tools/helper/mcp_server.py +78 -0
- hud/tools/helper/server_initialization.py +115 -0
- hud/tools/helper/utils.py +58 -0
- hud/tools/playwright_tool.py +379 -0
- hud/tools/tests/__init__.py +3 -0
- hud/tools/tests/test_bash.py +152 -0
- hud/tools/tests/test_computer.py +52 -0
- hud/tools/tests/test_computer_actions.py +34 -0
- hud/tools/tests/test_edit.py +240 -0
- hud/tools/tests/test_init.py +27 -0
- hud/tools/tests/test_playwright_tool.py +183 -0
- hud/tools/tests/test_tools.py +157 -0
- hud/tools/tests/test_utils.py +156 -0
- hud/tools/utils.py +50 -0
- hud/trajectory.py +5 -1
- hud/types.py +10 -1
- hud/utils/tests/test_init.py +21 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/METADATA +27 -18
- hud_python-0.3.1.dist-info/RECORD +119 -0
- hud/evaluators/__init__.py +0 -9
- hud/evaluators/base.py +0 -32
- hud/evaluators/inspect.py +0 -24
- hud/evaluators/judge.py +0 -189
- hud/evaluators/match.py +0 -156
- hud/evaluators/remote.py +0 -65
- hud/evaluators/tests/__init__.py +0 -0
- hud/evaluators/tests/test_inspect.py +0 -12
- hud/evaluators/tests/test_judge.py +0 -231
- hud/evaluators/tests/test_match.py +0 -115
- hud/evaluators/tests/test_remote.py +0 -98
- hud_python-0.2.10.dist-info/RECORD +0 -85
- {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/WHEEL +0 -0
- {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,503 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import base64
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import shlex
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from tempfile import gettempdir
|
|
10
|
+
from typing import Literal
|
|
11
|
+
from uuid import uuid4
|
|
12
|
+
|
|
13
|
+
from hud.tools.base import ToolResult
|
|
14
|
+
from hud.tools.utils import run
|
|
15
|
+
|
|
16
|
+
from .base import BaseExecutor
|
|
17
|
+
|
|
18
|
+
# Directory where screenshots are kept, taken from the SCREENSHOT_DIR
# environment variable once at import time (None when the variable is unset).
OUTPUT_DIR = os.environ.get("SCREENSHOT_DIR")
logger = logging.getLogger(__name__)

# Translation table: CLA standard key name (lower-case) -> X11/xdotool key name.
CLA_TO_XDO = {
    # Editing keys
    "enter": "Return",
    "tab": "Tab",
    "space": "space",
    "backspace": "BackSpace",
    "delete": "Delete",
    "escape": "Escape",
    "esc": "Escape",
    # Arrow keys
    "up": "Up",
    "down": "Down",
    "left": "Left",
    "right": "Right",
    # Modifiers
    "shift": "Shift_L",
    "shiftleft": "Shift_L",
    "shiftright": "Shift_R",
    "ctrl": "Control_L",
    "ctrlleft": "Control_L",
    "ctrlright": "Control_R",
    "alt": "Alt_L",
    "altleft": "Alt_L",
    "altright": "Alt_R",
    "win": "Super_L",
    "winleft": "Super_L",
    "winright": "Super_R",
    "cmd": "Control_L",  # Map cmd to ctrl for Linux
    "command": "Control_L",
    "super": "Super_L",
    # Navigation
    "pageup": "Page_Up",
    "pagedown": "Page_Down",
    "home": "Home",
    "end": "End",
    "insert": "Insert",
    # Locks and system keys
    "pause": "Pause",
    "capslock": "Caps_Lock",
    "numlock": "Num_Lock",
    "scrolllock": "Scroll_Lock",
    "printscreen": "Print",
    "prtsc": "Print",
}
# Function keys: f1..f24 -> F1..F24
CLA_TO_XDO.update({f"f{number}": f"F{number}" for number in range(1, 25)})
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class XDOExecutor(BaseExecutor):
|
|
66
|
+
"""
|
|
67
|
+
Low-level executor for xdotool commands.
|
|
68
|
+
Handles display management and screenshot capture on Linux/X11 systems.
|
|
69
|
+
|
|
70
|
+
This executor should only be instantiated when X11 display is available.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
def __init__(self, display_num: int | None = None) -> None:
    """Set up the executor, optionally pinning it to one X display.

    Args:
        display_num: X display number. When given, every shell command built
            by this executor is prefixed with ``DISPLAY=:<display_num>``;
            when ``None`` no prefix is added.
    """
    super().__init__(display_num)

    # Shell prefix that selects the display; empty when none was requested.
    self._display_prefix = "" if display_num is None else f"DISPLAY=:{display_num} "
    self.xdotool = f"{self._display_prefix}xdotool"
    logger.info("XDOExecutor initialized")
|
|
84
|
+
|
|
85
|
+
def _map_key(self, key: str) -> str:
    """Translate a single CLA key name into its xdotool name.

    The lookup in ``CLA_TO_XDO`` is case-insensitive; a name that is not in
    the table is returned unchanged.
    """
    xdo_name = CLA_TO_XDO.get(key.lower())
    return key if xdo_name is None else xdo_name
|
|
88
|
+
|
|
89
|
+
def _map_keys(self, keys: list[str]) -> list[str]:
    """Translate a list of CLA key names into xdotool names.

    Each entry is either a single key or a "+"-joined combination such as
    "ctrl+a". Every "+"-separated part is mapped on its own and the parts
    are joined back with "+".
    """
    # Splitting an entry without "+" gives a one-element list, so single keys
    # and combinations can share one code path.
    return ["+".join(self._map_key(part) for part in entry.split("+")) for entry in keys]
|
|
101
|
+
|
|
102
|
+
@classmethod
def is_available(cls) -> bool:
    """Report whether xdotool can talk to an X11 display.

    Returns:
        ``False`` when the ``DISPLAY`` environment variable is unset or
        empty, when ``xdotool`` cannot be started, or when the probe does
        not finish within 2 seconds. Otherwise ``True`` exactly when
        ``xdotool getdisplaygeometry`` exits with status 0.
    """
    import subprocess

    if not os.environ.get("DISPLAY"):
        return False

    # DISPLAY is already set in the environment, so no display prefix is
    # needed for the probe.
    try:
        probe = subprocess.run(  # noqa: S603
            ["xdotool", "getdisplaygeometry"],  # noqa: S607
            capture_output=True,
            timeout=2,
            check=False,
        )
    except (OSError, subprocess.SubprocessError) as exc:
        # OSError covers a missing or non-executable binary;
        # SubprocessError covers TimeoutExpired.
        logger.debug("xdotool availability probe failed: %s", exc)
        return False
    return probe.returncode == 0
|
|
127
|
+
|
|
128
|
+
async def execute(self, command: str, take_screenshot: bool = True) -> ToolResult:
    """Run one xdotool command and optionally capture a screenshot.

    Args:
        command: The xdotool command without the ``xdotool`` prefix. It is
            interpolated into the command string as-is, so callers must
            quote any untrusted parts themselves.
        take_screenshot: Whether to capture a screenshot after execution.

    Returns:
        ToolResult whose ``output`` is the command's stdout (``None`` when
        empty) and whose ``error`` is its stderr. When the command exits
        non-zero without writing to stderr, ``error`` holds a message with
        the exit code so the failure is not lost. ``base64_image`` is set
        when a screenshot was requested and could be captured.
    """
    returncode, stdout, stderr = await run(f"{self.xdotool} {command}")

    if stderr:
        error = stderr
    elif returncode != 0:
        # Without this fallback a failing command with empty stderr would
        # report an empty (falsy) error and look like a success.
        error = f"xdotool exited with code {returncode}"
    else:
        error = None
    output = stdout or None

    screenshot = None
    if take_screenshot:
        # Wait before capturing; _screenshot_delay is not set in this class
        # (presumably provided by BaseExecutor - verify).
        await asyncio.sleep(self._screenshot_delay)
        screenshot = await self.screenshot()

    if screenshot:
        return ToolResult(output=output, error=error, base64_image=screenshot)
    return ToolResult(output=output, error=error)
|
|
159
|
+
|
|
160
|
+
async def screenshot(self) -> str | None:
    """Capture the screen with ``scrot`` and return it base64-encoded.

    The image is written to ``OUTPUT_DIR`` (created if needed) when that
    module constant is set, and kept there. Otherwise it is written to the
    system temp directory and removed again after reading.

    Returns:
        Base64 text of the captured file, or ``None`` when ``scrot`` fails
        or the file cannot be read.
    """
    if OUTPUT_DIR:
        target_dir = Path(OUTPUT_DIR)
        target_dir.mkdir(parents=True, exist_ok=True)
    else:
        target_dir = Path(gettempdir())
    # A random name avoids collisions without having to open the file first.
    screenshot_path = target_dir / f"screenshot_{uuid4().hex}.png"

    # The command goes through a shell, and the directory comes from the
    # environment, so it may contain spaces or shell metacharacters.
    quoted_path = shlex.quote(str(screenshot_path))
    returncode, _, stderr = await run(f"{self._display_prefix}scrot -p {quoted_path}")

    image_data = None
    if returncode == 0 and screenshot_path.exists():
        try:
            image_data = screenshot_path.read_bytes()
        except OSError as exc:
            logger.warning("Could not read screenshot %s: %s", screenshot_path, exc)
    else:
        logger.warning("scrot failed with exit code %s: %s", returncode, stderr)

    # Remove the file unless persistence was requested via the env var. This
    # also runs after a failed capture or read so no temp file is left behind.
    if not OUTPUT_DIR:
        try:
            screenshot_path.unlink(missing_ok=True)
        except OSError as exc:
            logger.debug("Could not remove screenshot %s: %s", screenshot_path, exc)

    if image_data is None:
        return None
    return base64.b64encode(image_data).decode()
|
|
191
|
+
|
|
192
|
+
# ===== Helper Methods =====
|
|
193
|
+
|
|
194
|
+
async def _hold_keys_context(self, keys: list[str] | None) -> None:
    """Press and hold keys, to be used with try/finally.

    Key names are translated with ``_map_keys`` first, so CLA names such as
    "cmd" or "win" are sent to xdotool under the same names that
    ``keydown``/``keyup`` use.

    Args:
        keys: Keys to hold; ``None`` or an empty list does nothing.

    Example:
        try:
            await self._hold_keys_context(['ctrl'])
            # Do action with ctrl held
        finally:
            await self._release_keys(['ctrl'])
    """
    if not keys:
        return
    for xdo_key in self._map_keys(keys):
        await self.execute(f"keydown {shlex.quote(xdo_key)}", take_screenshot=False)

async def _release_keys(self, keys: list[str] | None) -> None:
    """Release keys held by ``_hold_keys_context``, in reverse order.

    Uses the same ``_map_keys`` translation as ``_hold_keys_context`` so that
    every key is released under the name it was pressed with.

    Args:
        keys: Keys to release; ``None`` or an empty list does nothing.
    """
    if not keys:
        return
    for xdo_key in reversed(self._map_keys(keys)):
        await self.execute(f"keyup {shlex.quote(xdo_key)}", take_screenshot=False)
|
|
219
|
+
|
|
220
|
+
# ===== CLA Action Implementations =====
|
|
221
|
+
|
|
222
|
+
async def click(
    self,
    x: int | None = None,
    y: int | None = None,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    pattern: list[int] | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ToolResult:
    """Click at the given coordinates or at the current pointer position.

    Args:
        x: Target x coordinate. The pointer is only moved when both ``x``
            and ``y`` are given.
        y: Target y coordinate.
        button: Mouse button name; an unknown name falls back to button 1.
        pattern: Delays for a multi-click. ``len(pattern) + 1`` clicks are
            sent, and only the first delay is used, for all of them.
        hold_keys: Keys held down for the duration of the click.
        take_screenshot: Whether to capture a screenshot afterwards.

    Returns:
        The ToolResult of the xdotool click command.
    """
    # xdotool identifies mouse buttons by number.
    button_map = {"left": 1, "right": 3, "middle": 2, "back": 8, "forward": 9}
    button_num = button_map.get(button, 1)

    steps = []
    if x is not None and y is not None:
        steps.append(f"mousemove {x} {y}")
    if pattern:
        steps.append(f"click --repeat {len(pattern) + 1} --delay {pattern[0]} {button_num}")
    else:
        steps.append(f"click {button_num}")
    cmd = " ".join(steps)

    try:
        # The hold sits inside the try so that keys pressed before a failure
        # part-way through are still released.
        await self._hold_keys_context(hold_keys)
        return await self.execute(cmd, take_screenshot=take_screenshot)
    finally:
        await self._release_keys(hold_keys)
|
|
262
|
+
|
|
263
|
+
async def type(
    self, text: str, enter_after: bool = False, delay: int = 12, take_screenshot: bool = True
) -> ToolResult:
    """Type text with a delay between keystrokes.

    Args:
        text: Text to type; it is shell-quoted before being passed on.
        enter_after: Whether to press Return once the text has been typed.
        delay: Value passed to ``xdotool type --delay``.
        take_screenshot: Whether to capture a screenshot at the end.

    Returns:
        The ToolResult of the type command. With ``enter_after`` the
        non-empty outputs and errors of both commands are joined with a
        newline, and a field is ``None`` when neither command produced it.
    """
    result = await self.execute(
        f"type --delay {delay} -- {shlex.quote(text)}", take_screenshot=False
    )

    if enter_after:
        enter_result = await self.key("Return", take_screenshot=False)
        # Join only the parts that exist, so a single error or output does
        # not pick up a stray leading or trailing newline.
        outputs = [r.output for r in (result, enter_result) if r.output]
        errors = [r.error for r in (result, enter_result) if r.error]
        result = ToolResult(
            output="\n".join(outputs).strip() or None,
            error="\n".join(errors) or None,
        )

    if take_screenshot:
        screenshot = await self.screenshot()
        if screenshot:
            result = ToolResult(
                output=result.output, error=result.error, base64_image=screenshot
            )

    return result
|
|
289
|
+
|
|
290
|
+
async def key(self, key_sequence: str, take_screenshot: bool = True) -> ToolResult:
    """Press a key or key combination.

    Args:
        key_sequence: One or more whitespace-separated xdotool key names or
            "+"-joined combinations, e.g. "Return" or "Control_L+a".
        take_screenshot: Whether to capture a screenshot afterwards.

    Returns:
        The ToolResult of the ``xdotool key`` command.
    """
    # The command goes through a shell, so each token is quoted on its own:
    # metacharacters cannot escape, and several keys stay separate arguments.
    quoted_keys = " ".join(shlex.quote(token) for token in key_sequence.split())
    return await self.execute(f"key -- {quoted_keys}", take_screenshot=take_screenshot)
|
|
293
|
+
|
|
294
|
+
async def press(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
    """Press several keys together as one hotkey.

    The CLA key names are translated to xdotool names, joined with "+" into
    a single combination, and sent through ``key``.
    """
    combo = "+".join(self._map_keys(keys))
    return await self.key(combo, take_screenshot=take_screenshot)
|
|
301
|
+
|
|
302
|
+
async def keydown(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
    """Press keys and leave them held down.

    Keys are translated to xdotool names and pressed one at a time, in
    order. The returned result is the one from the last key pressed, with a
    screenshot attached when requested and available. An empty ToolResult
    is returned when there was nothing to press.
    """
    outcome = None
    for xdo_key in self._map_keys(keys):
        outcome = await self.execute(f"keydown {shlex.quote(xdo_key)}", take_screenshot=False)

    if not outcome:
        return ToolResult()

    if take_screenshot:
        image = await self.screenshot()
        if image:
            outcome = ToolResult(output=outcome.output, error=outcome.error, base64_image=image)

    return outcome or ToolResult()
|
|
319
|
+
|
|
320
|
+
async def keyup(self, keys: list[str], take_screenshot: bool = True) -> ToolResult:
    """Release keys that are being held down.

    Keys are translated to xdotool names and released one at a time, in the
    order given. The returned result is the one from the last key released,
    with a screenshot attached when requested and available. An empty
    ToolResult is returned when there was nothing to release.
    """
    outcome = None
    for xdo_key in self._map_keys(keys):
        outcome = await self.execute(f"keyup {shlex.quote(xdo_key)}", take_screenshot=False)

    if not outcome:
        return ToolResult()

    if take_screenshot:
        image = await self.screenshot()
        if image:
            outcome = ToolResult(output=outcome.output, error=outcome.error, base64_image=image)

    return outcome or ToolResult()
|
|
337
|
+
|
|
338
|
+
async def scroll(
    self,
    x: int | None = None,
    y: int | None = None,
    scroll_x: int | None = None,
    scroll_y: int | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ToolResult:
    """Scroll at the given position or at the current pointer position.

    Scrolling is sent as repeated clicks of the scroll buttons (4 = up,
    5 = down, 6 = left, 7 = right). When both ``scroll_x`` and ``scroll_y``
    are non-zero, both axes are scrolled in one chained xdotool command,
    vertical first.

    Args:
        x: Pointer x coordinate. The pointer is only moved when both ``x``
            and ``y`` are given.
        y: Pointer y coordinate.
        scroll_x: Horizontal amount; positive scrolls right, negative left.
        scroll_y: Vertical amount; positive scrolls down, negative up.
        hold_keys: Keys held down while scrolling.
        take_screenshot: Whether to capture a screenshot afterwards.

    Returns:
        The ToolResult of the xdotool command, or a ToolResult with an
        explanatory output when no scroll amount was given.
    """
    scroll_button_map = {"up": 4, "down": 5, "left": 6, "right": 7}

    steps = []
    if scroll_y:
        vertical = scroll_button_map["down" if scroll_y > 0 else "up"]
        steps.append(f"click --repeat {abs(scroll_y)} {vertical}")
    if scroll_x:
        horizontal = scroll_button_map["right" if scroll_x > 0 else "left"]
        steps.append(f"click --repeat {abs(scroll_x)} {horizontal}")

    if not steps:
        # Nothing to do, so the modifier keys are not touched either.
        return ToolResult(output="No scroll amount specified")

    if x is not None and y is not None:
        steps.insert(0, f"mousemove {x} {y}")
    cmd = " ".join(steps)

    try:
        # The hold sits inside the try so that keys pressed before a failure
        # part-way through are still released.
        await self._hold_keys_context(hold_keys)
        return await self.execute(cmd, take_screenshot=take_screenshot)
    finally:
        await self._release_keys(hold_keys)
|
|
388
|
+
|
|
389
|
+
async def move(
    self,
    x: int | None = None,
    y: int | None = None,
    offset_x: int | None = None,
    offset_y: int | None = None,
    take_screenshot: bool = True,
) -> ToolResult:
    """Move the mouse pointer, absolutely or relative to where it is.

    An absolute move is used when both ``x`` and ``y`` are given. Otherwise
    a relative move is used when at least one offset is given, with a
    missing offset treated as 0. With neither, no command is run and a
    ToolResult carrying an explanatory output is returned.
    """
    if x is not None and y is not None:
        command = f"mousemove {x} {y}"
    elif offset_x is not None or offset_y is not None:
        dx = offset_x or 0
        dy = offset_y or 0
        # "--" keeps negative offsets from being parsed as options.
        command = f"mousemove_relative -- {dx} {dy}"
    else:
        return ToolResult(output="No move coordinates specified")

    return await self.execute(command, take_screenshot=take_screenshot)
|
|
410
|
+
|
|
411
|
+
async def drag(
    self,
    path: list[tuple[int, int]],
    pattern: list[int] | None = None,
    hold_keys: list[str] | None = None,
    take_screenshot: bool = True,
) -> ToolResult:
    """Drag with mouse button 1 along a path of points.

    Args:
        path: At least two ``(x, y)`` points. The button is pressed at the
            first point and released after the last.
        pattern: Optional delays in milliseconds. ``pattern[i]`` is waited
            before moving to ``path[i + 1]``; moves beyond the end of
            ``pattern`` happen without delay.
        hold_keys: Keys held down for the duration of the drag.
        take_screenshot: Whether to capture a screenshot at the end.

    Returns:
        ToolResult describing the drag, or one with ``error`` set when
        ``path`` has fewer than two points.
    """
    if len(path) < 2:
        return ToolResult(error="Drag path must have at least 2 points")

    summary = f"Dragged along {len(path)} points"

    try:
        # The hold sits inside the try so that keys pressed before a failure
        # part-way through are still released.
        await self._hold_keys_context(hold_keys)

        # Start drag
        start_x, start_y = path[0]
        await self.execute(f"mousemove {start_x} {start_y}", take_screenshot=False)
        await self.execute("mousedown 1", take_screenshot=False)

        try:
            for index, (x, y) in enumerate(path[1:]):
                if pattern and index < len(pattern):
                    await asyncio.sleep(pattern[index] / 1000.0)  # ms -> seconds
                await self.execute(f"mousemove {x} {y}", take_screenshot=False)
        finally:
            # Always release the button, even if a move fails or the task
            # is cancelled, so it is never left pressed.
            await self.execute("mouseup 1", take_screenshot=False)

        if take_screenshot:
            screenshot = await self.screenshot()
            return ToolResult(output=summary, base64_image=screenshot)
        return ToolResult(output=summary)
    finally:
        await self._release_keys(hold_keys)
|
|
456
|
+
|
|
457
|
+
async def mouse_down(
    self,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    take_screenshot: bool = True,
) -> ToolResult:
    """Press a mouse button and keep it held.

    The button name is converted to its xdotool button number; an unknown
    name falls back to button 1.
    """
    number = {"left": 1, "right": 3, "middle": 2, "back": 8, "forward": 9}.get(button, 1)
    command = f"mousedown {number}"
    return await self.execute(command, take_screenshot=take_screenshot)
|
|
466
|
+
|
|
467
|
+
async def mouse_up(
    self,
    button: Literal["left", "right", "middle", "back", "forward"] = "left",
    take_screenshot: bool = True,
) -> ToolResult:
    """Release a held mouse button.

    The button name is converted to its xdotool button number; an unknown
    name falls back to button 1.
    """
    number = {"left": 1, "right": 3, "middle": 2, "back": 8, "forward": 9}.get(button, 1)
    command = f"mouseup {number}"
    return await self.execute(command, take_screenshot=take_screenshot)
|
|
476
|
+
|
|
477
|
+
async def hold_key(self, key: str, duration: float, take_screenshot: bool = True) -> ToolResult:
    """Hold a key down for a given time, then release it.

    Args:
        key: CLA key name; it is translated to its xdotool name.
        duration: Seconds to keep the key held.
        take_screenshot: Whether to capture a screenshot after the release.

    Returns:
        The ToolResult of the release command, with a screenshot attached
        when requested and available.
    """
    escaped_key = shlex.quote(self._map_key(key))

    await self.execute(f"keydown {escaped_key}", take_screenshot=False)
    try:
        await asyncio.sleep(duration)
    finally:
        # Release even when the wait is cancelled or raises, so the key is
        # never left pressed.
        result = await self.execute(f"keyup {escaped_key}", take_screenshot=False)

    if take_screenshot:
        screenshot = await self.screenshot()
        if screenshot:
            result = ToolResult(
                output=result.output, error=result.error, base64_image=screenshot
            )

    return result
|
|
500
|
+
|
|
501
|
+
async def position(self) -> ToolResult:
    """Return the result of ``xdotool getmouselocation``, without a screenshot."""
    location = await self.execute("getmouselocation", take_screenshot=False)
    return location
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# HUD Helper Package
|
|
2
|
+
|
|
3
|
+
This sub-package bundles utilities that make it trivial to expose HUD
|
|
4
|
+
Python tool classes as **Model Context Protocol (MCP)** tools.
|
|
5
|
+
|
|
6
|
+
## Contents
|
|
7
|
+
|
|
8
|
+
| File | Purpose |
|
|
9
|
+
|------|---------|
|
|
10
|
+
| `utils.py` | `register_instance_tool` – wrap a class instance into a FastMCP tool with auto-generated JSON schema |
|
|
11
|
+
| `mcp_server.py` | CLI server (stdio/HTTP). Tool names: `computer`, `computer_anthropic`, `computer_openai`, `bash`, `edit_file` |
|
|
12
|
+
|
|
13
|
+
## Quick start
|
|
14
|
+
|
|
15
|
+
### 1 — Run a server (stdio)
|
|
16
|
+
```bash
|
|
17
|
+
python -m hud.tools.helper.mcp_server # exposes all tools on stdio
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
### 2 — Run a server (HTTP)
|
|
21
|
+
```bash
|
|
22
|
+
python -m hud.tools.helper.mcp_server http --port 8040 \
|
|
23
|
+
--tools computer bash # expose only two tools
|
|
24
|
+
```
|
|
25
|
+
This starts a Streamable-HTTP MCP server at `http://localhost:8040/mcp`.
|
|
26
|
+
|
|
27
|
+
### 3 — From a client
|
|
28
|
+
```python
|
|
29
|
+
from mcp import ClientSession
|
|
30
|
+
from mcp.client.streamable_http import streamablehttp_client
|
|
31
|
+
|
|
32
|
+
async with streamablehttp_client("http://localhost:8040/mcp") as (r, w, _):
|
|
33
|
+
async with ClientSession(r, w) as sess:
|
|
34
|
+
await sess.initialize()
|
|
35
|
+
res = await sess.call_tool("bash", {"command": "echo hi"})
|
|
36
|
+
print(res.content[0].text)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Advanced: registering custom tools
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from mcp.server.fastmcp import FastMCP
|
|
43
|
+
from hud.tools.helper import register_instance_tool
|
|
44
|
+
|
|
45
|
+
class MyTool:
|
|
46
|
+
async def __call__(self, name: str) -> str: # type-hints generate schema!
|
|
47
|
+
return f"Hello {name}!"
|
|
48
|
+
|
|
49
|
+
mcp = FastMCP("Custom")
|
|
50
|
+
register_instance_tool(mcp, "my_tool", MyTool())
|
|
51
|
+
|
|
52
|
+
mcp.run(transport="stdio")
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
The helper inspects `MyTool.__call__`, removes `*args/**kwargs`, and FastMCP
|
|
56
|
+
automatically derives an input schema and registers the tool.
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Parameterised FastMCP server for HUD tools.
|
|
3
|
+
|
|
4
|
+
Usage
|
|
5
|
+
-----
|
|
6
|
+
Run with default (stdio, all tools):
|
|
7
|
+
|
|
8
|
+
python -m hud.tools.helper.mcp_server
|
|
9
|
+
|
|
10
|
+
Streamable HTTP on :8040 exposing computer + bash only:
|
|
11
|
+
|
|
12
|
+
python -m hud.tools.helper.mcp_server http --tools computer bash
|
|
13
|
+
|
|
14
|
+
Arguments
|
|
15
|
+
~~~~~~~~~
|
|
16
|
+
transport stdio (default) | http
|
|
17
|
+
--tools list of tool names to expose (default = all)
|
|
18
|
+
--port HTTP port (default 8040)
--host HTTP host to bind (default 0.0.0.0)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import argparse
|
|
24
|
+
|
|
25
|
+
from mcp.server.fastmcp import FastMCP
|
|
26
|
+
|
|
27
|
+
from hud.tools.bash import BashTool
|
|
28
|
+
from hud.tools.computer.anthropic import AnthropicComputerTool
|
|
29
|
+
from hud.tools.computer.hud import HudComputerTool
|
|
30
|
+
from hud.tools.computer.openai import OpenAIComputerTool
|
|
31
|
+
from hud.tools.edit import EditTool
|
|
32
|
+
|
|
33
|
+
from .utils import register_instance_tool
|
|
34
|
+
|
|
35
|
+
# Registry of the tool names accepted by --tools. Each name maps to the class
# that build_server instantiates (with no arguments) and registers under
# that name.
TOOL_MAP = {
    "computer": HudComputerTool,
    "computer_anthropic": AnthropicComputerTool,
    "computer_openai": OpenAIComputerTool,
    "bash": BashTool,
    "edit_file": EditTool,
}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def build_server(
    names: list[str] | None = None,
    *,
    port: int = 8040,
    host: str = "0.0.0.0",  # noqa: S104
) -> FastMCP:
    """Create a FastMCP server exposing the selected HUD tools.

    Args:
        names: Tool names to expose, each a key of ``TOOL_MAP``. ``None`` or
            an empty list selects every tool.
        port: Port passed to ``FastMCP``.
        host: Host passed to ``FastMCP``. The default binds all interfaces.

    Returns:
        The server with one instance of each selected tool registered.

    Raises:
        SystemExit: If a requested name is not in ``TOOL_MAP``. All names
            are checked before any tool is instantiated, so a typo never
            leaves earlier tools half set up.
    """
    selected = names or list(TOOL_MAP)

    for name in selected:
        if name not in TOOL_MAP:
            raise SystemExit(f"Unknown tool '{name}'. Choices: {list(TOOL_MAP)}")

    server = FastMCP("HUD", port=port, host=host)
    for name in selected:
        register_instance_tool(server, name, TOOL_MAP[name]())
    return server
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def main() -> None:
    """Parse the command line, build the server and run it.

    The positional ``transport`` argument selects stdio (the default) or
    http; "http" runs the server with the "streamable-http" transport.
    """
    parser = argparse.ArgumentParser(prog="hud-mcp", description="Run HUD FastMCP server")
    parser.add_argument("transport", nargs="?", choices=["stdio", "http"], default="stdio")
    parser.add_argument("--tools", nargs="*", help="Tool names to expose (default: all)")
    parser.add_argument("--port", type=int, default=8040, help="HTTP port (default 8040)")
    parser.add_argument("--host", type=str, default="0.0.0.0", help="HTTP host (default 0.0.0.0)")  # noqa: S104
    options = parser.parse_args()

    server = build_server(options.tools, port=options.port, host=options.host)
    transport = "streamable-http" if options.transport == "http" else "stdio"
    server.run(transport=transport)


if __name__ == "__main__":
    main()
|