hud-python 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +7 -4
- hud/adapters/common/adapter.py +14 -3
- hud/adapters/common/tests/test_adapter.py +16 -4
- hud/datasets.py +188 -0
- hud/env/docker_client.py +14 -2
- hud/env/local_docker_client.py +28 -6
- hud/gym.py +0 -9
- hud/{mcp_agent → mcp}/__init__.py +2 -0
- hud/mcp/base.py +631 -0
- hud/{mcp_agent → mcp}/claude.py +52 -47
- hud/mcp/client.py +312 -0
- hud/{mcp_agent → mcp}/langchain.py +52 -33
- hud/{mcp_agent → mcp}/openai.py +56 -40
- hud/{mcp_agent → mcp}/tests/test_base.py +129 -54
- hud/mcp/tests/test_claude.py +294 -0
- hud/mcp/tests/test_client.py +324 -0
- hud/mcp/tests/test_openai.py +238 -0
- hud/settings.py +6 -0
- hud/task.py +1 -88
- hud/taskset.py +2 -23
- hud/telemetry/__init__.py +5 -0
- hud/telemetry/_trace.py +180 -17
- hud/telemetry/context.py +79 -0
- hud/telemetry/exporter.py +165 -6
- hud/telemetry/job.py +141 -0
- hud/telemetry/tests/test_trace.py +36 -25
- hud/tools/__init__.py +14 -1
- hud/tools/executors/__init__.py +19 -2
- hud/tools/executors/pyautogui.py +84 -50
- hud/tools/executors/tests/test_pyautogui_executor.py +4 -1
- hud/tools/playwright_tool.py +73 -67
- hud/tools/tests/test_edit.py +8 -1
- hud/tools/tests/test_tools.py +3 -0
- hud/trajectory.py +5 -1
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/METADATA +20 -14
- {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/RECORD +41 -46
- hud/evaluators/__init__.py +0 -9
- hud/evaluators/base.py +0 -32
- hud/evaluators/inspect.py +0 -24
- hud/evaluators/judge.py +0 -189
- hud/evaluators/match.py +0 -156
- hud/evaluators/remote.py +0 -65
- hud/evaluators/tests/__init__.py +0 -0
- hud/evaluators/tests/test_inspect.py +0 -12
- hud/evaluators/tests/test_judge.py +0 -231
- hud/evaluators/tests/test_match.py +0 -115
- hud/evaluators/tests/test_remote.py +0 -98
- hud/mcp_agent/base.py +0 -723
- hud/{mcp_agent → mcp}/tests/__init__.py +0 -0
- {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/WHEEL +0 -0
- {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/licenses/LICENSE +0 -0
hud/tools/executors/pyautogui.py
CHANGED
|
@@ -5,28 +5,56 @@ import base64
|
|
|
5
5
|
import logging
|
|
6
6
|
import os
|
|
7
7
|
from io import BytesIO
|
|
8
|
-
from typing import Literal
|
|
8
|
+
from typing import Any, Literal
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
try:
|
|
12
|
-
from hud.settings import settings
|
|
10
|
+
from hud.tools.base import ToolResult
|
|
13
11
|
|
|
14
|
-
|
|
15
|
-
except (ImportError, AttributeError):
|
|
16
|
-
os.environ["DISPLAY"] = ":0"
|
|
12
|
+
from .base import BaseExecutor
|
|
17
13
|
|
|
18
|
-
|
|
19
|
-
import pyautogui
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
20
15
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
16
|
+
# Lazy loading for pyautogui
|
|
17
|
+
_pyautogui = None
|
|
18
|
+
_pyautogui_available = None
|
|
24
19
|
|
|
25
|
-
from hud.tools.base import ToolResult
|
|
26
20
|
|
|
27
|
-
|
|
21
|
+
def _get_pyautogui() -> Any | None:
|
|
22
|
+
"""Lazily import and return pyautogui module."""
|
|
23
|
+
global _pyautogui, _pyautogui_available
|
|
24
|
+
|
|
25
|
+
if _pyautogui_available is False:
|
|
26
|
+
return None
|
|
27
|
+
|
|
28
|
+
if _pyautogui is None:
|
|
29
|
+
# Set display if not already set
|
|
30
|
+
if "DISPLAY" not in os.environ:
|
|
31
|
+
try:
|
|
32
|
+
from hud.settings import settings
|
|
33
|
+
|
|
34
|
+
os.environ["DISPLAY"] = settings.display
|
|
35
|
+
except (ImportError, AttributeError):
|
|
36
|
+
os.environ["DISPLAY"] = ":0"
|
|
37
|
+
|
|
38
|
+
try:
|
|
39
|
+
import pyautogui
|
|
40
|
+
|
|
41
|
+
_pyautogui = pyautogui
|
|
42
|
+
_pyautogui_available = True
|
|
43
|
+
|
|
44
|
+
# Configure PyAutoGUI settings
|
|
45
|
+
_pyautogui.FAILSAFE = False # Disable fail-safe feature
|
|
46
|
+
_pyautogui.PAUSE = 0.1 # Small pause between actions
|
|
47
|
+
except ImportError:
|
|
48
|
+
_pyautogui_available = False
|
|
49
|
+
logger.warning("PyAutoGUI is not available")
|
|
50
|
+
return None
|
|
51
|
+
except Exception as e:
|
|
52
|
+
_pyautogui_available = False
|
|
53
|
+
logger.warning("Failed to initialize PyAutoGUI: %s", e)
|
|
54
|
+
return None
|
|
55
|
+
|
|
56
|
+
return _pyautogui
|
|
28
57
|
|
|
29
|
-
logger = logging.getLogger(__name__)
|
|
30
58
|
|
|
31
59
|
# Map CLA standard keys to PyAutoGUI keys (only where they differ)
|
|
32
60
|
CLA_TO_PYAUTOGUI = {
|
|
@@ -58,12 +86,17 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
58
86
|
display_num: X display number (used only on Linux, ignored on Windows/macOS)
|
|
59
87
|
"""
|
|
60
88
|
super().__init__(display_num)
|
|
61
|
-
|
|
89
|
+
self._pyautogui = None
|
|
62
90
|
logger.info("PyAutoGUIExecutor initialized")
|
|
63
91
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
pyautogui
|
|
92
|
+
@property
|
|
93
|
+
def pyautogui(self) -> Any:
|
|
94
|
+
"""Get the pyautogui module, importing it lazily if needed."""
|
|
95
|
+
if self._pyautogui is None:
|
|
96
|
+
self._pyautogui = _get_pyautogui()
|
|
97
|
+
if self._pyautogui is None:
|
|
98
|
+
raise RuntimeError("PyAutoGUI is not available")
|
|
99
|
+
return self._pyautogui
|
|
67
100
|
|
|
68
101
|
def _map_key(self, key: str) -> str:
|
|
69
102
|
"""Map CLA standard key to PyAutoGUI key."""
|
|
@@ -90,7 +123,8 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
90
123
|
Returns:
|
|
91
124
|
True if PyAutoGUI is available and functional, False otherwise
|
|
92
125
|
"""
|
|
93
|
-
|
|
126
|
+
pyautogui = _get_pyautogui()
|
|
127
|
+
if not pyautogui:
|
|
94
128
|
return False
|
|
95
129
|
|
|
96
130
|
try:
|
|
@@ -109,7 +143,7 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
109
143
|
"""
|
|
110
144
|
try:
|
|
111
145
|
# Take screenshot using PyAutoGUI
|
|
112
|
-
screenshot = pyautogui.screenshot()
|
|
146
|
+
screenshot = self.pyautogui.screenshot()
|
|
113
147
|
|
|
114
148
|
# Convert to base64
|
|
115
149
|
buffer = BytesIO()
|
|
@@ -131,13 +165,13 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
131
165
|
"""
|
|
132
166
|
if keys:
|
|
133
167
|
for key in keys:
|
|
134
|
-
pyautogui.keyDown(key)
|
|
168
|
+
self.pyautogui.keyDown(key)
|
|
135
169
|
|
|
136
170
|
def _release_keys(self, keys: list[str] | None) -> None:
|
|
137
171
|
"""Release held keys."""
|
|
138
172
|
if keys:
|
|
139
173
|
for key in reversed(keys): # Release in reverse order
|
|
140
|
-
pyautogui.keyUp(key)
|
|
174
|
+
self.pyautogui.keyUp(key)
|
|
141
175
|
|
|
142
176
|
# ===== CLA Action Implementations =====
|
|
143
177
|
|
|
@@ -172,17 +206,17 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
172
206
|
interval = pattern[0] / 1000.0 if pattern else 0.1 # Convert ms to seconds
|
|
173
207
|
|
|
174
208
|
if x is not None and y is not None:
|
|
175
|
-
pyautogui.click(
|
|
209
|
+
self.pyautogui.click(
|
|
176
210
|
x=x, y=y, clicks=clicks, interval=interval, button=button_name
|
|
177
211
|
)
|
|
178
212
|
else:
|
|
179
|
-
pyautogui.click(clicks=clicks, interval=interval, button=button_name)
|
|
213
|
+
self.pyautogui.click(clicks=clicks, interval=interval, button=button_name)
|
|
180
214
|
else:
|
|
181
215
|
# Single click
|
|
182
216
|
if x is not None and y is not None:
|
|
183
|
-
pyautogui.click(x=x, y=y, button=button_name)
|
|
217
|
+
self.pyautogui.click(x=x, y=y, button=button_name)
|
|
184
218
|
else:
|
|
185
|
-
pyautogui.click(button=button_name)
|
|
219
|
+
self.pyautogui.click(button=button_name)
|
|
186
220
|
finally:
|
|
187
221
|
# Release held keys
|
|
188
222
|
self._release_keys(hold_keys)
|
|
@@ -210,10 +244,10 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
210
244
|
try:
|
|
211
245
|
# Convert delay from milliseconds to seconds for PyAutoGUI
|
|
212
246
|
interval = delay / 1000.0
|
|
213
|
-
pyautogui.typewrite(text, interval=interval)
|
|
247
|
+
self.pyautogui.typewrite(text, interval=interval)
|
|
214
248
|
|
|
215
249
|
if enter_after:
|
|
216
|
-
pyautogui.press("enter")
|
|
250
|
+
self.pyautogui.press("enter")
|
|
217
251
|
|
|
218
252
|
result = ToolResult(
|
|
219
253
|
output=f"Typed: '{text}'" + (" and pressed Enter" if enter_after else "")
|
|
@@ -237,12 +271,12 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
237
271
|
# Handle key combinations (e.g., "ctrl+c")
|
|
238
272
|
if "+" in key_sequence:
|
|
239
273
|
keys = key_sequence.split("+")
|
|
240
|
-
pyautogui.hotkey(*keys)
|
|
274
|
+
self.pyautogui.hotkey(*keys)
|
|
241
275
|
result = ToolResult(output=f"Pressed hotkey: {key_sequence}")
|
|
242
276
|
else:
|
|
243
277
|
# Map common key names from xdotool to PyAutoGUI
|
|
244
278
|
key = key_sequence.lower()
|
|
245
|
-
pyautogui.press(CLA_TO_PYAUTOGUI.get(key, key))
|
|
279
|
+
self.pyautogui.press(CLA_TO_PYAUTOGUI.get(key, key))
|
|
246
280
|
result = ToolResult(output=f"Pressed key: {key_sequence}")
|
|
247
281
|
|
|
248
282
|
if take_screenshot:
|
|
@@ -265,7 +299,7 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
265
299
|
|
|
266
300
|
# Handle single key or combination
|
|
267
301
|
if len(mapped_keys) == 1 and "+" not in mapped_keys[0]:
|
|
268
|
-
pyautogui.press(mapped_keys[0])
|
|
302
|
+
self.pyautogui.press(mapped_keys[0])
|
|
269
303
|
result = ToolResult(output=f"Pressed key: {keys[0]}")
|
|
270
304
|
else:
|
|
271
305
|
# For combinations, use hotkey
|
|
@@ -275,7 +309,7 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
275
309
|
hotkey_parts.extend(key.split("+"))
|
|
276
310
|
else:
|
|
277
311
|
hotkey_parts.append(key)
|
|
278
|
-
pyautogui.hotkey(*hotkey_parts)
|
|
312
|
+
self.pyautogui.hotkey(*hotkey_parts)
|
|
279
313
|
result = ToolResult(output=f"Pressed hotkey: {'+'.join(keys)}")
|
|
280
314
|
|
|
281
315
|
if take_screenshot:
|
|
@@ -296,7 +330,7 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
296
330
|
# Map CLA keys to PyAutoGUI keys
|
|
297
331
|
mapped_keys = self._map_keys(keys)
|
|
298
332
|
for key in mapped_keys:
|
|
299
|
-
pyautogui.keyDown(key)
|
|
333
|
+
self.pyautogui.keyDown(key)
|
|
300
334
|
|
|
301
335
|
result = ToolResult(output=f"Keys down: {', '.join(keys)}")
|
|
302
336
|
|
|
@@ -318,7 +352,7 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
318
352
|
# Map CLA keys to PyAutoGUI keys
|
|
319
353
|
mapped_keys = self._map_keys(keys)
|
|
320
354
|
for key in reversed(mapped_keys): # Release in reverse order
|
|
321
|
-
pyautogui.keyUp(key)
|
|
355
|
+
self.pyautogui.keyUp(key)
|
|
322
356
|
|
|
323
357
|
result = ToolResult(output=f"Keys up: {', '.join(keys)}")
|
|
324
358
|
|
|
@@ -347,7 +381,7 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
347
381
|
try:
|
|
348
382
|
# Move to position if specified
|
|
349
383
|
if x is not None and y is not None:
|
|
350
|
-
pyautogui.moveTo(x, y)
|
|
384
|
+
self.pyautogui.moveTo(x, y)
|
|
351
385
|
|
|
352
386
|
# Hold keys if specified
|
|
353
387
|
self._hold_keys_context(hold_keys)
|
|
@@ -358,14 +392,14 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
358
392
|
# Perform vertical scroll
|
|
359
393
|
if scroll_y and scroll_y != 0:
|
|
360
394
|
# PyAutoGUI: positive = up, negative = down (opposite of our convention)
|
|
361
|
-
pyautogui.scroll(-scroll_y)
|
|
395
|
+
self.pyautogui.scroll(-scroll_y)
|
|
362
396
|
msg_parts.append(f"vertically by {scroll_y}")
|
|
363
397
|
|
|
364
398
|
# Perform horizontal scroll (if supported)
|
|
365
399
|
if scroll_x and scroll_x != 0:
|
|
366
400
|
# PyAutoGUI horizontal scroll might not work on all platforms
|
|
367
401
|
try:
|
|
368
|
-
pyautogui.hscroll(scroll_x)
|
|
402
|
+
self.pyautogui.hscroll(scroll_x)
|
|
369
403
|
msg_parts.append(f"horizontally by {scroll_x}")
|
|
370
404
|
except AttributeError:
|
|
371
405
|
# hscroll not available
|
|
@@ -409,13 +443,13 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
409
443
|
try:
|
|
410
444
|
if x is not None and y is not None:
|
|
411
445
|
# Absolute move
|
|
412
|
-
pyautogui.moveTo(x, y, duration=0.1)
|
|
446
|
+
self.pyautogui.moveTo(x, y, duration=0.1)
|
|
413
447
|
result = ToolResult(output=f"Moved mouse to ({x}, {y})")
|
|
414
448
|
elif offset_x is not None or offset_y is not None:
|
|
415
449
|
# Relative move
|
|
416
450
|
offset_x = offset_x or 0
|
|
417
451
|
offset_y = offset_y or 0
|
|
418
|
-
pyautogui.moveRel(xOffset=offset_x, yOffset=offset_y, duration=0.1)
|
|
452
|
+
self.pyautogui.moveRel(xOffset=offset_x, yOffset=offset_y, duration=0.1)
|
|
419
453
|
result = ToolResult(output=f"Moved mouse by offset ({offset_x}, {offset_y})")
|
|
420
454
|
else:
|
|
421
455
|
return ToolResult(output="No move coordinates specified")
|
|
@@ -450,25 +484,25 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
450
484
|
try:
|
|
451
485
|
# Move to start
|
|
452
486
|
start_x, start_y = path[0]
|
|
453
|
-
pyautogui.moveTo(start_x, start_y)
|
|
487
|
+
self.pyautogui.moveTo(start_x, start_y)
|
|
454
488
|
|
|
455
489
|
# Handle multi-point drag
|
|
456
490
|
if len(path) == 2:
|
|
457
491
|
# Simple drag
|
|
458
492
|
end_x, end_y = path[1]
|
|
459
|
-
pyautogui.dragTo(end_x, end_y, duration=0.5, button="left")
|
|
493
|
+
self.pyautogui.dragTo(end_x, end_y, duration=0.5, button="left")
|
|
460
494
|
result = ToolResult(
|
|
461
495
|
output=f"Dragged from ({start_x}, {start_y}) to ({end_x}, {end_y})"
|
|
462
496
|
)
|
|
463
497
|
else:
|
|
464
498
|
# Multi-point drag
|
|
465
|
-
pyautogui.mouseDown(button="left")
|
|
499
|
+
self.pyautogui.mouseDown(button="left")
|
|
466
500
|
for i, (x, y) in enumerate(path[1:], 1):
|
|
467
501
|
duration = 0.1
|
|
468
502
|
if pattern and i - 1 < len(pattern):
|
|
469
503
|
duration = pattern[i - 1] / 1000.0 # Convert ms to seconds
|
|
470
|
-
pyautogui.moveTo(x, y, duration=duration)
|
|
471
|
-
pyautogui.mouseUp(button="left")
|
|
504
|
+
self.pyautogui.moveTo(x, y, duration=duration)
|
|
505
|
+
self.pyautogui.mouseUp(button="left")
|
|
472
506
|
|
|
473
507
|
result = ToolResult(output=f"Dragged along {len(path)} points")
|
|
474
508
|
|
|
@@ -507,7 +541,7 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
507
541
|
} # Fallback for unsupported
|
|
508
542
|
button_name = button_map.get(button, "left")
|
|
509
543
|
|
|
510
|
-
pyautogui.mouseDown(button=button_name)
|
|
544
|
+
self.pyautogui.mouseDown(button=button_name)
|
|
511
545
|
result = ToolResult(output=f"Mouse down: {button} button")
|
|
512
546
|
|
|
513
547
|
if take_screenshot:
|
|
@@ -539,7 +573,7 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
539
573
|
} # Fallback for unsupported
|
|
540
574
|
button_name = button_map.get(button, "left")
|
|
541
575
|
|
|
542
|
-
pyautogui.mouseUp(button=button_name)
|
|
576
|
+
self.pyautogui.mouseUp(button=button_name)
|
|
543
577
|
result = ToolResult(output=f"Mouse up: {button} button")
|
|
544
578
|
|
|
545
579
|
if take_screenshot:
|
|
@@ -559,9 +593,9 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
559
593
|
try:
|
|
560
594
|
# Map CLA key to PyAutoGUI key
|
|
561
595
|
mapped_key = self._map_key(key)
|
|
562
|
-
pyautogui.keyDown(mapped_key)
|
|
596
|
+
self.pyautogui.keyDown(mapped_key)
|
|
563
597
|
await asyncio.sleep(duration)
|
|
564
|
-
pyautogui.keyUp(mapped_key)
|
|
598
|
+
self.pyautogui.keyUp(mapped_key)
|
|
565
599
|
|
|
566
600
|
result = ToolResult(output=f"Held key '{key}' for {duration} seconds")
|
|
567
601
|
|
|
@@ -579,7 +613,7 @@ class PyAutoGUIExecutor(BaseExecutor):
|
|
|
579
613
|
async def position(self) -> ToolResult:
|
|
580
614
|
"""Get current cursor position."""
|
|
581
615
|
try:
|
|
582
|
-
x, y = pyautogui.position()
|
|
616
|
+
x, y = self.pyautogui.position()
|
|
583
617
|
return ToolResult(output=f"Mouse position: ({x}, {y})")
|
|
584
618
|
except Exception as e:
|
|
585
619
|
return ToolResult(error=str(e))
|
hud/tools/executors/tests/test_pyautogui_executor.py
CHANGED
|
@@ -7,7 +7,10 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
7
7
|
import pytest
|
|
8
8
|
|
|
9
9
|
from hud.tools.base import ToolResult
|
|
10
|
-
from hud.tools.executors.pyautogui import
|
|
10
|
+
from hud.tools.executors.pyautogui import PyAutoGUIExecutor
|
|
11
|
+
|
|
12
|
+
# Check if pyautogui is available for test skipping
|
|
13
|
+
PYAUTOGUI_AVAILABLE = PyAutoGUIExecutor.is_available()
|
|
11
14
|
|
|
12
15
|
|
|
13
16
|
class TestPyAutoGUIExecutor:
|
hud/tools/playwright_tool.py
CHANGED
|
@@ -21,8 +21,9 @@ logger = logging.getLogger(__name__)
|
|
|
21
21
|
class PlaywrightTool:
|
|
22
22
|
"""Playwright tool for web automation."""
|
|
23
23
|
|
|
24
|
-
def __init__(self) -> None:
|
|
24
|
+
def __init__(self, cdp_url: str | None = None) -> None:
|
|
25
25
|
super().__init__()
|
|
26
|
+
self._cdp_url = cdp_url
|
|
26
27
|
self._playwright = None
|
|
27
28
|
self._browser: Browser | None = None
|
|
28
29
|
self._context: BrowserContext | None = None
|
|
@@ -46,9 +47,6 @@ class PlaywrightTool:
|
|
|
46
47
|
None, description="CSS selector for element (for click, type, wait_for_element actions)"
|
|
47
48
|
),
|
|
48
49
|
text: str | None = Field(None, description="Text to type (for type action)"),
|
|
49
|
-
path: str | None = Field(
|
|
50
|
-
None, description="File path to save screenshot (for screenshot action)"
|
|
51
|
-
),
|
|
52
50
|
wait_for_load_state: Literal["commit", "domcontentloaded", "load", "networkidle"]
|
|
53
51
|
| None = Field(
|
|
54
52
|
None,
|
|
@@ -74,7 +72,7 @@ class PlaywrightTool:
|
|
|
74
72
|
result = await self.navigate(url, wait_for_load_state or "networkidle")
|
|
75
73
|
|
|
76
74
|
elif action == "screenshot":
|
|
77
|
-
result = await self.screenshot(
|
|
75
|
+
result = await self.screenshot()
|
|
78
76
|
|
|
79
77
|
elif action == "click":
|
|
80
78
|
if selector is None:
|
|
@@ -119,17 +117,13 @@ class PlaywrightTool:
|
|
|
119
117
|
# Convert dict result to ToolResult
|
|
120
118
|
if isinstance(result, dict):
|
|
121
119
|
if result.get("success"):
|
|
122
|
-
|
|
123
|
-
# Return screenshot as image content
|
|
124
|
-
tool_result = ToolResult(
|
|
125
|
-
output=result.get("message", ""), base64_image=result["screenshot"]
|
|
126
|
-
)
|
|
127
|
-
else:
|
|
128
|
-
tool_result = ToolResult(output=result.get("message", ""))
|
|
120
|
+
tool_result = ToolResult(output=result.get("message", ""))
|
|
129
121
|
else:
|
|
130
122
|
tool_result = ToolResult(error=result.get("error", "Unknown error"))
|
|
131
|
-
|
|
123
|
+
elif isinstance(result, ToolResult):
|
|
132
124
|
tool_result = result
|
|
125
|
+
else:
|
|
126
|
+
tool_result = ToolResult(output=str(result))
|
|
133
127
|
|
|
134
128
|
# Convert result to content blocks
|
|
135
129
|
return tool_result_to_content_blocks(tool_result)
|
|
@@ -143,10 +137,14 @@ class PlaywrightTool:
|
|
|
143
137
|
async def _ensure_browser(self) -> None:
|
|
144
138
|
"""Ensure browser is launched and ready."""
|
|
145
139
|
if self._browser is None or not self._browser.is_connected():
|
|
146
|
-
|
|
140
|
+
if self._cdp_url:
|
|
141
|
+
logger.info("Connecting to remote browser via CDP: %s", self._cdp_url)
|
|
142
|
+
else:
|
|
143
|
+
logger.info("Launching Playwright browser...")
|
|
147
144
|
|
|
148
|
-
# Ensure DISPLAY is set
|
|
149
|
-
|
|
145
|
+
# Ensure DISPLAY is set (only needed for local browser)
|
|
146
|
+
if not self._cdp_url:
|
|
147
|
+
os.environ["DISPLAY"] = os.environ.get("DISPLAY", ":1")
|
|
150
148
|
|
|
151
149
|
if self._playwright is None:
|
|
152
150
|
try:
|
|
@@ -158,37 +156,56 @@ class PlaywrightTool:
|
|
|
158
156
|
"Playwright is not installed. Please install with: pip install playwright"
|
|
159
157
|
) from None
|
|
160
158
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
"
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
159
|
+
# Connect via CDP URL or launch local browser
|
|
160
|
+
if self._cdp_url:
|
|
161
|
+
# Connect to remote browser via CDP
|
|
162
|
+
self._browser = await self._playwright.chromium.connect_over_cdp(self._cdp_url)
|
|
163
|
+
|
|
164
|
+
if self._browser is None:
|
|
165
|
+
raise RuntimeError("Failed to connect to remote browser")
|
|
166
|
+
|
|
167
|
+
# Use existing context or create new one
|
|
168
|
+
contexts = self._browser.contexts
|
|
169
|
+
if contexts:
|
|
170
|
+
self._context = contexts[0]
|
|
171
|
+
else:
|
|
172
|
+
self._context = await self._browser.new_context(
|
|
173
|
+
viewport={"width": 1920, "height": 1080},
|
|
174
|
+
ignore_https_errors=True,
|
|
175
|
+
)
|
|
176
|
+
else:
|
|
177
|
+
# Launch local browser
|
|
178
|
+
self._browser = await self._playwright.chromium.launch(
|
|
179
|
+
headless=False,
|
|
180
|
+
args=[
|
|
181
|
+
"--no-sandbox",
|
|
182
|
+
"--disable-dev-shm-usage",
|
|
183
|
+
"--disable-gpu",
|
|
184
|
+
"--disable-web-security",
|
|
185
|
+
"--disable-features=IsolateOrigins,site-per-process",
|
|
186
|
+
"--disable-blink-features=AutomationControlled",
|
|
187
|
+
"--window-size=1920,1080",
|
|
188
|
+
"--window-position=0,0",
|
|
189
|
+
"--start-maximized",
|
|
190
|
+
"--disable-background-timer-throttling",
|
|
191
|
+
"--disable-backgrounding-occluded-windows",
|
|
192
|
+
"--disable-renderer-backgrounding",
|
|
193
|
+
"--disable-features=TranslateUI",
|
|
194
|
+
"--disable-ipc-flooding-protection",
|
|
195
|
+
"--disable-default-apps",
|
|
196
|
+
"--no-first-run",
|
|
197
|
+
"--disable-sync",
|
|
198
|
+
"--no-default-browser-check",
|
|
199
|
+
],
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
if self._browser is None:
|
|
203
|
+
raise RuntimeError("Browser failed to initialize")
|
|
204
|
+
|
|
205
|
+
self._context = await self._browser.new_context(
|
|
206
|
+
viewport={"width": 1920, "height": 1080},
|
|
207
|
+
ignore_https_errors=True,
|
|
208
|
+
)
|
|
192
209
|
|
|
193
210
|
if self._context is None:
|
|
194
211
|
raise RuntimeError("Browser context failed to initialize")
|
|
@@ -234,35 +251,24 @@ class PlaywrightTool:
|
|
|
234
251
|
"message": f"Failed to navigate to {url}: {e}",
|
|
235
252
|
}
|
|
236
253
|
|
|
237
|
-
async def screenshot(self
|
|
254
|
+
async def screenshot(self) -> ToolResult:
|
|
238
255
|
"""Take a screenshot of the current page.
|
|
239
256
|
|
|
240
|
-
Args:
|
|
241
|
-
path: Optional path to save screenshot
|
|
242
|
-
|
|
243
257
|
Returns:
|
|
244
|
-
|
|
258
|
+
ToolResult with base64_image
|
|
245
259
|
"""
|
|
246
260
|
await self._ensure_browser()
|
|
247
261
|
|
|
248
262
|
try:
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
import base64
|
|
256
|
-
|
|
257
|
-
screenshot_b64 = base64.b64encode(screenshot_bytes).decode()
|
|
258
|
-
return {
|
|
259
|
-
"success": True,
|
|
260
|
-
"screenshot": screenshot_b64,
|
|
261
|
-
"message": "Screenshot captured",
|
|
262
|
-
}
|
|
263
|
+
# Always return base64 encoded screenshot as ToolResult
|
|
264
|
+
screenshot_bytes = await self.page.screenshot(full_page=True)
|
|
265
|
+
import base64
|
|
266
|
+
|
|
267
|
+
screenshot_b64 = base64.b64encode(screenshot_bytes).decode()
|
|
268
|
+
return ToolResult(base64_image=screenshot_b64)
|
|
263
269
|
except Exception as e:
|
|
264
270
|
logger.error("Screenshot failed: %s", e)
|
|
265
|
-
return
|
|
271
|
+
return ToolResult(error=f"Failed to take screenshot: {e}")
|
|
266
272
|
|
|
267
273
|
async def click(self, selector: str) -> dict[str, Any]:
|
|
268
274
|
"""Click an element by selector.
|
hud/tools/tests/test_edit.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
|
+
import sys
|
|
6
7
|
import tempfile
|
|
7
8
|
from pathlib import Path
|
|
8
9
|
from unittest.mock import AsyncMock, patch
|
|
@@ -37,8 +38,14 @@ class TestEditTool:
|
|
|
37
38
|
"""Test validate_path when file doesn't exist for non-create commands."""
|
|
38
39
|
tool = EditTool()
|
|
39
40
|
|
|
41
|
+
# Use a platform-appropriate absolute path
|
|
42
|
+
if sys.platform == "win32":
|
|
43
|
+
nonexistent_path = Path("C:\\nonexistent\\file.txt")
|
|
44
|
+
else:
|
|
45
|
+
nonexistent_path = Path("/nonexistent/file.txt")
|
|
46
|
+
|
|
40
47
|
with pytest.raises(ToolError) as exc_info:
|
|
41
|
-
tool.validate_path("view",
|
|
48
|
+
tool.validate_path("view", nonexistent_path)
|
|
42
49
|
|
|
43
50
|
assert "does not exist" in str(exc_info.value)
|
|
44
51
|
|
hud/tools/tests/test_tools.py
CHANGED
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import inspect
|
|
5
|
+
import sys
|
|
5
6
|
|
|
6
7
|
import pytest
|
|
7
8
|
from mcp.types import ImageContent, TextContent
|
|
@@ -72,6 +73,7 @@ async def test_bash_tool_restart_and_no_command():
|
|
|
72
73
|
|
|
73
74
|
|
|
74
75
|
@pytest.mark.asyncio
|
|
76
|
+
@pytest.mark.skipif(sys.platform == "win32", reason="EditTool uses Unix commands")
|
|
75
77
|
async def test_edit_tool_flow(tmp_path):
|
|
76
78
|
file_path = tmp_path / "demo.txt"
|
|
77
79
|
|
|
@@ -106,6 +108,7 @@ async def test_base_executor_simulation():
|
|
|
106
108
|
|
|
107
109
|
|
|
108
110
|
@pytest.mark.asyncio
|
|
111
|
+
@pytest.mark.skipif(sys.platform == "win32", reason="EditTool uses Unix commands")
|
|
109
112
|
async def test_edit_tool_view(tmp_path):
|
|
110
113
|
# Create a temporary file
|
|
111
114
|
p = tmp_path / "sample.txt"
|
hud/trajectory.py
CHANGED
|
@@ -3,7 +3,6 @@ from __future__ import annotations
|
|
|
3
3
|
|
|
4
4
|
import datetime
|
|
5
5
|
|
|
6
|
-
from IPython.display import HTML, Markdown, display
|
|
7
6
|
from pydantic import BaseModel, Field
|
|
8
7
|
|
|
9
8
|
from .adapters.common.types import LogType
|
|
@@ -30,6 +29,11 @@ class Trajectory(BaseModel):
|
|
|
30
29
|
trajectory: list[TrajectoryStep] = Field(default_factory=list)
|
|
31
30
|
|
|
32
31
|
def display(self) -> None:
|
|
32
|
+
try:
|
|
33
|
+
from IPython.display import HTML, Markdown, display
|
|
34
|
+
except ImportError:
|
|
35
|
+
raise ImportError("IPython is required for trajectory display") from None
|
|
36
|
+
|
|
33
37
|
trajectory_start_timestamp_str = self.trajectory[0].start_timestamp
|
|
34
38
|
t_start_dt = (
|
|
35
39
|
datetime.datetime.fromisoformat(trajectory_start_timestamp_str.replace("Z", "+00:00"))
|
hud/utils/tests/test_version.py
CHANGED
hud/version.py
CHANGED