hud-python 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (53)
  1. hud/__init__.py +7 -4
  2. hud/adapters/common/adapter.py +14 -3
  3. hud/adapters/common/tests/test_adapter.py +16 -4
  4. hud/datasets.py +188 -0
  5. hud/env/docker_client.py +14 -2
  6. hud/env/local_docker_client.py +28 -6
  7. hud/gym.py +0 -9
  8. hud/{mcp_agent → mcp}/__init__.py +2 -0
  9. hud/mcp/base.py +631 -0
  10. hud/{mcp_agent → mcp}/claude.py +52 -47
  11. hud/mcp/client.py +312 -0
  12. hud/{mcp_agent → mcp}/langchain.py +52 -33
  13. hud/{mcp_agent → mcp}/openai.py +56 -40
  14. hud/{mcp_agent → mcp}/tests/test_base.py +129 -54
  15. hud/mcp/tests/test_claude.py +294 -0
  16. hud/mcp/tests/test_client.py +324 -0
  17. hud/mcp/tests/test_openai.py +238 -0
  18. hud/settings.py +6 -0
  19. hud/task.py +1 -88
  20. hud/taskset.py +2 -23
  21. hud/telemetry/__init__.py +5 -0
  22. hud/telemetry/_trace.py +180 -17
  23. hud/telemetry/context.py +79 -0
  24. hud/telemetry/exporter.py +165 -6
  25. hud/telemetry/job.py +141 -0
  26. hud/telemetry/tests/test_trace.py +36 -25
  27. hud/tools/__init__.py +14 -1
  28. hud/tools/executors/__init__.py +19 -2
  29. hud/tools/executors/pyautogui.py +84 -50
  30. hud/tools/executors/tests/test_pyautogui_executor.py +4 -1
  31. hud/tools/playwright_tool.py +73 -67
  32. hud/tools/tests/test_edit.py +8 -1
  33. hud/tools/tests/test_tools.py +3 -0
  34. hud/trajectory.py +5 -1
  35. hud/utils/tests/test_version.py +1 -1
  36. hud/version.py +1 -1
  37. {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/METADATA +20 -14
  38. {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/RECORD +41 -46
  39. hud/evaluators/__init__.py +0 -9
  40. hud/evaluators/base.py +0 -32
  41. hud/evaluators/inspect.py +0 -24
  42. hud/evaluators/judge.py +0 -189
  43. hud/evaluators/match.py +0 -156
  44. hud/evaluators/remote.py +0 -65
  45. hud/evaluators/tests/__init__.py +0 -0
  46. hud/evaluators/tests/test_inspect.py +0 -12
  47. hud/evaluators/tests/test_judge.py +0 -231
  48. hud/evaluators/tests/test_match.py +0 -115
  49. hud/evaluators/tests/test_remote.py +0 -98
  50. hud/mcp_agent/base.py +0 -723
  51. hud/{mcp_agent → mcp}/tests/__init__.py +0 -0
  52. {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/WHEEL +0 -0
  53. {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/licenses/LICENSE +0 -0
hud/tools/executors/pyautogui.py CHANGED
@@ -5,28 +5,56 @@ import base64
5
5
  import logging
6
6
  import os
7
7
  from io import BytesIO
8
- from typing import Literal
8
+ from typing import Any, Literal
9
9
 
10
- if "DISPLAY" not in os.environ:
11
- try:
12
- from hud.settings import settings
10
+ from hud.tools.base import ToolResult
13
11
 
14
- os.environ["DISPLAY"] = settings.display
15
- except (ImportError, AttributeError):
16
- os.environ["DISPLAY"] = ":0"
12
+ from .base import BaseExecutor
17
13
 
18
- try:
19
- import pyautogui
14
+ logger = logging.getLogger(__name__)
20
15
 
21
- PYAUTOGUI_AVAILABLE = True
22
- except ImportError:
23
- PYAUTOGUI_AVAILABLE = False
16
+ # Lazy loading for pyautogui
17
+ _pyautogui = None
18
+ _pyautogui_available = None
24
19
 
25
- from hud.tools.base import ToolResult
26
20
 
27
- from .base import BaseExecutor
21
+ def _get_pyautogui() -> Any | None:
22
+ """Lazily import and return pyautogui module."""
23
+ global _pyautogui, _pyautogui_available
24
+
25
+ if _pyautogui_available is False:
26
+ return None
27
+
28
+ if _pyautogui is None:
29
+ # Set display if not already set
30
+ if "DISPLAY" not in os.environ:
31
+ try:
32
+ from hud.settings import settings
33
+
34
+ os.environ["DISPLAY"] = settings.display
35
+ except (ImportError, AttributeError):
36
+ os.environ["DISPLAY"] = ":0"
37
+
38
+ try:
39
+ import pyautogui
40
+
41
+ _pyautogui = pyautogui
42
+ _pyautogui_available = True
43
+
44
+ # Configure PyAutoGUI settings
45
+ _pyautogui.FAILSAFE = False # Disable fail-safe feature
46
+ _pyautogui.PAUSE = 0.1 # Small pause between actions
47
+ except ImportError:
48
+ _pyautogui_available = False
49
+ logger.warning("PyAutoGUI is not available")
50
+ return None
51
+ except Exception as e:
52
+ _pyautogui_available = False
53
+ logger.warning("Failed to initialize PyAutoGUI: %s", e)
54
+ return None
55
+
56
+ return _pyautogui
28
57
 
29
- logger = logging.getLogger(__name__)
30
58
 
31
59
  # Map CLA standard keys to PyAutoGUI keys (only where they differ)
32
60
  CLA_TO_PYAUTOGUI = {
@@ -58,12 +86,17 @@ class PyAutoGUIExecutor(BaseExecutor):
58
86
  display_num: X display number (used only on Linux, ignored on Windows/macOS)
59
87
  """
60
88
  super().__init__(display_num)
61
-
89
+ self._pyautogui = None
62
90
  logger.info("PyAutoGUIExecutor initialized")
63
91
 
64
- # Configure PyAutoGUI settings
65
- pyautogui.FAILSAFE = False # Disable fail-safe feature
66
- pyautogui.PAUSE = 0.1 # Small pause between actions
92
+ @property
93
+ def pyautogui(self) -> Any:
94
+ """Get the pyautogui module, importing it lazily if needed."""
95
+ if self._pyautogui is None:
96
+ self._pyautogui = _get_pyautogui()
97
+ if self._pyautogui is None:
98
+ raise RuntimeError("PyAutoGUI is not available")
99
+ return self._pyautogui
67
100
 
68
101
  def _map_key(self, key: str) -> str:
69
102
  """Map CLA standard key to PyAutoGUI key."""
@@ -90,7 +123,8 @@ class PyAutoGUIExecutor(BaseExecutor):
90
123
  Returns:
91
124
  True if PyAutoGUI is available and functional, False otherwise
92
125
  """
93
- if not PYAUTOGUI_AVAILABLE:
126
+ pyautogui = _get_pyautogui()
127
+ if not pyautogui:
94
128
  return False
95
129
 
96
130
  try:
@@ -109,7 +143,7 @@ class PyAutoGUIExecutor(BaseExecutor):
109
143
  """
110
144
  try:
111
145
  # Take screenshot using PyAutoGUI
112
- screenshot = pyautogui.screenshot()
146
+ screenshot = self.pyautogui.screenshot()
113
147
 
114
148
  # Convert to base64
115
149
  buffer = BytesIO()
@@ -131,13 +165,13 @@ class PyAutoGUIExecutor(BaseExecutor):
131
165
  """
132
166
  if keys:
133
167
  for key in keys:
134
- pyautogui.keyDown(key)
168
+ self.pyautogui.keyDown(key)
135
169
 
136
170
  def _release_keys(self, keys: list[str] | None) -> None:
137
171
  """Release held keys."""
138
172
  if keys:
139
173
  for key in reversed(keys): # Release in reverse order
140
- pyautogui.keyUp(key)
174
+ self.pyautogui.keyUp(key)
141
175
 
142
176
  # ===== CLA Action Implementations =====
143
177
 
@@ -172,17 +206,17 @@ class PyAutoGUIExecutor(BaseExecutor):
172
206
  interval = pattern[0] / 1000.0 if pattern else 0.1 # Convert ms to seconds
173
207
 
174
208
  if x is not None and y is not None:
175
- pyautogui.click(
209
+ self.pyautogui.click(
176
210
  x=x, y=y, clicks=clicks, interval=interval, button=button_name
177
211
  )
178
212
  else:
179
- pyautogui.click(clicks=clicks, interval=interval, button=button_name)
213
+ self.pyautogui.click(clicks=clicks, interval=interval, button=button_name)
180
214
  else:
181
215
  # Single click
182
216
  if x is not None and y is not None:
183
- pyautogui.click(x=x, y=y, button=button_name)
217
+ self.pyautogui.click(x=x, y=y, button=button_name)
184
218
  else:
185
- pyautogui.click(button=button_name)
219
+ self.pyautogui.click(button=button_name)
186
220
  finally:
187
221
  # Release held keys
188
222
  self._release_keys(hold_keys)
@@ -210,10 +244,10 @@ class PyAutoGUIExecutor(BaseExecutor):
210
244
  try:
211
245
  # Convert delay from milliseconds to seconds for PyAutoGUI
212
246
  interval = delay / 1000.0
213
- pyautogui.typewrite(text, interval=interval)
247
+ self.pyautogui.typewrite(text, interval=interval)
214
248
 
215
249
  if enter_after:
216
- pyautogui.press("enter")
250
+ self.pyautogui.press("enter")
217
251
 
218
252
  result = ToolResult(
219
253
  output=f"Typed: '{text}'" + (" and pressed Enter" if enter_after else "")
@@ -237,12 +271,12 @@ class PyAutoGUIExecutor(BaseExecutor):
237
271
  # Handle key combinations (e.g., "ctrl+c")
238
272
  if "+" in key_sequence:
239
273
  keys = key_sequence.split("+")
240
- pyautogui.hotkey(*keys)
274
+ self.pyautogui.hotkey(*keys)
241
275
  result = ToolResult(output=f"Pressed hotkey: {key_sequence}")
242
276
  else:
243
277
  # Map common key names from xdotool to PyAutoGUI
244
278
  key = key_sequence.lower()
245
- pyautogui.press(CLA_TO_PYAUTOGUI.get(key, key))
279
+ self.pyautogui.press(CLA_TO_PYAUTOGUI.get(key, key))
246
280
  result = ToolResult(output=f"Pressed key: {key_sequence}")
247
281
 
248
282
  if take_screenshot:
@@ -265,7 +299,7 @@ class PyAutoGUIExecutor(BaseExecutor):
265
299
 
266
300
  # Handle single key or combination
267
301
  if len(mapped_keys) == 1 and "+" not in mapped_keys[0]:
268
- pyautogui.press(mapped_keys[0])
302
+ self.pyautogui.press(mapped_keys[0])
269
303
  result = ToolResult(output=f"Pressed key: {keys[0]}")
270
304
  else:
271
305
  # For combinations, use hotkey
@@ -275,7 +309,7 @@ class PyAutoGUIExecutor(BaseExecutor):
275
309
  hotkey_parts.extend(key.split("+"))
276
310
  else:
277
311
  hotkey_parts.append(key)
278
- pyautogui.hotkey(*hotkey_parts)
312
+ self.pyautogui.hotkey(*hotkey_parts)
279
313
  result = ToolResult(output=f"Pressed hotkey: {'+'.join(keys)}")
280
314
 
281
315
  if take_screenshot:
@@ -296,7 +330,7 @@ class PyAutoGUIExecutor(BaseExecutor):
296
330
  # Map CLA keys to PyAutoGUI keys
297
331
  mapped_keys = self._map_keys(keys)
298
332
  for key in mapped_keys:
299
- pyautogui.keyDown(key)
333
+ self.pyautogui.keyDown(key)
300
334
 
301
335
  result = ToolResult(output=f"Keys down: {', '.join(keys)}")
302
336
 
@@ -318,7 +352,7 @@ class PyAutoGUIExecutor(BaseExecutor):
318
352
  # Map CLA keys to PyAutoGUI keys
319
353
  mapped_keys = self._map_keys(keys)
320
354
  for key in reversed(mapped_keys): # Release in reverse order
321
- pyautogui.keyUp(key)
355
+ self.pyautogui.keyUp(key)
322
356
 
323
357
  result = ToolResult(output=f"Keys up: {', '.join(keys)}")
324
358
 
@@ -347,7 +381,7 @@ class PyAutoGUIExecutor(BaseExecutor):
347
381
  try:
348
382
  # Move to position if specified
349
383
  if x is not None and y is not None:
350
- pyautogui.moveTo(x, y)
384
+ self.pyautogui.moveTo(x, y)
351
385
 
352
386
  # Hold keys if specified
353
387
  self._hold_keys_context(hold_keys)
@@ -358,14 +392,14 @@ class PyAutoGUIExecutor(BaseExecutor):
358
392
  # Perform vertical scroll
359
393
  if scroll_y and scroll_y != 0:
360
394
  # PyAutoGUI: positive = up, negative = down (opposite of our convention)
361
- pyautogui.scroll(-scroll_y)
395
+ self.pyautogui.scroll(-scroll_y)
362
396
  msg_parts.append(f"vertically by {scroll_y}")
363
397
 
364
398
  # Perform horizontal scroll (if supported)
365
399
  if scroll_x and scroll_x != 0:
366
400
  # PyAutoGUI horizontal scroll might not work on all platforms
367
401
  try:
368
- pyautogui.hscroll(scroll_x)
402
+ self.pyautogui.hscroll(scroll_x)
369
403
  msg_parts.append(f"horizontally by {scroll_x}")
370
404
  except AttributeError:
371
405
  # hscroll not available
@@ -409,13 +443,13 @@ class PyAutoGUIExecutor(BaseExecutor):
409
443
  try:
410
444
  if x is not None and y is not None:
411
445
  # Absolute move
412
- pyautogui.moveTo(x, y, duration=0.1)
446
+ self.pyautogui.moveTo(x, y, duration=0.1)
413
447
  result = ToolResult(output=f"Moved mouse to ({x}, {y})")
414
448
  elif offset_x is not None or offset_y is not None:
415
449
  # Relative move
416
450
  offset_x = offset_x or 0
417
451
  offset_y = offset_y or 0
418
- pyautogui.moveRel(xOffset=offset_x, yOffset=offset_y, duration=0.1)
452
+ self.pyautogui.moveRel(xOffset=offset_x, yOffset=offset_y, duration=0.1)
419
453
  result = ToolResult(output=f"Moved mouse by offset ({offset_x}, {offset_y})")
420
454
  else:
421
455
  return ToolResult(output="No move coordinates specified")
@@ -450,25 +484,25 @@ class PyAutoGUIExecutor(BaseExecutor):
450
484
  try:
451
485
  # Move to start
452
486
  start_x, start_y = path[0]
453
- pyautogui.moveTo(start_x, start_y)
487
+ self.pyautogui.moveTo(start_x, start_y)
454
488
 
455
489
  # Handle multi-point drag
456
490
  if len(path) == 2:
457
491
  # Simple drag
458
492
  end_x, end_y = path[1]
459
- pyautogui.dragTo(end_x, end_y, duration=0.5, button="left")
493
+ self.pyautogui.dragTo(end_x, end_y, duration=0.5, button="left")
460
494
  result = ToolResult(
461
495
  output=f"Dragged from ({start_x}, {start_y}) to ({end_x}, {end_y})"
462
496
  )
463
497
  else:
464
498
  # Multi-point drag
465
- pyautogui.mouseDown(button="left")
499
+ self.pyautogui.mouseDown(button="left")
466
500
  for i, (x, y) in enumerate(path[1:], 1):
467
501
  duration = 0.1
468
502
  if pattern and i - 1 < len(pattern):
469
503
  duration = pattern[i - 1] / 1000.0 # Convert ms to seconds
470
- pyautogui.moveTo(x, y, duration=duration)
471
- pyautogui.mouseUp(button="left")
504
+ self.pyautogui.moveTo(x, y, duration=duration)
505
+ self.pyautogui.mouseUp(button="left")
472
506
 
473
507
  result = ToolResult(output=f"Dragged along {len(path)} points")
474
508
 
@@ -507,7 +541,7 @@ class PyAutoGUIExecutor(BaseExecutor):
507
541
  } # Fallback for unsupported
508
542
  button_name = button_map.get(button, "left")
509
543
 
510
- pyautogui.mouseDown(button=button_name)
544
+ self.pyautogui.mouseDown(button=button_name)
511
545
  result = ToolResult(output=f"Mouse down: {button} button")
512
546
 
513
547
  if take_screenshot:
@@ -539,7 +573,7 @@ class PyAutoGUIExecutor(BaseExecutor):
539
573
  } # Fallback for unsupported
540
574
  button_name = button_map.get(button, "left")
541
575
 
542
- pyautogui.mouseUp(button=button_name)
576
+ self.pyautogui.mouseUp(button=button_name)
543
577
  result = ToolResult(output=f"Mouse up: {button} button")
544
578
 
545
579
  if take_screenshot:
@@ -559,9 +593,9 @@ class PyAutoGUIExecutor(BaseExecutor):
559
593
  try:
560
594
  # Map CLA key to PyAutoGUI key
561
595
  mapped_key = self._map_key(key)
562
- pyautogui.keyDown(mapped_key)
596
+ self.pyautogui.keyDown(mapped_key)
563
597
  await asyncio.sleep(duration)
564
- pyautogui.keyUp(mapped_key)
598
+ self.pyautogui.keyUp(mapped_key)
565
599
 
566
600
  result = ToolResult(output=f"Held key '{key}' for {duration} seconds")
567
601
 
@@ -579,7 +613,7 @@ class PyAutoGUIExecutor(BaseExecutor):
579
613
  async def position(self) -> ToolResult:
580
614
  """Get current cursor position."""
581
615
  try:
582
- x, y = pyautogui.position()
616
+ x, y = self.pyautogui.position()
583
617
  return ToolResult(output=f"Mouse position: ({x}, {y})")
584
618
  except Exception as e:
585
619
  return ToolResult(error=str(e))
hud/tools/executors/tests/test_pyautogui_executor.py CHANGED
@@ -7,7 +7,10 @@ from unittest.mock import AsyncMock, MagicMock, patch
7
7
  import pytest
8
8
 
9
9
  from hud.tools.base import ToolResult
10
- from hud.tools.executors.pyautogui import PYAUTOGUI_AVAILABLE, PyAutoGUIExecutor
10
+ from hud.tools.executors.pyautogui import PyAutoGUIExecutor
11
+
12
+ # Check if pyautogui is available for test skipping
13
+ PYAUTOGUI_AVAILABLE = PyAutoGUIExecutor.is_available()
11
14
 
12
15
 
13
16
  class TestPyAutoGUIExecutor:
hud/tools/playwright_tool.py CHANGED
@@ -21,8 +21,9 @@ logger = logging.getLogger(__name__)
21
21
  class PlaywrightTool:
22
22
  """Playwright tool for web automation."""
23
23
 
24
- def __init__(self) -> None:
24
+ def __init__(self, cdp_url: str | None = None) -> None:
25
25
  super().__init__()
26
+ self._cdp_url = cdp_url
26
27
  self._playwright = None
27
28
  self._browser: Browser | None = None
28
29
  self._context: BrowserContext | None = None
@@ -46,9 +47,6 @@ class PlaywrightTool:
46
47
  None, description="CSS selector for element (for click, type, wait_for_element actions)"
47
48
  ),
48
49
  text: str | None = Field(None, description="Text to type (for type action)"),
49
- path: str | None = Field(
50
- None, description="File path to save screenshot (for screenshot action)"
51
- ),
52
50
  wait_for_load_state: Literal["commit", "domcontentloaded", "load", "networkidle"]
53
51
  | None = Field(
54
52
  None,
@@ -74,7 +72,7 @@ class PlaywrightTool:
74
72
  result = await self.navigate(url, wait_for_load_state or "networkidle")
75
73
 
76
74
  elif action == "screenshot":
77
- result = await self.screenshot(path)
75
+ result = await self.screenshot()
78
76
 
79
77
  elif action == "click":
80
78
  if selector is None:
@@ -119,17 +117,13 @@ class PlaywrightTool:
119
117
  # Convert dict result to ToolResult
120
118
  if isinstance(result, dict):
121
119
  if result.get("success"):
122
- if "screenshot" in result:
123
- # Return screenshot as image content
124
- tool_result = ToolResult(
125
- output=result.get("message", ""), base64_image=result["screenshot"]
126
- )
127
- else:
128
- tool_result = ToolResult(output=result.get("message", ""))
120
+ tool_result = ToolResult(output=result.get("message", ""))
129
121
  else:
130
122
  tool_result = ToolResult(error=result.get("error", "Unknown error"))
131
- else:
123
+ elif isinstance(result, ToolResult):
132
124
  tool_result = result
125
+ else:
126
+ tool_result = ToolResult(output=str(result))
133
127
 
134
128
  # Convert result to content blocks
135
129
  return tool_result_to_content_blocks(tool_result)
@@ -143,10 +137,14 @@ class PlaywrightTool:
143
137
  async def _ensure_browser(self) -> None:
144
138
  """Ensure browser is launched and ready."""
145
139
  if self._browser is None or not self._browser.is_connected():
146
- logger.info("Launching Playwright browser...")
140
+ if self._cdp_url:
141
+ logger.info("Connecting to remote browser via CDP: %s", self._cdp_url)
142
+ else:
143
+ logger.info("Launching Playwright browser...")
147
144
 
148
- # Ensure DISPLAY is set
149
- os.environ["DISPLAY"] = os.environ.get("DISPLAY", ":1")
145
+ # Ensure DISPLAY is set (only needed for local browser)
146
+ if not self._cdp_url:
147
+ os.environ["DISPLAY"] = os.environ.get("DISPLAY", ":1")
150
148
 
151
149
  if self._playwright is None:
152
150
  try:
@@ -158,37 +156,56 @@ class PlaywrightTool:
158
156
  "Playwright is not installed. Please install with: pip install playwright"
159
157
  ) from None
160
158
 
161
- self._browser = await self._playwright.chromium.launch(
162
- headless=False,
163
- args=[
164
- "--no-sandbox",
165
- "--disable-dev-shm-usage",
166
- "--disable-gpu",
167
- "--disable-web-security",
168
- "--disable-features=IsolateOrigins,site-per-process",
169
- "--disable-blink-features=AutomationControlled",
170
- "--window-size=1920,1080",
171
- "--window-position=0,0",
172
- "--start-maximized",
173
- "--disable-background-timer-throttling",
174
- "--disable-backgrounding-occluded-windows",
175
- "--disable-renderer-backgrounding",
176
- "--disable-features=TranslateUI",
177
- "--disable-ipc-flooding-protection",
178
- "--disable-default-apps",
179
- "--no-first-run",
180
- "--disable-sync",
181
- "--no-default-browser-check",
182
- ],
183
- )
184
-
185
- if self._browser is None:
186
- raise RuntimeError("Browser failed to initialize")
187
-
188
- self._context = await self._browser.new_context(
189
- viewport={"width": 1920, "height": 1080},
190
- ignore_https_errors=True,
191
- )
159
+ # Connect via CDP URL or launch local browser
160
+ if self._cdp_url:
161
+ # Connect to remote browser via CDP
162
+ self._browser = await self._playwright.chromium.connect_over_cdp(self._cdp_url)
163
+
164
+ if self._browser is None:
165
+ raise RuntimeError("Failed to connect to remote browser")
166
+
167
+ # Use existing context or create new one
168
+ contexts = self._browser.contexts
169
+ if contexts:
170
+ self._context = contexts[0]
171
+ else:
172
+ self._context = await self._browser.new_context(
173
+ viewport={"width": 1920, "height": 1080},
174
+ ignore_https_errors=True,
175
+ )
176
+ else:
177
+ # Launch local browser
178
+ self._browser = await self._playwright.chromium.launch(
179
+ headless=False,
180
+ args=[
181
+ "--no-sandbox",
182
+ "--disable-dev-shm-usage",
183
+ "--disable-gpu",
184
+ "--disable-web-security",
185
+ "--disable-features=IsolateOrigins,site-per-process",
186
+ "--disable-blink-features=AutomationControlled",
187
+ "--window-size=1920,1080",
188
+ "--window-position=0,0",
189
+ "--start-maximized",
190
+ "--disable-background-timer-throttling",
191
+ "--disable-backgrounding-occluded-windows",
192
+ "--disable-renderer-backgrounding",
193
+ "--disable-features=TranslateUI",
194
+ "--disable-ipc-flooding-protection",
195
+ "--disable-default-apps",
196
+ "--no-first-run",
197
+ "--disable-sync",
198
+ "--no-default-browser-check",
199
+ ],
200
+ )
201
+
202
+ if self._browser is None:
203
+ raise RuntimeError("Browser failed to initialize")
204
+
205
+ self._context = await self._browser.new_context(
206
+ viewport={"width": 1920, "height": 1080},
207
+ ignore_https_errors=True,
208
+ )
192
209
 
193
210
  if self._context is None:
194
211
  raise RuntimeError("Browser context failed to initialize")
@@ -234,35 +251,24 @@ class PlaywrightTool:
234
251
  "message": f"Failed to navigate to {url}: {e}",
235
252
  }
236
253
 
237
- async def screenshot(self, path: str | None = None) -> dict[str, Any]:
254
+ async def screenshot(self) -> ToolResult:
238
255
  """Take a screenshot of the current page.
239
256
 
240
- Args:
241
- path: Optional path to save screenshot
242
-
243
257
  Returns:
244
- Dict with screenshot result
258
+ ToolResult with base64_image
245
259
  """
246
260
  await self._ensure_browser()
247
261
 
248
262
  try:
249
- if path:
250
- await self.page.screenshot(path=path, full_page=True)
251
- return {"success": True, "path": path, "message": f"Screenshot saved to {path}"}
252
- else:
253
- # Return base64 encoded screenshot
254
- screenshot_bytes = await self.page.screenshot(full_page=True)
255
- import base64
256
-
257
- screenshot_b64 = base64.b64encode(screenshot_bytes).decode()
258
- return {
259
- "success": True,
260
- "screenshot": screenshot_b64,
261
- "message": "Screenshot captured",
262
- }
263
+ # Always return base64 encoded screenshot as ToolResult
264
+ screenshot_bytes = await self.page.screenshot(full_page=True)
265
+ import base64
266
+
267
+ screenshot_b64 = base64.b64encode(screenshot_bytes).decode()
268
+ return ToolResult(base64_image=screenshot_b64)
263
269
  except Exception as e:
264
270
  logger.error("Screenshot failed: %s", e)
265
- return {"success": False, "error": str(e), "message": f"Failed to take screenshot: {e}"}
271
+ return ToolResult(error=f"Failed to take screenshot: {e}")
266
272
 
267
273
  async def click(self, selector: str) -> dict[str, Any]:
268
274
  """Click an element by selector.
hud/tools/tests/test_edit.py CHANGED
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import os
6
+ import sys
6
7
  import tempfile
7
8
  from pathlib import Path
8
9
  from unittest.mock import AsyncMock, patch
@@ -37,8 +38,14 @@ class TestEditTool:
37
38
  """Test validate_path when file doesn't exist for non-create commands."""
38
39
  tool = EditTool()
39
40
 
41
+ # Use a platform-appropriate absolute path
42
+ if sys.platform == "win32":
43
+ nonexistent_path = Path("C:\\nonexistent\\file.txt")
44
+ else:
45
+ nonexistent_path = Path("/nonexistent/file.txt")
46
+
40
47
  with pytest.raises(ToolError) as exc_info:
41
- tool.validate_path("view", Path("/nonexistent/file.txt"))
48
+ tool.validate_path("view", nonexistent_path)
42
49
 
43
50
  assert "does not exist" in str(exc_info.value)
44
51
 
hud/tools/tests/test_tools.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import asyncio
4
4
  import inspect
5
+ import sys
5
6
 
6
7
  import pytest
7
8
  from mcp.types import ImageContent, TextContent
@@ -72,6 +73,7 @@ async def test_bash_tool_restart_and_no_command():
72
73
 
73
74
 
74
75
  @pytest.mark.asyncio
76
+ @pytest.mark.skipif(sys.platform == "win32", reason="EditTool uses Unix commands")
75
77
  async def test_edit_tool_flow(tmp_path):
76
78
  file_path = tmp_path / "demo.txt"
77
79
 
@@ -106,6 +108,7 @@ async def test_base_executor_simulation():
106
108
 
107
109
 
108
110
  @pytest.mark.asyncio
111
+ @pytest.mark.skipif(sys.platform == "win32", reason="EditTool uses Unix commands")
109
112
  async def test_edit_tool_view(tmp_path):
110
113
  # Create a temporary file
111
114
  p = tmp_path / "sample.txt"
hud/trajectory.py CHANGED
@@ -3,7 +3,6 @@ from __future__ import annotations
3
3
 
4
4
  import datetime
5
5
 
6
- from IPython.display import HTML, Markdown, display
7
6
  from pydantic import BaseModel, Field
8
7
 
9
8
  from .adapters.common.types import LogType
@@ -30,6 +29,11 @@ class Trajectory(BaseModel):
30
29
  trajectory: list[TrajectoryStep] = Field(default_factory=list)
31
30
 
32
31
  def display(self) -> None:
32
+ try:
33
+ from IPython.display import HTML, Markdown, display
34
+ except ImportError:
35
+ raise ImportError("IPython is required for trajectory display") from None
36
+
33
37
  trajectory_start_timestamp_str = self.trajectory[0].start_timestamp
34
38
  t_start_dt = (
35
39
  datetime.datetime.fromisoformat(trajectory_start_timestamp_str.replace("Z", "+00:00"))
hud/utils/tests/test_version.py CHANGED
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.3.0"
8
+ assert hud.__version__ == "0.3.1"
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.3.0"
7
+ __version__ = "0.3.1"