hud-python 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (86) hide show
  1. hud/__init__.py +20 -8
  2. hud/adapters/common/adapter.py +14 -3
  3. hud/adapters/common/tests/test_adapter.py +16 -4
  4. hud/datasets.py +188 -0
  5. hud/env/docker_client.py +15 -3
  6. hud/env/environment.py +10 -7
  7. hud/env/local_docker_client.py +29 -7
  8. hud/env/remote_client.py +1 -1
  9. hud/env/remote_docker_client.py +2 -2
  10. hud/exceptions.py +2 -1
  11. hud/gym.py +0 -9
  12. hud/mcp/__init__.py +17 -0
  13. hud/mcp/base.py +631 -0
  14. hud/mcp/claude.py +321 -0
  15. hud/mcp/client.py +312 -0
  16. hud/mcp/langchain.py +250 -0
  17. hud/mcp/openai.py +334 -0
  18. hud/mcp/tests/__init__.py +1 -0
  19. hud/mcp/tests/test_base.py +512 -0
  20. hud/mcp/tests/test_claude.py +294 -0
  21. hud/mcp/tests/test_client.py +324 -0
  22. hud/mcp/tests/test_openai.py +238 -0
  23. hud/settings.py +20 -2
  24. hud/task.py +5 -88
  25. hud/taskset.py +2 -23
  26. hud/telemetry/__init__.py +16 -7
  27. hud/telemetry/_trace.py +246 -72
  28. hud/telemetry/context.py +88 -27
  29. hud/telemetry/exporter.py +171 -11
  30. hud/telemetry/instrumentation/mcp.py +174 -410
  31. hud/telemetry/job.py +141 -0
  32. hud/telemetry/mcp_models.py +13 -74
  33. hud/telemetry/tests/test_context.py +9 -6
  34. hud/telemetry/tests/test_trace.py +120 -78
  35. hud/tools/__init__.py +34 -0
  36. hud/tools/base.py +65 -0
  37. hud/tools/bash.py +137 -0
  38. hud/tools/computer/__init__.py +13 -0
  39. hud/tools/computer/anthropic.py +411 -0
  40. hud/tools/computer/hud.py +315 -0
  41. hud/tools/computer/openai.py +283 -0
  42. hud/tools/edit.py +290 -0
  43. hud/tools/executors/__init__.py +30 -0
  44. hud/tools/executors/base.py +331 -0
  45. hud/tools/executors/pyautogui.py +619 -0
  46. hud/tools/executors/tests/__init__.py +1 -0
  47. hud/tools/executors/tests/test_base_executor.py +338 -0
  48. hud/tools/executors/tests/test_pyautogui_executor.py +165 -0
  49. hud/tools/executors/xdo.py +503 -0
  50. hud/tools/helper/README.md +56 -0
  51. hud/tools/helper/__init__.py +9 -0
  52. hud/tools/helper/mcp_server.py +78 -0
  53. hud/tools/helper/server_initialization.py +115 -0
  54. hud/tools/helper/utils.py +58 -0
  55. hud/tools/playwright_tool.py +379 -0
  56. hud/tools/tests/__init__.py +3 -0
  57. hud/tools/tests/test_bash.py +152 -0
  58. hud/tools/tests/test_computer.py +52 -0
  59. hud/tools/tests/test_computer_actions.py +34 -0
  60. hud/tools/tests/test_edit.py +240 -0
  61. hud/tools/tests/test_init.py +27 -0
  62. hud/tools/tests/test_playwright_tool.py +183 -0
  63. hud/tools/tests/test_tools.py +157 -0
  64. hud/tools/tests/test_utils.py +156 -0
  65. hud/tools/utils.py +50 -0
  66. hud/trajectory.py +5 -1
  67. hud/types.py +10 -1
  68. hud/utils/tests/test_init.py +21 -0
  69. hud/utils/tests/test_version.py +1 -1
  70. hud/version.py +1 -1
  71. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/METADATA +27 -18
  72. hud_python-0.3.1.dist-info/RECORD +119 -0
  73. hud/evaluators/__init__.py +0 -9
  74. hud/evaluators/base.py +0 -32
  75. hud/evaluators/inspect.py +0 -24
  76. hud/evaluators/judge.py +0 -189
  77. hud/evaluators/match.py +0 -156
  78. hud/evaluators/remote.py +0 -65
  79. hud/evaluators/tests/__init__.py +0 -0
  80. hud/evaluators/tests/test_inspect.py +0 -12
  81. hud/evaluators/tests/test_judge.py +0 -231
  82. hud/evaluators/tests/test_match.py +0 -115
  83. hud/evaluators/tests/test_remote.py +0 -98
  84. hud_python-0.2.10.dist-info/RECORD +0 -85
  85. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/WHEEL +0 -0
  86. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,115 @@
1
+ """Helper for MCP server initialization with progress notifications.
2
+
3
+ Example:
4
+ ```python
5
+ from hud.tools.helper import mcp_intialize_wrapper
6
+
7
+
8
+ @mcp_intialize_wrapper
9
+ async def initialize_environment(session=None, progress_token=None):
10
+ # Send progress if available
11
+ if session and progress_token:
12
+ await session.send_progress_notification(
13
+ progress_token=progress_token, progress=0, total=100, message="Starting services..."
14
+ )
15
+
16
+ # Your initialization code works with or without session
17
+ start_services()
18
+
19
+
20
+ # Create and run server - initialization happens automatically
21
+ mcp = FastMCP("My Server")
22
+ mcp.run()
23
+ ```
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ from typing import TYPE_CHECKING
29
+
30
+ import mcp.types as types
31
+ from mcp.server.session import ServerSession
32
+
33
+ if TYPE_CHECKING:
34
+ from collections.abc import Awaitable, Callable
35
+
36
+ from mcp.shared.session import RequestResponder
37
+
38
# Keep a reference to the unpatched ServerSession._received_request so the
# patched handler can delegate to it and later restore it on the class.
_original_received_request = ServerSession._received_request
# Initialization coroutine function registered via mcp_intialize_wrapper
# (None until one has been registered).
_init_function: Callable | None = None
# Set to True by _patched_received_request; while False, the registered init
# function is still eligible to run on an InitializeRequest.
_initialized = False
42
+
43
+
44
async def _patched_received_request(
    self: ServerSession, responder: RequestResponder[types.ClientRequest, types.ServerResult]
) -> types.ServerResult | None:
    """Intercept initialization to run custom setup with progress notifications.

    Installed in place of ``ServerSession._received_request``. When the incoming
    request is an ``InitializeRequest`` and a registered init function has not
    yet completed, that function is awaited with this session and the client's
    progress token (if one was supplied) before the request is handed to the
    original handler. All other requests are passed straight through.

    Args:
        self: The server session that received the request.
        responder: Responder wrapping the incoming client request.

    Returns:
        Whatever the original ``_received_request`` handler returns.

    Raises:
        Exception: Anything raised by the init function is re-raised after a
            failure progress notification has been sent (when a progress token
            is available).
    """
    global _initialized

    # Check if this is an initialization request
    if isinstance(responder.request.root, types.InitializeRequest):
        params = responder.request.root.params
        # Extract progress token if present
        progress_token = None
        if hasattr(params, "meta") and params.meta and hasattr(params.meta, "progressToken"):
            progress_token = params.meta.progressToken

        # Run our initialization function if provided and not already done
        if _init_function and not _initialized:
            try:
                await _init_function(session=self, progress_token=progress_token)
            except Exception as e:
                if progress_token:
                    await self.send_progress_notification(
                        progress_token=progress_token,
                        progress=0,
                        total=100,
                        message=f"Initialization failed: {e!s}",
                    )
                raise

            # Only a completed init counts as "initialized". Previously the flag
            # was set after *any* delegated request, so a request arriving
            # before InitializeRequest (e.g. a ping) silently disabled the
            # init function for the lifetime of the process.
            _initialized = True
            # Setup is done: remove the monkey patch so later requests go
            # directly to the original handler.
            ServerSession._received_request = _original_received_request

    # Call the original handler (for initialization this sends the InitializeResult)
    return await _original_received_request(self, responder)
78
+
79
+
80
def mcp_intialize_wrapper(
    init_function: Callable[[ServerSession | None, str | None], Awaitable[None]] | None = None,
) -> Callable:
    """Register an init function to run during MCP server initialization.

    The registered function receives optional ``session`` and ``progress_token``
    keyword arguments. When both are provided it can use them to send progress
    updates; when they are not, it should still work.

    Works both as a bare decorator (``@mcp_intialize_wrapper``, which passes the
    function in directly) and as a decorator factory
    (``@mcp_intialize_wrapper()``, which returns the registering decorator).

    Usage:
        @mcp_intialize_wrapper
        async def initialize(session=None, progress_token=None):
            if session and progress_token:
                await session.send_progress_notification(...)
            # Your init code here

    Must be applied before creating FastMCP instance or calling mcp.run().

    Args:
        init_function: The coroutine function to register, or ``None`` to get
            back a decorator that performs the registration.

    Returns:
        The registered function unchanged, or the registering decorator when
        ``init_function`` is ``None``.
    """

    def _register(func: Callable[[ServerSession | None, str | None], Awaitable[None]]) -> Callable:
        global _init_function
        # Remember the function so the patched request handler can call it
        _init_function = func

        # Install the monkey patch unless it is already in place
        if ServerSession._received_request != _patched_received_request:
            ServerSession._received_request = _patched_received_request  # type: ignore[assignment]

        return func

    # Factory form returns the decorator; bare form registers immediately
    return _register if init_function is None else _register(init_function)
@@ -0,0 +1,58 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import inspect
5
+ from functools import wraps
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ if TYPE_CHECKING:
9
+ from collections.abc import Callable
10
+
11
+ from mcp.server.fastmcp import FastMCP
12
+
13
+
14
def register_instance_tool(mcp: FastMCP, name: str, instance: Any) -> Callable[..., Any]:
    """Register ``instance.__call__`` as a FastMCP tool.

    The tool is exposed through an async wrapper whose public signature is the
    signature of ``instance.__call__`` with ``*args``/``**kwargs`` removed and
    every remaining parameter annotated as ``Any``. Parameter names, order and
    defaults are kept.

    Parameters
    ----------
    mcp:
        A :class:`mcp.server.fastmcp.FastMCP` instance.
    name:
        Public tool name.
    instance:
        Object with an ``async def __call__`` (or sync) implementing the tool.
        A sync ``__call__`` may also return any awaitable (coroutine, Future,
        Task, object with ``__await__``); it is awaited before returning.

    Returns
    -------
    The value returned by ``mcp.tool(name=name)`` applied to the wrapper.

    Raises
    ------
    TypeError
        If ``instance`` is a class rather than an instance.
    """

    if inspect.isclass(instance):
        class_name = instance.__name__
        raise TypeError(
            f"register_instance_tool() expects an instance, but got class '{class_name}'. "
            f"Use: register_instance_tool(mcp, '{name}', {class_name}()) "
            f"Not: register_instance_tool(mcp, '{name}', {class_name})"
        )

    call_fn = instance.__call__
    sig = inspect.signature(call_fn)

    # Remove *args/**kwargs so Pydantic doesn't treat them as required fields
    filtered = [
        p.replace(kind=p.POSITIONAL_OR_KEYWORD, annotation=Any)
        for p in sig.parameters.values()
        if p.kind not in (p.VAR_POSITIONAL, p.VAR_KEYWORD)
    ]

    public_sig = inspect.Signature(parameters=filtered, return_annotation=Any)

    @wraps(call_fn)
    async def _wrapper(*args: Any, **kwargs: Any) -> Any:  # type: ignore[override]
        result = call_fn(*args, **kwargs)
        # isawaitable (not iscoroutine) so Futures/Tasks and objects defining
        # __await__ are resolved too instead of leaking out un-awaited.
        if inspect.isawaitable(result):
            result = await result
        return result

    # Advertise the filtered signature instead of the wrapper's (*args, **kwargs)
    _wrapper.__signature__ = public_sig  # type: ignore[attr-defined]

    return mcp.tool(name=name)(_wrapper)
@@ -0,0 +1,379 @@
1
+ """Playwright web automation tool for HUD."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ from typing import TYPE_CHECKING, Any, Literal
8
+
9
+ from mcp import ErrorData, McpError
10
+ from mcp.types import INVALID_PARAMS, ImageContent, TextContent
11
+ from pydantic import Field
12
+
13
+ from hud.tools.base import ToolResult, tool_result_to_content_blocks
14
+
15
+ if TYPE_CHECKING:
16
+ from playwright.async_api import Browser, BrowserContext, Page
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class PlaywrightTool:
    """Playwright tool for web automation.

    Callable object that dispatches a named action (navigate, screenshot,
    click, type, get_page_info, wait_for_element) to the matching method and
    returns the outcome as MCP content blocks. The browser is created lazily
    on the first action: either attached to over CDP when ``cdp_url`` is
    given, or launched locally as Chromium otherwise.
    """

    def __init__(self, cdp_url: str | None = None) -> None:
        """Store configuration; no browser is started here.

        Args:
            cdp_url: Optional CDP endpoint of an existing browser to connect
                to. When falsy, a local Chromium is launched on first use.
        """
        super().__init__()
        self._cdp_url = cdp_url
        # Playwright driver handle, created in _ensure_browser()
        self._playwright = None
        self._browser: Browser | None = None
        self._context: BrowserContext | None = None
        self._page: Page | None = None

    @property
    def page(self) -> Page:
        """Get the current page, raising an error if not initialized.

        Raises:
            RuntimeError: If no page has been created yet.
        """
        if self._page is None:
            # NOTE(review): the message names ensure_browser_launched(), but the
            # method defined on this class is _ensure_browser() - confirm wording.
            raise RuntimeError("Browser page is not initialized. Call ensure_browser_launched().")
        return self._page

    async def __call__(
        self,
        action: str = Field(
            ...,
            description="The action to perform (navigate, screenshot, click, type, get_page_info, wait_for_element)",  # noqa: E501
        ),
        url: str | None = Field(None, description="URL to navigate to (for navigate action)"),
        selector: str | None = Field(
            None, description="CSS selector for element (for click, type, wait_for_element actions)"
        ),
        text: str | None = Field(None, description="Text to type (for type action)"),
        wait_for_load_state: Literal["commit", "domcontentloaded", "load", "networkidle"]
        | None = Field(
            None,
            description="State to wait for: commit, domcontentloaded, load, networkidle (default: networkidle)",  # noqa: E501
        ),
    ) -> list[ImageContent | TextContent]:
        """
        Execute a Playwright web automation action.

        Validates that the parameters required by ``action`` are present,
        runs the corresponding method, and converts its result to content
        blocks with ``tool_result_to_content_blocks``.

        Returns:
            List of MCP content blocks

        Raises:
            McpError: With code ``INVALID_PARAMS`` when a required parameter is
                missing, the action is unknown, or any other exception is
                raised while handling the action.
        """
        logger.info("PlaywrightTool executing action: %s", action)

        try:
            if action == "navigate":
                if url is None:
                    raise McpError(
                        ErrorData(
                            code=INVALID_PARAMS, message="url parameter is required for navigate"
                        )
                    )
                # Fall back to "networkidle" when no load state was requested
                result = await self.navigate(url, wait_for_load_state or "networkidle")

            elif action == "screenshot":
                result = await self.screenshot()

            elif action == "click":
                if selector is None:
                    raise McpError(
                        ErrorData(
                            code=INVALID_PARAMS, message="selector parameter is required for click"
                        )
                    )
                result = await self.click(selector)

            elif action == "type":
                if selector is None:
                    raise McpError(
                        ErrorData(
                            code=INVALID_PARAMS, message="selector parameter is required for type"
                        )
                    )
                if text is None:
                    raise McpError(
                        ErrorData(
                            code=INVALID_PARAMS, message="text parameter is required for type"
                        )
                    )
                result = await self.type_text(selector, text)

            elif action == "get_page_info":
                result = await self.get_page_info()

            elif action == "wait_for_element":
                if selector is None:
                    raise McpError(
                        ErrorData(
                            code=INVALID_PARAMS,
                            message="selector parameter is required for wait_for_element",
                        )
                    )
                result = await self.wait_for_element(selector)

            else:
                raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Unknown action: {action}"))

            # Convert dict result to ToolResult: only "message" (on success) or
            # "error" (on failure) is carried over; other keys are not used here.
            if isinstance(result, dict):
                if result.get("success"):
                    tool_result = ToolResult(output=result.get("message", ""))
                else:
                    tool_result = ToolResult(error=result.get("error", "Unknown error"))
            elif isinstance(result, ToolResult):
                tool_result = result
            else:
                tool_result = ToolResult(output=str(result))

            # Convert result to content blocks
            return tool_result_to_content_blocks(tool_result)

        except McpError:
            # Already in protocol form (e.g. the validation errors above)
            raise
        except Exception as e:
            # Any other failure is reported to the caller as INVALID_PARAMS
            logger.error("PlaywrightTool error: %s", e)
            raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Playwright error: {e}")) from e

    async def _ensure_browser(self) -> None:
        """Ensure browser is launched and ready.

        Does nothing when a browser is held and reports itself connected.
        Otherwise starts Playwright if needed, connects over CDP or launches a
        local Chromium, obtains a browser context and opens a new page.

        Raises:
            ImportError: If the ``playwright`` package cannot be imported.
            RuntimeError: If the browser or its context is ``None`` after setup.
        """
        if self._browser is None or not self._browser.is_connected():
            if self._cdp_url:
                logger.info("Connecting to remote browser via CDP: %s", self._cdp_url)
            else:
                logger.info("Launching Playwright browser...")

            # Ensure DISPLAY is set (only needed for local browser);
            # an existing value is kept, otherwise ":1" is used.
            if not self._cdp_url:
                os.environ["DISPLAY"] = os.environ.get("DISPLAY", ":1")

            if self._playwright is None:
                try:
                    # Imported lazily so the module loads without playwright installed
                    from playwright.async_api import async_playwright

                    self._playwright = await async_playwright().start()
                except ImportError:
                    raise ImportError(
                        "Playwright is not installed. Please install with: pip install playwright"
                    ) from None

            # Connect via CDP URL or launch local browser
            if self._cdp_url:
                # Connect to remote browser via CDP
                self._browser = await self._playwright.chromium.connect_over_cdp(self._cdp_url)

                if self._browser is None:
                    raise RuntimeError("Failed to connect to remote browser")

                # Use existing context or create new one
                contexts = self._browser.contexts
                if contexts:
                    self._context = contexts[0]
                else:
                    self._context = await self._browser.new_context(
                        viewport={"width": 1920, "height": 1080},
                        ignore_https_errors=True,
                    )
            else:
                # Launch local browser (headed; relies on DISPLAY set above)
                self._browser = await self._playwright.chromium.launch(
                    headless=False,
                    args=[
                        "--no-sandbox",
                        "--disable-dev-shm-usage",
                        "--disable-gpu",
                        "--disable-web-security",
                        "--disable-features=IsolateOrigins,site-per-process",
                        "--disable-blink-features=AutomationControlled",
                        "--window-size=1920,1080",
                        "--window-position=0,0",
                        "--start-maximized",
                        "--disable-background-timer-throttling",
                        "--disable-backgrounding-occluded-windows",
                        "--disable-renderer-backgrounding",
                        "--disable-features=TranslateUI",
                        "--disable-ipc-flooding-protection",
                        "--disable-default-apps",
                        "--no-first-run",
                        "--disable-sync",
                        "--no-default-browser-check",
                    ],
                )

                if self._browser is None:
                    raise RuntimeError("Browser failed to initialize")

                self._context = await self._browser.new_context(
                    viewport={"width": 1920, "height": 1080},
                    ignore_https_errors=True,
                )

            if self._context is None:
                raise RuntimeError("Browser context failed to initialize")

            # A new page is opened in both the CDP and local-launch cases
            self._page = await self._context.new_page()
            logger.info("Playwright browser launched successfully")

    async def navigate(
        self,
        url: str,
        wait_for_load_state: Literal[
            "commit", "domcontentloaded", "load", "networkidle"
        ] = "networkidle",
    ) -> dict[str, Any]:
        """Navigate to a URL.

        Args:
            url: URL to navigate to
            wait_for_load_state: Load state to wait for
                (commit, domcontentloaded, load, networkidle)

        Returns:
            Dict with navigation result: ``success``, ``url``, ``title`` and
            ``message`` on success; ``success``, ``error`` and ``message`` on
            failure. Navigation errors are logged and returned, not raised.
        """
        await self._ensure_browser()

        logger.info("Navigating to %s", url)
        try:
            await self.page.goto(url, wait_until=wait_for_load_state)
            # Read the URL back from the page rather than echoing the input
            current_url = self.page.url
            title = await self.page.title()

            return {
                "success": True,
                "url": current_url,
                "title": title,
                "message": f"Successfully navigated to {url}",
            }
        except Exception as e:
            logger.error("Navigation failed: %s", e)
            return {
                "success": False,
                "error": str(e),
                "message": f"Failed to navigate to {url}: {e}",
            }

    async def screenshot(self) -> ToolResult:
        """Take a full-page screenshot of the current page.

        Returns:
            ToolResult with base64_image, or a ToolResult with ``error`` set
            if taking the screenshot fails.
        """
        await self._ensure_browser()

        try:
            # Always return base64 encoded screenshot as ToolResult
            screenshot_bytes = await self.page.screenshot(full_page=True)
            import base64

            screenshot_b64 = base64.b64encode(screenshot_bytes).decode()
            return ToolResult(base64_image=screenshot_b64)
        except Exception as e:
            logger.error("Screenshot failed: %s", e)
            return ToolResult(error=f"Failed to take screenshot: {e}")

    async def click(self, selector: str) -> dict[str, Any]:
        """Click an element by selector.

        Args:
            selector: CSS selector for element to click

        Returns:
            Dict with click result (``success`` and ``message``, plus ``error``
            on failure). Errors are logged and returned, not raised.
        """
        await self._ensure_browser()

        try:
            await self.page.click(selector)
            return {"success": True, "message": f"Clicked element: {selector}"}
        except Exception as e:
            logger.error("Click failed: %s", e)
            return {
                "success": False,
                "error": str(e),
                "message": f"Failed to click {selector}: {e}",
            }

    async def type_text(self, selector: str, text: str) -> dict[str, Any]:
        """Type text into an element.

        Implemented with ``page.fill``.

        Args:
            selector: CSS selector for input element
            text: Text to type

        Returns:
            Dict with type result (``success`` and ``message``, plus ``error``
            on failure). Errors are logged and returned, not raised.
        """
        await self._ensure_browser()

        try:
            await self.page.fill(selector, text)
            return {"success": True, "message": f"Typed '{text}' into {selector}"}
        except Exception as e:
            logger.error("Type failed: %s", e)
            return {
                "success": False,
                "error": str(e),
                "message": f"Failed to type into {selector}: {e}",
            }

    async def get_page_info(self) -> dict[str, Any]:
        """Get current page information.

        Returns:
            Dict with page info: ``success``, ``url``, ``title`` and
            ``message`` on success; ``success``, ``error`` and ``message`` on
            failure.
        """
        await self._ensure_browser()

        try:
            url = self.page.url
            title = await self.page.title()
            return {
                "success": True,
                "url": url,
                "title": title,
                "message": f"Current page: {title} ({url})",
            }
        except Exception as e:
            logger.error("Get page info failed: %s", e)
            return {"success": False, "error": str(e), "message": f"Failed to get page info: {e}"}

    async def wait_for_element(self, selector: str) -> dict[str, Any]:
        """Wait for an element to appear.

        Waits with a fixed timeout of 30000 ms.

        Args:
            selector: CSS selector for element

        Returns:
            Dict with wait result (``success`` and ``message``, plus ``error``
            on failure). Errors are logged and returned, not raised.
        """
        await self._ensure_browser()

        try:
            await self.page.wait_for_selector(selector, timeout=30000)
            return {"success": True, "message": f"Element {selector} appeared"}
        except Exception as e:
            logger.error("Wait for element failed: %s", e)
            return {
                "success": False,
                "error": str(e),
                "message": f"Element {selector} did not appear within 30000ms: {e}",
            }

    async def close(self) -> None:
        """Close browser and cleanup.

        Errors while closing the browser or stopping Playwright are logged and
        not raised; all held handles are reset to ``None`` afterwards.
        """
        if self._browser:
            try:
                await self._browser.close()
                logger.info("Browser closed")
            except Exception as e:
                logger.error("Error closing browser: %s", e)

        if self._playwright:
            try:
                await self._playwright.stop()
            except Exception as e:
                logger.error("Error stopping playwright: %s", e)

        # Drop every handle so a later action goes through _ensure_browser() again
        self._browser = None
        self._context = None
        self._page = None
        self._playwright = None
@@ -0,0 +1,3 @@
1
+ from __future__ import annotations
2
+
3
# Nothing is exported by a star-import of this package.
__all__ = []