hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +70 -5
- hud/agents/base.py +238 -500
- hud/agents/claude.py +236 -247
- hud/agents/gateway.py +42 -0
- hud/agents/gemini.py +264 -0
- hud/agents/gemini_cua.py +324 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +48 -36
- hud/agents/openai.py +282 -296
- hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
- hud/agents/operator.py +199 -0
- hud/agents/resolver.py +70 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +381 -214
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +377 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_resolver.py +192 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/agents/types.py +148 -0
- hud/cli/__init__.py +493 -546
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +699 -113
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +889 -732
- hud/cli/eval.py +793 -667
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/pull.py +1 -1
- hud/cli/push.py +38 -13
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +110 -8
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push.py +1 -1
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +70 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +45 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +326 -0
- hud/datasets/runner.py +198 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +52 -0
- hud/environment/connection.py +258 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +137 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +835 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +263 -0
- hud/environment/scenarios.py +620 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +205 -0
- hud/environment/tests/test_environment.py +593 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +242 -0
- hud/environment/tests/test_scenarios.py +1086 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +727 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +187 -0
- hud/eval/manager.py +533 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +372 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +291 -0
- hud/eval/types.py +65 -0
- hud/eval/utils.py +194 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +308 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +165 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +18 -2
- hud/tools/agent.py +223 -0
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +36 -3
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_agent_tool.py +355 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +194 -56
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +89 -18
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.13.dist-info/METADATA +264 -0
- hud_python-0.5.13.dist-info/RECORD +305 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
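
Before the per-file diffs below, note the module-level moves visible in this listing: the `hud/rl/` and `hud/otel/` subpackages are removed, while `hud/environment/` and `hud/eval/` are new. A hedged sanity check (illustrative only; it assumes hud-python 0.5.13 is installed, and nothing beyond the module paths comes from the package itself):

```python
# Illustrative only: checks which subpackages exist after upgrading to 0.5.13.
# Module paths are taken from the file listing above; the rest is assumption.
import importlib.util

for removed in ("hud.rl", "hud.otel"):
    assert importlib.util.find_spec(removed) is None, f"{removed} was removed in 0.5.x"

for added in ("hud.environment", "hud.eval"):
    assert importlib.util.find_spec(added) is not None, f"{added} is new in 0.5.x"
```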

hud/clients/environment.py
ADDED

@@ -0,0 +1,51 @@
+"""Environment-based client adapter for agents."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+import mcp.types as types
+
+from hud.types import MCPToolCall, MCPToolResult
+
+if TYPE_CHECKING:
+    from hud.environment import Environment
+    from hud.eval.context import EvalContext
+
+__all__ = ["EnvironmentClient"]
+
+
+class EnvironmentClient:
+    """Adapter wrapping Environment/EvalContext as AgentMCPClient."""
+
+    def __init__(self, env: Environment | EvalContext) -> None:
+        self._env = env
+        self._initialized = False
+
+    @property
+    def mcp_config(self) -> dict[str, dict[str, Any]]:
+        return {}
+
+    @property
+    def is_connected(self) -> bool:
+        return self._initialized
+
+    async def initialize(self, mcp_config: dict[str, dict[str, Any]] | None = None) -> None:
+        if not self._initialized:
+            await self._env.list_tools()
+            self._initialized = True
+
+    async def list_tools(self) -> list[types.Tool]:
+        return await self._env.list_tools()
+
+    async def call_tool(self, tool_call: MCPToolCall) -> MCPToolResult:
+        result = await self._env.call_tool(tool_call.name, **(tool_call.arguments or {}))
+        if isinstance(result, MCPToolResult):
+            return result
+        return MCPToolResult(
+            content=[types.TextContent(type="text", text=str(result))],
+            isError=False,
+        )
+
+    async def shutdown(self) -> None:
+        self._initialized = False
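
The adapter above delegates to just two awaitable methods on the wrapped object, so a minimal sketch can exercise it with a stub in place of a real `hud.environment.Environment`. The stub and its `echo` tool are invented here for illustration; the `MCPToolCall`/`MCPToolResult` usage follows only the fields visible in the diff and is otherwise assumed.

```python
# Sketch only: StubEnvironment is a hypothetical stand-in, not the real
# hud.environment.Environment; it implements just the two methods the adapter calls.
import asyncio

import mcp.types as types

from hud.clients.environment import EnvironmentClient
from hud.types import MCPToolCall  # assumed constructible with name/arguments


class StubEnvironment:
    async def list_tools(self) -> list[types.Tool]:
        return [types.Tool(name="echo", inputSchema={"type": "object"})]

    async def call_tool(self, name: str, **kwargs: object) -> str:
        return f"{name}: {kwargs}"


async def main() -> None:
    client = EnvironmentClient(StubEnvironment())  # type: ignore[arg-type]
    await client.initialize()  # lists tools once and marks the adapter connected
    print([tool.name for tool in await client.list_tools()])

    # Plain return values are wrapped into MCPToolResult(content=[TextContent], isError=False).
    result = await client.call_tool(MCPToolCall(name="echo", arguments={"x": 1}))
    print(result.content[0].text, result.isError)

    await client.shutdown()


asyncio.run(main())
```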
hud/clients/fastmcp.py
CHANGED

@@ -12,6 +12,7 @@ from fastmcp import Client as FastMCPClient
 from mcp import Implementation, types
 from mcp.shared.exceptions import McpError
 
+from hud.settings import settings
 from hud.types import MCPToolCall, MCPToolResult
 from hud.version import __version__ as hud_version
 
@@ -73,7 +74,7 @@ class FastMCPHUDClient(BaseHUDClient):
             return
 
         # Create FastMCP client with the custom transport
-        timeout =
+        timeout = settings.client_timeout
        os.environ["FASTMCP_CLIENT_INIT_TIMEOUT"] = str(timeout)
 
         # Create custom transport with retry support for HTTP servers
@@ -91,11 +92,11 @@ class FastMCPHUDClient(BaseHUDClient):
             # Check if connecting to HUD API
             for server_config in mcp_config.values():
                 url = server_config.get("url", "")
-                if "mcp.hud.
+                if "mcp.hud.ai" in url:
                     raise RuntimeError(
                         "Authentication failed for HUD API. "
                         "Please ensure your HUD_API_KEY environment variable is set correctly."  # noqa: E501
-                        "You can get an API key at https://hud.
+                        "You can get an API key at https://hud.ai"
                     ) from e
             # Generic 401 error
             raise RuntimeError(
@@ -110,7 +111,7 @@ class FastMCPHUDClient(BaseHUDClient):
                 hasattr(self._client, "_session_state")
                 and self._client._session_state.session is not None
             ):
-                self._client._session_state.session._validate_structured_outputs = (
+                self._client._session_state.session._validate_structured_outputs = (  # type: ignore[attr-defined]
                     self._strict_validation
                 )
         except ImportError:
@@ -124,6 +125,12 @@ class FastMCPHUDClient(BaseHUDClient):
             raise ValueError("Client is not connected, call initialize() first")
         return await self._client.list_tools()
 
+    async def _list_prompts_impl(self) -> list[types.Prompt]:
+        """List all available prompts (FastMCP supports this)."""
+        if self._client is None:
+            raise ValueError("Client is not connected, call initialize() first")
+        return await self._client.list_prompts()
+
     async def _call_tool(self, tool_call: MCPToolCall) -> MCPToolResult:
         """Execute a tool by name."""
         if self._client is None:
@@ -143,8 +150,8 @@ class FastMCPHUDClient(BaseHUDClient):
             structuredContent=result.structured_content,
         )
 
-    async def
-    """
+    async def _list_resources_impl(self) -> list[types.Resource]:
+        """Implementation of resource listing for FastMCP client."""
         if self._client is None:
             raise ValueError("Client is not connected, call initialize() first")
         return await self._client.list_resources()
hud/clients/mcp_use.py
CHANGED

@@ -9,9 +9,8 @@ from urllib.parse import urlparse
 
 from mcp import Implementation, types
 from mcp.shared.exceptions import McpError
-from mcp_use.client import MCPClient as MCPUseClient
-from mcp_use.session import MCPSession as MCPUseSession
-from mcp_use.types.http import HttpOptions
+from mcp_use.client.client import MCPClient as MCPUseClient
+from mcp_use.client.session import MCPSession as MCPUseSession
 from pydantic import AnyUrl
 
 from hud.settings import settings
@@ -20,7 +19,6 @@ from hud.utils.hud_console import HUDConsole
 from hud.version import __version__ as hud_version
 
 from .base import BaseHUDClient
-from .utils.retry_transport import create_retry_httpx_client
 
 logger = logging.getLogger(__name__)
 hud_console = HUDConsole(logger=logger)
@@ -58,12 +56,6 @@ class MCPUseHUDClient(BaseHUDClient):
             str, tuple[str, types.Tool, types.Tool]
         ] = {}  # server_name, original_tool, prefixed_tool
         self._client: Any | None = None  # Will be MCPUseClient when available
-        # Transport options for MCP-use (disable_sse_fallback, httpx_client_factory, etc.)
-        # Default to retry-enabled HTTPX client if factory not provided
-        self._http_options: HttpOptions = HttpOptions(
-            httpx_client_factory=create_retry_httpx_client,
-            disable_sse_fallback=True,
-        )
 
     async def _connect(self, mcp_config: dict[str, dict[str, Any]]) -> None:
         """Create all sessions for MCP-use client."""
@@ -71,6 +63,16 @@ class MCPUseHUDClient(BaseHUDClient):
             logger.warning("Client is already connected, cannot connect again")
             return
 
+        # Use configurable timeout for SSE read operations to support long-running tool calls.
+        max_request_timeout = 840
+        for server_cfg in mcp_config.values():
+            if "sse_read_timeout" not in server_cfg:
+                server_cfg["sse_read_timeout"] = (
+                    min(settings.client_timeout, max_request_timeout)
+                    if settings.client_timeout > 0
+                    else max_request_timeout
+                )
+
         # If a server target matches HUD's MCP host and no auth is provided,
         # inject the HUD API key as a Bearer token to avoid OAuth browser flow.
         try:
@@ -88,11 +90,13 @@ class MCPUseHUDClient(BaseHUDClient):
         config = {"mcpServers": mcp_config}
         if MCPUseClient is None:
             raise ImportError("MCPUseClient is not available")
-        self._client = MCPUseClient.from_dict(config
+        self._client = MCPUseClient.from_dict(config)
         try:
-            assert self._client is not None
+            assert self._client is not None
             self._sessions = await self._client.create_all_sessions()
-
+            session_count = len(self._sessions)
+            session_text = "session" if session_count == 1 else "sessions"
+            hud_console.info(f"Created {session_count} MCP {session_text}")
 
         # Configure validation for all sessions based on client setting
         try:
@@ -241,8 +245,8 @@ class MCPUseHUDClient(BaseHUDClient):
                 structuredContent=result.structuredContent,
             )
 
-    async def
-    """
+    async def _list_resources_impl(self) -> list[types.Resource]:
+        """Implementation of resource listing for MCP-use client."""
         if self._client is None or not self._sessions:
             raise ValueError("Client is not connected, call initialize() first")
 
@@ -268,6 +272,32 @@ class MCPUseHUDClient(BaseHUDClient):
                 continue
         return []
 
+    async def _list_prompts_impl(self) -> list[types.Prompt]:
+        """Implementation of prompt listing for MCP-use client (best-effort)."""
+        if self._client is None or not self._sessions:
+            raise ValueError("Client is not connected, call initialize() first")
+
+        all_prompts: list[types.Prompt] = []
+        for server_name, session in self._sessions.items():
+            try:
+                if not hasattr(session, "connector") or not hasattr(
+                    session.connector, "client_session"
+                ):
+                    continue
+                if session.connector.client_session is None:
+                    continue
+
+                if not hasattr(session.connector.client_session, "list_prompts"):
+                    continue
+
+                prompts_result = await session.connector.client_session.list_prompts()
+                all_prompts.extend(prompts_result.prompts)
+            except Exception as e:
+                if self.verbose:
+                    hud_console.debug(f"Could not list prompts from server '{server_name}': {e}")
+                continue
+        return all_prompts
+
     async def read_resource(self, uri: str | AnyUrl) -> types.ReadResourceResult | None:
         """Read a resource by URI from any server that provides it."""
         if self._client is None or not self._sessions:
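
The `sse_read_timeout` defaulting added to `_connect()` above is easy to lift out on its own. Below is a standalone sketch of the same clamping rule; `resolve_sse_read_timeout`, `apply_default_sse_read_timeout`, and the constant name are written here for illustration, since the package inlines this logic rather than exposing helpers.

```python
# Standalone sketch of the sse_read_timeout defaulting shown in the diff above.
MAX_REQUEST_TIMEOUT = 840  # seconds; matches max_request_timeout in the diff


def resolve_sse_read_timeout(client_timeout: float) -> float:
    """Clamp a positive configured timeout to the 840 s ceiling; otherwise use the ceiling."""
    if client_timeout > 0:
        return min(client_timeout, MAX_REQUEST_TIMEOUT)
    return MAX_REQUEST_TIMEOUT


def apply_default_sse_read_timeout(mcp_config: dict[str, dict], client_timeout: float) -> None:
    """Per-server defaulting: only fill sse_read_timeout when the config omits it."""
    for server_cfg in mcp_config.values():
        server_cfg.setdefault("sse_read_timeout", resolve_sse_read_timeout(client_timeout))


config = {"local": {"command": "python", "args": ["-m", "server"]}}
apply_default_sse_read_timeout(config, client_timeout=3600)
assert config["local"]["sse_read_timeout"] == 840  # clamped to the ceiling
```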

hud/clients/tests/test_analyze_scenarios.py
ADDED

@@ -0,0 +1,206 @@
+"""Tests for scenario discovery via prompts/resources in analyze_environment()."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+import pytest
+from mcp import types
+from pydantic import AnyUrl
+
+from hud.clients.base import BaseHUDClient
+
+if TYPE_CHECKING:
+    from hud.types import MCPToolCall, MCPToolResult
+
+
+class _MockClient(BaseHUDClient):
+    """Minimal BaseHUDClient for testing analyze_environment scenario derivation."""
+
+    def __init__(
+        self,
+        *,
+        prompts: list[types.Prompt],
+        resources: list[types.Resource],
+    ) -> None:
+        super().__init__(mcp_config={"test": {"url": "mock://test"}}, verbose=True)
+        self._mock_prompts = prompts
+        self._mock_resources = resources
+        # Skip initialize() (which fetches telemetry); we just need analyze_environment().
+        self._initialized = True
+
+    async def _connect(self, mcp_config: dict[str, dict[str, Any]]) -> None:  # pragma: no cover
+        return None
+
+    async def list_tools(self) -> list[types.Tool]:
+        return []
+
+    async def _list_resources_impl(self) -> list[types.Resource]:
+        return self._mock_resources
+
+    async def _list_prompts_impl(self) -> list[types.Prompt]:
+        return self._mock_prompts
+
+    async def _call_tool(self, tool_call: MCPToolCall) -> MCPToolResult:  # pragma: no cover
+        raise NotImplementedError
+
+    async def read_resource(self, uri: str) -> types.ReadResourceResult | None:  # pragma: no cover
+        return None
+
+    async def _disconnect(self) -> None:  # pragma: no cover
+        return None
+
+
+@pytest.mark.asyncio
+async def test_analyze_environment_derives_scenarios_from_scenario_prompt_and_resource() -> None:
+    prompts = [
+        types.Prompt(
+            name="my-env:checkout",
+            description="[Setup] Checkout flow",
+            arguments=[],
+        )
+    ]
+    resources = [
+        types.Resource(
+            uri=AnyUrl("my-env:checkout"),
+            name="checkout",
+            description="[Evaluate] Checkout flow",
+        )
+    ]
+
+    client = _MockClient(prompts=prompts, resources=resources)
+    analysis = await client.analyze_environment()
+
+    assert "scenarios" in analysis
+    assert len(analysis["scenarios"]) == 1
+    scenario = analysis["scenarios"][0]
+    assert scenario["id"] == "my-env:checkout"
+    assert scenario["env"] == "my-env"
+    assert scenario["name"] == "checkout"
+    assert scenario["has_setup_prompt"] is True
+    assert scenario["has_evaluate_resource"] is True
+
+
+@pytest.mark.asyncio
+async def test_analyze_environment_scenario_from_setup_only() -> None:
+    prompts = [
+        types.Prompt(
+            name="env-x:only_setup",
+            description="[Setup] Setup only scenario",
+            arguments=[],
+        )
+    ]
+    resources: list[types.Resource] = []
+
+    client = _MockClient(prompts=prompts, resources=resources)
+    analysis = await client.analyze_environment()
+
+    assert len(analysis["scenarios"]) == 1
+    scenario = analysis["scenarios"][0]
+    assert scenario["id"] == "env-x:only_setup"
+    assert scenario["has_setup_prompt"] is True
+    assert scenario["has_evaluate_resource"] is False
+
+
+@pytest.mark.asyncio
+async def test_analyze_environment_scenario_from_evaluate_only() -> None:
+    prompts: list[types.Prompt] = []
+    resources = [
+        types.Resource(
+            uri=AnyUrl("env-y:only_eval"),
+            name="only_eval",
+            description="[Evaluate] Evaluate only scenario",
+        )
+    ]
+
+    client = _MockClient(prompts=prompts, resources=resources)
+    analysis = await client.analyze_environment()
+
+    assert len(analysis["scenarios"]) == 1
+    scenario = analysis["scenarios"][0]
+    assert scenario["id"] == "env-y:only_eval"
+    assert scenario["has_setup_prompt"] is False
+    assert scenario["has_evaluate_resource"] is True
+
+
+@pytest.mark.asyncio
+async def test_analyze_environment_extracts_scenario_code_from_meta() -> None:
+    """Test that scenario code is extracted from the meta field."""
+    scenario_code = """@env.scenario()
+async def checkout(product_id: str):
+    await env.call_tool("navigate", url="/checkout")
+    yield "Complete the checkout"
+    result = await env.call_tool("check_order")
+    yield 1.0 if result else 0.0
+"""
+    # Use model_validate with _meta alias (Pydantic alias for the meta field)
+    prompts = [
+        types.Prompt.model_validate(
+            {
+                "name": "my-env:checkout",
+                "description": "[Setup] Checkout flow",
+                "arguments": [{"name": "product_id", "required": True}],
+                "_meta": {"code": scenario_code},
+            }
+        )
+    ]
+    resources = [
+        types.Resource.model_validate(
+            {
+                "uri": "my-env:checkout",
+                "name": "checkout",
+                "description": "[Evaluate] Checkout flow",
+                "_meta": {"code": scenario_code},
+            }
+        )
+    ]
+
+    client = _MockClient(prompts=prompts, resources=resources)
+    analysis = await client.analyze_environment()
+
+    assert len(analysis["scenarios"]) == 1
+    scenario = analysis["scenarios"][0]
+    assert scenario["id"] == "my-env:checkout"
+    assert "code" in scenario
+    assert scenario["code"] == scenario_code
+    assert "async def checkout" in scenario["code"]
+
+
+@pytest.mark.asyncio
+async def test_analyze_environment_extracts_meta_on_prompts_and_resources() -> None:
+    """Test that meta field is included in prompts and resources analysis."""
+    meta_data = {"code": "test code", "extra": "value"}
+    # Use model_validate with _meta alias (Pydantic alias for the meta field)
+    prompts = [
+        types.Prompt.model_validate(
+            {
+                "name": "test-prompt",
+                "description": "A test prompt",
+                "arguments": [],
+                "_meta": meta_data,
+            }
+        )
+    ]
+    resources = [
+        types.Resource.model_validate(
+            {
+                "uri": "file:///test",
+                "name": "test-resource",
+                "description": "A test resource",
+                "_meta": meta_data,
+            }
+        )
+    ]
+
+    client = _MockClient(prompts=prompts, resources=resources)
+    analysis = await client.analyze_environment()
+
+    # Check prompts have meta
+    assert len(analysis["prompts"]) == 1
+    assert "meta" in analysis["prompts"][0]
+    assert analysis["prompts"][0]["meta"] == meta_data
+
+    # Check resources have meta
+    assert len(analysis["resources"]) == 1
+    assert "meta" in analysis["resources"][0]
+    assert analysis["resources"][0]["meta"] == meta_data
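
These tests pin down a naming convention rather than an implementation: a scenario surfaces as a prompt and/or resource identified by "<env>:<name>", whose descriptions start with [Setup] and [Evaluate] and which may carry source code under _meta["code"]. Below is a simplified sketch of that derivation; it is not the package's analyze_environment() (a BaseHUDClient method, which may differ), and it takes plain dicts standing in for mcp.types.Prompt / mcp.types.Resource.

```python
# Simplified sketch, not the package implementation.
def derive_scenarios(prompts: list[dict], resources: list[dict]) -> list[dict]:
    scenarios: dict[str, dict] = {}

    def record(identifier: str) -> dict:
        env, _, name = identifier.partition(":")
        return scenarios.setdefault(
            identifier,
            {
                "id": identifier,
                "env": env,
                "name": name,
                "has_setup_prompt": False,
                "has_evaluate_resource": False,
            },
        )

    for prompt in prompts:
        if (prompt.get("description") or "").startswith("[Setup]"):
            record(prompt["name"])["has_setup_prompt"] = True
    for resource in resources:
        if (resource.get("description") or "").startswith("[Evaluate]"):
            record(str(resource["uri"]))["has_evaluate_resource"] = True
    return list(scenarios.values())


[scenario] = derive_scenarios(
    prompts=[{"name": "my-env:checkout", "description": "[Setup] Checkout flow"}],
    resources=[{"uri": "my-env:checkout", "description": "[Evaluate] Checkout flow"}],
)
assert scenario == {
    "id": "my-env:checkout",
    "env": "my-env",
    "name": "checkout",
    "has_setup_prompt": True,
    "has_evaluate_resource": True,
}
```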

hud/clients/tests/test_protocol.py
CHANGED

@@ -35,9 +35,15 @@ class MockClient(BaseHUDClient):
             raise RuntimeError("Not connected")
         return self._mock_tools
 
-    async def
-    """Minimal
-
+    async def _list_resources_impl(self) -> list[types.Resource]:
+        """Minimal resource listing implementation for tests."""
+        from pydantic import AnyUrl
+
+        return [
+            types.Resource(
+                uri=AnyUrl("telemetry://live"), name="telemetry", description="Live telemetry data"
+            )
+        ]
 
     async def _call_tool(self, tool_call: MCPToolCall) -> MCPToolResult:
         if tool_call.name == "test_tool":
hud/datasets/__init__.py
CHANGED

@@ -1,33 +1,36 @@
 """HUD datasets module.
 
-Provides
+Provides unified task loading, saving, and execution for HUD evaluations.
+
+Key functions:
+- load_tasks(): Load tasks from JSON, JSONL, HuggingFace, or HUD API
+- save_tasks(): Save tasks to the HUD API
+- run_dataset(): Run an agent on a dataset of tasks
+- submit_rollouts(): Submit tasks for remote execution
+
+Supports both v4 (LegacyTask) and v5 (Task) formats with automatic conversion.
 """
 
-# Data models
-# Execution functions
 from __future__ import annotations
 
-from hud.
+from hud.eval.display import display_results
 
-from .
-
-
-
+from .loader import load_dataset, load_tasks, save_tasks
+from .runner import run_dataset, run_single_task
+from .utils import (
+    BatchRequest,
+    SingleTaskRequest,
+    submit_rollouts,
 )
-from .runner import run_dataset
-
-# Utilities
-from .utils import fetch_system_prompt_from_dataset, save_tasks
 
 __all__ = [
-
-    "
-    "
-    #
-    "
-    # Execution
+    "BatchRequest",
+    "SingleTaskRequest",
+    "display_results",
+    "load_dataset",  # Deprecated alias
+    "load_tasks",
     "run_dataset",
-    "
-    "run_dataset_parallel_manual",
+    "run_single_task",
     "save_tasks",
+    "submit_rollouts",
 ]