PyPI - hud-python - Versions diffs - 0.4.52__py3-none-any.whl → 0.4.54__py3-none-any.whl - Mend

hud-python 0.4.52__py3-none-any.whl → 0.4.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (70) hide show

hud/agents/base.py +9 -2
hud/agents/openai_chat_generic.py +15 -3
hud/agents/tests/test_base.py +15 -0
hud/agents/tests/test_base_runtime.py +164 -0
hud/cli/__init__.py +20 -12
hud/cli/build.py +35 -27
hud/cli/dev.py +13 -31
hud/cli/eval.py +85 -84
hud/cli/tests/test_analyze_module.py +120 -0
hud/cli/tests/test_build.py +24 -2
hud/cli/tests/test_build_failure.py +41 -0
hud/cli/tests/test_build_module.py +50 -0
hud/cli/tests/test_cli_more_wrappers.py +30 -0
hud/cli/tests/test_cli_root.py +134 -0
hud/cli/tests/test_eval.py +6 -6
hud/cli/tests/test_mcp_server.py +8 -7
hud/cli/tests/test_push_happy.py +74 -0
hud/cli/tests/test_push_wrapper.py +23 -0
hud/cli/utils/docker.py +120 -1
hud/cli/utils/runner.py +1 -1
hud/cli/utils/tests/__init__.py +0 -0
hud/cli/utils/tests/test_config.py +58 -0
hud/cli/utils/tests/test_docker.py +93 -0
hud/cli/utils/tests/test_docker_hints.py +71 -0
hud/cli/utils/tests/test_env_check.py +74 -0
hud/cli/utils/tests/test_environment.py +42 -0
hud/cli/utils/tests/test_interactive_module.py +60 -0
hud/cli/utils/tests/test_local_runner.py +50 -0
hud/cli/utils/tests/test_logging_utils.py +23 -0
hud/cli/utils/tests/test_metadata.py +49 -0
hud/cli/utils/tests/test_package_runner.py +35 -0
hud/cli/utils/tests/test_registry_utils.py +49 -0
hud/cli/utils/tests/test_remote_runner.py +25 -0
hud/cli/utils/tests/test_runner_modules.py +52 -0
hud/cli/utils/tests/test_source_hash.py +36 -0
hud/cli/utils/tests/test_tasks.py +80 -0
hud/cli/utils/version_check.py +2 -2
hud/datasets/tests/__init__.py +0 -0
hud/datasets/tests/test_runner.py +106 -0
hud/datasets/tests/test_utils.py +228 -0
hud/otel/tests/__init__.py +0 -1
hud/otel/tests/test_instrumentation.py +207 -0
hud/server/tests/test_server_extra.py +2 -0
hud/shared/exceptions.py +35 -4
hud/shared/hints.py +25 -0
hud/shared/requests.py +15 -3
hud/shared/tests/test_exceptions.py +31 -23
hud/shared/tests/test_hints.py +167 -0
hud/telemetry/tests/test_async_context.py +242 -0
hud/telemetry/tests/test_instrument.py +414 -0
hud/telemetry/tests/test_job.py +609 -0
hud/telemetry/tests/test_trace.py +183 -5
hud/tools/computer/settings.py +2 -2
hud/tools/tests/test_submit.py +85 -0
hud/tools/tests/test_types.py +193 -0
hud/types.py +17 -1
hud/utils/agent_factories.py +1 -3
hud/utils/mcp.py +1 -1
hud/utils/tests/test_agent_factories.py +60 -0
hud/utils/tests/test_mcp.py +4 -6
hud/utils/tests/test_pretty_errors.py +186 -0
hud/utils/tests/test_tasks.py +187 -0
hud/utils/tests/test_tool_shorthand.py +154 -0
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.4.52.dist-info → hud_python-0.4.54.dist-info}/METADATA +49 -49
{hud_python-0.4.52.dist-info → hud_python-0.4.54.dist-info}/RECORD +70 -32
{hud_python-0.4.52.dist-info → hud_python-0.4.54.dist-info}/WHEEL +0 -0
{hud_python-0.4.52.dist-info → hud_python-0.4.54.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.52.dist-info → hud_python-0.4.54.dist-info}/licenses/LICENSE +0 -0

hud/agents/base.py CHANGED Viewed

@@ -137,7 +137,11 @@ class MCPAgent(ABC):
                 "No MCPClient. Please provide one when initializing the agent or pass a Task with mcp_config."  # noqa: E501
             )
-        await self._setup_config(self.mcp_client.mcp_config)
+        try:
+            client_cfg = getattr(self.mcp_client, "mcp_config", None)
+        except Exception:
+            client_cfg = None
+        await self._setup_config(client_cfg)
         # Initialize client if needed
         try:
@@ -618,8 +622,11 @@ class MCPAgent(ABC):
             except Exception as e:
                 self.console.error_log(f"Response lifecycle tool failed: {e}")
-    async def _setup_config(self, mcp_config: dict[str, dict[str, Any]]) -> None:
+    async def _setup_config(self, mcp_config: dict[str, dict[str, Any]] | None) -> None:
         """Inject metadata into the metadata of the initialize request."""
+        if not isinstance(mcp_config, dict):
+            return
         if self.metadata:
             patch_mcp_config(
                 mcp_config,

hud/agents/openai_chat_generic.py CHANGED Viewed

@@ -20,6 +20,7 @@ import logging
 from typing import TYPE_CHECKING, Any, ClassVar, cast
 import mcp.types as types
+from openai import AsyncOpenAI
 from hud import instrument
 from hud.types import AgentResponse, MCPToolCall, MCPToolResult
@@ -28,7 +29,6 @@ from hud.utils.hud_console import HUDConsole
 from .base import MCPAgent
 if TYPE_CHECKING:
-    from openai import AsyncOpenAI
     from openai.types.chat import ChatCompletionToolParam
 logger = logging.getLogger(__name__)
@@ -42,14 +42,26 @@ class GenericOpenAIChatAgent(MCPAgent):
     def __init__(
         self,
         *,
-        openai_client: AsyncOpenAI | None,
+        openai_client: AsyncOpenAI | None = None,
+        api_key: str | None = None,
+        base_url: str | None = None,
         model_name: str = "gpt-4o-mini",
         completion_kwargs: dict[str, Any] | None = None,
         **agent_kwargs: Any,
     ) -> None:
         # Accept base-agent settings via **agent_kwargs (e.g., mcp_client, system_prompt, etc.)
         super().__init__(**agent_kwargs)
-        self.oai = openai_client
+        # Handle client creation - support both patterns
+        if openai_client is not None:
+            # Use provided client (backward compatibility)
+            self.oai = openai_client
+        elif api_key is not None or base_url is not None:
+            # Create client from config (new pattern, consistent with other agents)
+            self.oai = AsyncOpenAI(api_key=api_key, base_url=base_url)
+        else:
+            raise ValueError("Either openai_client or (api_key and base_url) must be provided")
         self.model_name = model_name
         self.completion_kwargs: dict[str, Any] = completion_kwargs or {}
         self.mcp_schemas = []

hud/agents/tests/test_base.py CHANGED Viewed

@@ -329,6 +329,21 @@ class TestBaseMCPAgent:
         # call_tools doesn't validate empty names, it will return error
         await agent.call_tools(tool_call)
+    def test_get_tool_schemas(self):
+        """Test getting tool schemas."""
+        agent = MockMCPAgent()
+        agent._available_tools = [
+            types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
+            types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
+        ]
+        schemas = agent.get_tool_schemas()
+        # Should include non-lifecycle tools
+        assert len(schemas) == 2
+        assert schemas[0]["name"] == "tool1"
     def test_get_tools_by_server(self):
         """Test getting tools grouped by server."""
         agent = MockMCPAgent()

hud/agents/tests/test_base_runtime.py ADDED Viewed

@@ -0,0 +1,164 @@
+from __future__ import annotations
+from unittest import mock
+import mcp.types as types
+import pytest
+from hud.agents.base import MCPAgent, find_content, find_reward, text_to_blocks
+from hud.types import AgentResponse, MCPToolCall, MCPToolResult
+class DummyAgent(MCPAgent):
+    async def get_system_messages(self):
+        return [types.TextContent(text="sys", type="text")]
+    async def get_response(self, messages):
+        # Single step: no tool calls -> done
+        return AgentResponse(content="ok", tool_calls=[], done=True)
+    async def format_blocks(self, blocks):
+        # Return as-is
+        return blocks
+    async def format_tool_results(self, tool_calls, tool_results):
+        return [types.TextContent(text="tools", type="text")]
+@pytest.mark.asyncio
+async def test_run_with_string_prompt_auto_client(monkeypatch):
+    # Fake MCPClient with required methods
+    fake_client = mock.AsyncMock()
+    fake_client.initialize.return_value = None
+    fake_client.list_tools.return_value = []
+    fake_client.shutdown.return_value = None
+    # Patch MCPClient construction inside initialize()
+    with mock.patch("hud.clients.MCPClient", return_value=fake_client):
+        agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
+        result = await agent.run("hello", max_steps=1)
+    assert result.done is True and result.isError is False
+def test_find_reward_and_content_extractors():
+    # Structured content
+    r = MCPToolResult(
+        content=text_to_blocks("{}"), isError=False, structuredContent={"reward": 0.7}
+    )
+    assert find_reward(r) == 0.7
+    # Text JSON
+    r2 = MCPToolResult(content=text_to_blocks('{"score": 0.5, "content": "hi"}'), isError=False)
+    assert find_reward(r2) == 0.5
+    assert find_content(r2) == "hi"
+@pytest.mark.asyncio
+async def test_call_tools_error_paths():
+    fake_client = mock.AsyncMock()
+    # First call succeeds
+    ok_result = MCPToolResult(content=text_to_blocks("ok"), isError=False)
+    fake_client.call_tool.side_effect = [ok_result, RuntimeError("boom")]
+    agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
+    results = await agent.call_tools(
+        [MCPToolCall(name="a", arguments={}), MCPToolCall(name="b", arguments={})]
+    )
+    assert results[0].isError is False
+    assert results[1].isError is True
+@pytest.mark.asyncio
+async def test_initialize_without_client_raises_valueerror():
+    agent = DummyAgent(mcp_client=None, auto_trace=False)
+    with pytest.raises(ValueError):
+        await agent.initialize(None)
+def test_get_available_tools_before_initialize_raises():
+    agent = DummyAgent(mcp_client=mock.AsyncMock(), auto_trace=False)
+    with pytest.raises(RuntimeError):
+        agent.get_available_tools()
+@pytest.mark.asyncio
+async def test_format_message_invalid_type_raises():
+    agent = DummyAgent(mcp_client=mock.AsyncMock(), auto_trace=False)
+    with pytest.raises(ValueError):
+        await agent.format_message({"oops": 1})  # type: ignore
+@pytest.mark.asyncio
+async def test_call_tools_timeout_error_shutdown_called():
+    fake_client = mock.AsyncMock()
+    fake_client.call_tool.side_effect = TimeoutError("timeout")
+    fake_client.shutdown.return_value = None
+    agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
+    with pytest.raises(TimeoutError):
+        await agent.call_tools(MCPToolCall(name="x", arguments={}))
+    fake_client.shutdown.assert_awaited_once()
+def test_text_to_blocks_shapes():
+    blocks = text_to_blocks("x")
+    assert isinstance(blocks, list) and blocks and isinstance(blocks[0], types.TextContent)
+@pytest.mark.asyncio
+async def test_run_returns_connection_error_trace(monkeypatch):
+    fake_client = mock.AsyncMock()
+    fake_client.mcp_config = {}
+    fake_client.initialize.side_effect = RuntimeError("Connection refused http://localhost:1234")
+    fake_client.list_tools.return_value = []
+    fake_client.shutdown.return_value = None
+    class DummyCM:
+        def __exit__(self, *args, **kwargs):
+            return False
+    monkeypatch.setattr("hud.utils.mcp.setup_hud_telemetry", lambda *args, **kwargs: DummyCM())
+    agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
+    result = await agent.run("p", max_steps=1)
+    assert result.isError is True
+    assert "Could not connect" in (result.content or "")
+@pytest.mark.asyncio
+async def test_run_calls_response_tool_when_configured(monkeypatch):
+    fake_client = mock.AsyncMock()
+    fake_client.mcp_config = {}
+    fake_client.initialize.return_value = None
+    fake_client.list_tools.return_value = []
+    fake_client.shutdown.return_value = None
+    ok = MCPToolResult(content=text_to_blocks("ok"), isError=False)
+    fake_client.call_tool.return_value = ok
+    class DummyCM:
+        def __exit__(self, *args, **kwargs):
+            return False
+    monkeypatch.setattr("hud.utils.mcp.setup_hud_telemetry", lambda *args, **kwargs: DummyCM())
+    agent = DummyAgent(mcp_client=fake_client, auto_trace=False, response_tool_name="submit")
+    result = await agent.run("hello", max_steps=1)
+    assert result.isError is False
+    fake_client.call_tool.assert_awaited()
+@pytest.mark.asyncio
+async def test_get_available_tools_after_initialize(monkeypatch):
+    fake_client = mock.AsyncMock()
+    fake_client.mcp_config = {}
+    fake_client.initialize.return_value = None
+    fake_client.list_tools.return_value = []
+    fake_client.shutdown.return_value = None
+    class DummyCM:
+        def __exit__(self, *args, **kwargs):
+            return False
+    monkeypatch.setattr("hud.utils.mcp.setup_hud_telemetry", lambda *args, **kwargs: DummyCM())
+    agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
+    await agent.initialize(None)
+    assert agent.get_available_tools() == []

hud/cli/__init__.py CHANGED Viewed

@@ -12,6 +12,8 @@ from rich.console import Console
 from rich.panel import Panel
 from rich.table import Table
+from hud.types import AgentType
 from . import list_func as list_module
 from .analyze import (
     analyze_environment,
@@ -242,15 +244,18 @@ def debug(
                 if build and not build_environment(directory, image_name):
                     raise typer.Exit(1)
-            # Build Docker command
-            from .utils.docker import build_run_command
+            # Build Docker command with folder-mode envs
+            from .utils.docker import create_docker_run_command
-            command = build_run_command(image_name, docker_args)
+            command = create_docker_run_command(
+                image_name, docker_args=docker_args, env_dir=directory
+            )
         else:
             # Assume it's an image name
             image = first_param
             from .utils.docker import build_run_command
+            # Image-only mode: do not auto-inject local .env
             command = build_run_command(image, docker_args)
     else:
         console.print(
@@ -844,7 +849,7 @@ def eval(
     hud_console = HUDConsole()
     if integration_test:
-        agent = "integration_test"
+        agent = AgentType.INTEGRATION_TEST
     # If no source provided, reuse RL helper to find a tasks file interactively
     if source is None:
@@ -891,17 +896,17 @@ def eval(
         # Add standard agent choices
         choices.extend(
             [
-                {"name": "Claude 4 Sonnet", "value": "claude"},
-                {"name": "OpenAI Computer Use", "value": "openai"},
-                {"name": "vLLM (Local Server)", "value": "vllm"},
-                {"name": "LiteLLM (Multi-provider)", "value": "litellm"},
+                {"name": "Claude 4 Sonnet", "value": AgentType.CLAUDE},
+                {"name": "OpenAI Computer Use", "value": AgentType.OPENAI},
+                {"name": "vLLM (Local Server)", "value": AgentType.VLLM},
+                {"name": "LiteLLM (Multi-provider)", "value": AgentType.LITELLM},
             ]
         )
         agent = hud_console.select("Select an agent to use:", choices=choices, default=0)
     # Handle HUD model selection
-    if agent and agent not in ["claude", "openai", "vllm", "litellm", "integration_test"]:
+    if agent and agent not in [e.value for e in AgentType]:
         # Find remote model name
         model = agent
         if not vllm_base_url:
@@ -918,20 +923,23 @@ def eval(
             hud_console.error(f"Model {model} not found")
             raise typer.Exit(1)
         model = base_model
-        agent = "vllm"  # Use vLLM backend for HUD models
+        agent = AgentType.VLLM  # Use vLLM backend for HUD models
         hud_console.info(f"Using HUD model: {model} (trained on {base_model})")
     # Validate agent choice
-    valid_agents = ["claude", "openai", "vllm", "litellm", "integration_test"]
+    valid_agents = [e.value for e in AgentType]
     if agent not in valid_agents:
         hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
         raise typer.Exit(1)
+    # Type narrowing: agent is now guaranteed to be an AgentType value after validation
+    agent = AgentType(agent)
     # Run the command
     eval_command(
         source=source,
         full=full,
-        agent=agent,  # type: ignore
+        agent=agent,
         model=model,
         allowed_tools=allowed_tools,
         max_concurrent=max_concurrent,

hud/cli/build.py CHANGED Viewed

@@ -161,49 +161,42 @@ async def analyze_mcp_environment(
     hud_console = HUDConsole()
     env_vars = env_vars or {}
-    # Build Docker command to run the image
-    docker_cmd = ["docker", "run", "--rm", "-i"]
+    # Build Docker command to run the image, injecting any provided env vars
+    from hud.cli.utils.docker import build_env_flags
-    # Add environment variables
-    for key, value in env_vars.items():
-        docker_cmd.extend(["-e", f"{key}={value}"])
+    docker_cmd = ["docker", "run", "--rm", "-i", *build_env_flags(env_vars), image]
-    docker_cmd.append(image)
+    # Show full docker command being used for analysis
+    hud_console.dim_info("Command:", " ".join(docker_cmd))
-    # Create MCP config
-    config = {
-        "server": {"command": docker_cmd[0], "args": docker_cmd[1:] if len(docker_cmd) > 1 else []}
-    }
+    # Create MCP config consistently with analyze helpers
+    from hud.cli.analyze import parse_docker_command
+    mcp_config = parse_docker_command(docker_cmd)
     # Initialize client and measure timing
     start_time = time.time()
-    client = MCPClient(mcp_config=config, verbose=verbose, auto_trace=False)
+    client = MCPClient(mcp_config=mcp_config, verbose=verbose, auto_trace=False)
     initialized = False
     try:
         if verbose:
-            hud_console.info(f"Initializing MCP client with command: {' '.join(docker_cmd)}")
+            hud_console.info("Initializing MCP client...")
-        # Add timeout to fail fast instead of hanging (30 seconds)
+        # Add timeout to fail fast instead of hanging (60 seconds)
         await asyncio.wait_for(client.initialize(), timeout=60.0)
         initialized = True
         initialize_ms = int((time.time() - start_time) * 1000)
-        # Get tools
-        tools = await client.list_tools()
-        # Extract tool information
-        tool_info = []
-        for tool in tools:
-            tool_dict = {"name": tool.name, "description": tool.description}
-            if hasattr(tool, "inputSchema") and tool.inputSchema:
-                tool_dict["inputSchema"] = tool.inputSchema
-            tool_info.append(tool_dict)
+        # Delegate to standard analysis helper for consistency
+        full_analysis = await client.analyze_environment()
+        # Normalize to build's expected fields
+        tools_list = full_analysis.get("tools", [])
         return {
             "initializeMs": initialize_ms,
-            "toolCount": len(tools),
-            "tools": tool_info,
+            "toolCount": len(tools_list),
+            "tools": tools_list,
             "success": True,
         }
     except TimeoutError:
@@ -295,6 +288,10 @@ def build_environment(
         hud_console.error(f"Directory not found: {directory}")
         raise typer.Exit(1)
+    from hud.cli.utils.docker import require_docker_running
+    require_docker_running()
     # Step 1: Check for hud.lock.yaml (previous build)
     lock_path = env_dir / "hud.lock.yaml"
     base_name = None
@@ -355,13 +352,24 @@ def build_environment(
     hud_console.success(f"Built temporary image: {temp_tag}")
-    # Analyze the environment
+    # Analyze the environment (merge folder .env if present)
     hud_console.progress_message("Analyzing MCP environment...")
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
     try:
-        analysis = loop.run_until_complete(analyze_mcp_environment(temp_tag, verbose, env_vars))
+        # Merge .env from env_dir for analysis only
+        try:
+            from hud.cli.utils.docker import load_env_vars_for_dir
+            env_from_file = load_env_vars_for_dir(env_dir)
+        except Exception:
+            env_from_file = {}
+        merged_env_for_analysis = {**env_from_file, **(env_vars or {})}
+        analysis = loop.run_until_complete(
+            analyze_mcp_environment(temp_tag, verbose, merged_env_for_analysis)
+        )
     except Exception as e:
         hud_console.error(f"Failed to analyze MCP environment: {e}")
         hud_console.info("")

hud/cli/dev.py CHANGED Viewed

@@ -238,9 +238,9 @@ async def run_mcp_module(
         if env_dir.exists() and (env_dir / "server.py").exists():
             hud_console.info("")
             hud_console.info(
-                f"{hud_console.sym.FLOW} Don't forget to start the environment backend:"
+                f"{hud_console.sym.FLOW} Don't forget to start the environment backend in another terminal:"
             )
-            hud_console.info("   cd ../environment && uvicorn server:app --reload")
+            hud_console.info("   cd environment && uv run python uvicorn server:app --reload")
         # Launch inspector if requested (first run only)
         if inspector and transport == "http":
@@ -504,15 +504,12 @@ def run_docker_dev_server(
     base_name = image_name.replace(":", "-").replace("/", "-")
     container_name = f"{base_name}-dev-{pid}"
-    # Build docker run command with volume mounts
-    docker_cmd = [
-        "docker",
-        "run",
-        "--rm",
-        "-i",
+    # Build docker run command with volume mounts and folder-mode envs
+    from .utils.docker import create_docker_run_command
+    base_args = [
         "--name",
         container_name,
-        # Mount both server and environment for hot-reload
         "-v",
         f"{env_dir.absolute()}/server:/app/server:rw",
         "-v",
@@ -524,29 +521,14 @@ def run_docker_dev_server(
         "-e",
         "HUD_DEV=1",
     ]
+    combined_args = [*base_args, *docker_args] if docker_args else base_args
+    docker_cmd = create_docker_run_command(
+        image_name,
+        docker_args=combined_args,
+        env_dir=env_dir,
+    )
-    # Load .env file if present
-    env_file = env_dir / ".env"
-    loaded_env_vars: dict[str, str] = {}
-    if env_file.exists():
-        try:
-            from hud.cli.utils.config import parse_env_file
-            env_contents = env_file.read_text(encoding="utf-8")
-            loaded_env_vars = parse_env_file(env_contents)
-            for key, value in loaded_env_vars.items():
-                docker_cmd.extend(["-e", f"{key}={value}"])
-            if verbose and loaded_env_vars:
-                hud_console.info(f"Loaded {len(loaded_env_vars)} env var(s) from .env")
-        except Exception as e:
-            hud_console.warning(f"Failed to load .env file: {e}")
-    # Add user-provided Docker arguments
-    if docker_args:
-        docker_cmd.extend(docker_args)
-    # Append the image name
-    docker_cmd.append(image_name)
+    # Env flags already injected by create_docker_run_command
     # Print startup info
     hud_console.header("HUD Development Mode (Docker)")

hud-python 0.4.52__py3-none-any.whl → 0.4.54__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.52__py3-none-any.whl → 0.4.54__py3-none-any.whl