PyPI - hud-python - Versions diffs - 0.5.9__tar.gz → 0.5.11__tar.gz - Mend

hud-python 0.5.9__tar.gz → 0.5.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (307) hide show

{hud_python-0.5.9 → hud_python-0.5.11}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.5.9
+Version: 0.5.11
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues

{hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/__init__.py RENAMED Viewed

@@ -56,15 +56,11 @@ def create_agent(model: str, **kwargs: Any) -> MCPAgent:
     if gateway_info:
         provider = gateway_info.get("provider") or "openai"
     else:
-        # Map agent class to provider for known types
-        from hud.agents.claude import ClaudeAgent
-        from hud.agents.gemini import GeminiAgent
-        _AGENT_TO_PROVIDER = {
-            ClaudeAgent: "anthropic",
-            GeminiAgent: "google",
-        }
-        provider = _AGENT_TO_PROVIDER.get(agent_cls, "openai")
+        provider = "openai"
+        if agent_cls.__name__ == "ClaudeAgent":
+            provider = "anthropic"
+        elif agent_cls.__name__ in ("GeminiAgent", "GeminiCUAAgent"):
+            provider = "gemini"
     client = build_gateway_client(provider)

{hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/base.py RENAMED Viewed

@@ -9,11 +9,12 @@ from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any, ClassVar, Literal
 import mcp.types as types
-from pydantic import BaseModel, ConfigDict
 from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
 from hud.utils.hud_console import HUDConsole
+from .types import BaseCreateParams
 if TYPE_CHECKING:
     from hud.environment import Environment
     from hud.eval.context import EvalContext
@@ -22,18 +23,6 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
-class BaseCreateParams(BaseModel):
-    """Runtime parameters for agent creation."""
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-    # Primary way to bind agent to execution context (v5)
-    ctx: Any | None = None  # EvalContext or Environment - agent uses this for tool calls
-    auto_respond: bool = False
-    verbose: bool = False
 class MCPAgent(ABC):
     """
     Base class for MCP-enabled agents.

{hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/claude.py RENAMED Viewed

@@ -25,7 +25,6 @@ from anthropic.types.beta import (
     BetaToolTextEditor20250728Param,
     BetaToolUnionParam,
 )
-from pydantic import ConfigDict
 from hud.settings import settings
 from hud.tools.computer.settings import computer_settings
@@ -33,7 +32,8 @@ from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
 from hud.utils.hud_console import HUDConsole
 from hud.utils.types import with_signature
-from .base import BaseCreateParams, MCPAgent
+from .base import MCPAgent
+from .types import ClaudeConfig, ClaudeCreateParams
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -41,21 +41,6 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
-class ClaudeConfig(BaseAgentConfig):
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-    model_name: str = "Claude"
-    model: str = "claude-sonnet-4-5"
-    model_client: AsyncAnthropic | AsyncAnthropicBedrock | None = None
-    max_tokens: int = 16384
-    use_computer_beta: bool = True
-    validate_api_key: bool = True
-class ClaudeCreateParams(BaseCreateParams, ClaudeConfig):
-    pass
 class ClaudeAgent(MCPAgent):
     """
     Claude agent that uses MCP servers for tool execution.
@@ -94,7 +79,7 @@ class ClaudeAgent(MCPAgent):
                     "or ANTHROPIC_API_KEY for direct Anthropic access."
                 )
-        self.anthropic_client = model_client
+        self.anthropic_client: AsyncAnthropic | AsyncAnthropicBedrock = model_client
         self.max_tokens = self.config.max_tokens
         self.use_computer_beta = self.config.use_computer_beta
         self.hud_console = HUDConsole(logger=logger)

{hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/gemini.py RENAMED Viewed

@@ -8,37 +8,18 @@ from typing import Any, ClassVar, cast
 import mcp.types as types
 from google import genai
 from google.genai import types as genai_types
-from pydantic import ConfigDict
 from hud.settings import settings
 from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
 from hud.utils.hud_console import HUDConsole
 from hud.utils.types import with_signature
-from .base import BaseCreateParams, MCPAgent
+from .base import MCPAgent
+from .types import GeminiConfig, GeminiCreateParams
 logger = logging.getLogger(__name__)
-class GeminiConfig(BaseAgentConfig):
-    """Configuration for `GeminiAgent`."""
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-    model_name: str = "Gemini"
-    model: str = "gemini-3-pro-preview"
-    model_client: genai.Client | None = None
-    temperature: float = 1.0
-    top_p: float = 0.95
-    top_k: int = 40
-    max_output_tokens: int = 8192
-    validate_api_key: bool = True
-class GeminiCreateParams(BaseCreateParams, GeminiConfig):
-    pass
 class GeminiAgent(MCPAgent):
     """
     Gemini agent that uses MCP servers for tool execution.
@@ -80,7 +61,7 @@ class GeminiAgent(MCPAgent):
             except Exception as e:
                 raise ValueError(f"Gemini API key is invalid: {e}") from e
-        self.gemini_client = model_client
+        self.gemini_client: genai.Client = model_client
         self.temperature = self.config.temperature
         self.top_p = self.config.top_p
         self.top_k = self.config.top_k

{hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/gemini_cua.py RENAMED Viewed

@@ -7,14 +7,14 @@ from typing import Any, ClassVar
 import mcp.types as types
 from google.genai import types as genai_types
-from pydantic import ConfigDict, Field
 from hud.tools.computer.settings import computer_settings
 from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
 from hud.utils.types import with_signature
-from .base import BaseCreateParams, MCPAgent
-from .gemini import GeminiAgent, GeminiConfig
+from .base import MCPAgent
+from .gemini import GeminiAgent
+from .types import GeminiCUAConfig, GeminiCUACreateParams
 logger = logging.getLogger(__name__)
@@ -56,20 +56,6 @@ what they asked.
 """.strip()
-class GeminiCUAConfig(GeminiConfig):
-    """Configuration for `GeminiCUAAgent`."""
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-    model_name: str = "GeminiCUA"
-    model: str = "gemini-2.5-computer-use-preview-10-2025"
-    excluded_predefined_functions: list[str] = Field(default_factory=list)
-class GeminiCUACreateParams(BaseCreateParams, GeminiCUAConfig):
-    pass
 class GeminiCUAAgent(GeminiAgent):
     """
     Gemini Computer Use Agent that extends GeminiAgent with computer use capabilities.

{hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/openai.py RENAMED Viewed

@@ -29,39 +29,18 @@ from openai.types.responses import (
 from openai.types.responses.response_create_params import ToolChoice  # noqa: TC002
 from openai.types.responses.response_input_param import FunctionCallOutput, Message
 from openai.types.shared_params.reasoning import Reasoning  # noqa: TC002
-from pydantic import ConfigDict
 from hud.settings import settings
 from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
 from hud.utils.strict_schema import ensure_strict_json_schema
 from hud.utils.types import with_signature
-from .base import BaseCreateParams, MCPAgent
+from .base import MCPAgent
+from .types import OpenAIConfig, OpenAICreateParams
 logger = logging.getLogger(__name__)
-class OpenAIConfig(BaseAgentConfig):
-    """Configuration model for `OpenAIAgent`."""
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-    model_name: str = "OpenAI"
-    model: str = "gpt-5.1"
-    model_client: AsyncOpenAI | None = None
-    max_output_tokens: int | None = None
-    temperature: float | None = None
-    reasoning: Reasoning | None = None
-    tool_choice: ToolChoice | None = None
-    truncation: Literal["auto", "disabled"] | None = None
-    parallel_tool_calls: bool | None = None
-    validate_api_key: bool = True
-class OpenAICreateParams(BaseCreateParams, OpenAIConfig):
-    pass
 class OpenAIAgent(MCPAgent):
     """Generic OpenAI agent that can execute MCP tools through the Responses API."""
@@ -98,11 +77,11 @@ class OpenAIAgent(MCPAgent):
             except Exception as exc:  # pragma: no cover - network validation
                 raise ValueError(f"OpenAI API key is invalid: {exc}") from exc
-        self.openai_client = model_client
+        self.openai_client: AsyncOpenAI = model_client
         self._model = self.config.model
         self.max_output_tokens = self.config.max_output_tokens
         self.temperature = self.config.temperature
-        self.reasoning = self.config.reasoning
+        self.reasoning: Reasoning | None = self.config.reasoning
         self.tool_choice: ToolChoice | None = self.config.tool_choice
         self.parallel_tool_calls = self.config.parallel_tool_calls
         self.truncation: Literal["auto", "disabled"] | None = self.config.truncation

{hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/openai_chat.py RENAMED Viewed

@@ -22,14 +22,14 @@ from typing import TYPE_CHECKING, Any, ClassVar, cast
 import mcp.types as types
 from openai import AsyncOpenAI
-from pydantic import ConfigDict, Field
 from hud.settings import settings
 from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
 from hud.utils.hud_console import HUDConsole
 from hud.utils.types import with_signature
-from .base import BaseCreateParams, MCPAgent
+from .base import MCPAgent
+from .types import OpenAIChatConfig, OpenAIChatCreateParams
 if TYPE_CHECKING:
     from openai.types.chat import ChatCompletionToolParam
@@ -38,23 +38,6 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
-class OpenAIChatConfig(BaseAgentConfig):
-    """Configuration for `OpenAIChatAgent`."""
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-    model_name: str = "OpenAI Chat"
-    model: str = "gpt-5-mini"
-    openai_client: AsyncOpenAI | None = None
-    api_key: str | None = None
-    base_url: str | None = None
-    completion_kwargs: dict[str, Any] = Field(default_factory=dict)
-class OpenAIChatCreateParams(BaseCreateParams, OpenAIChatConfig):
-    pass
 class OpenAIChatAgent(MCPAgent):
     """MCP-enabled agent that speaks the OpenAI *chat.completions* protocol."""
@@ -82,6 +65,7 @@ class OpenAIChatAgent(MCPAgent):
                 "Use HUD_API_KEY for gateway auth and BYOK headers for provider keys."
             )
+        self.oai: AsyncOpenAI
         if self.config.openai_client is not None:
             self.oai = self.config.openai_client
         elif self.config.api_key is not None or self.config.base_url is not None:

{hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/operator.py RENAMED Viewed

@@ -17,14 +17,14 @@ from openai.types.responses.response_input_param import (
     FunctionCallOutput,
 )
 from openai.types.shared_params.reasoning import Reasoning
-from pydantic import ConfigDict
 from hud.tools.computer.settings import computer_settings
 from hud.types import BaseAgentConfig, MCPToolCall, MCPToolResult
 from hud.utils.types import with_signature
-from .base import BaseCreateParams, MCPAgent
-from .openai import OpenAIAgent, OpenAIConfig
+from .base import MCPAgent
+from .openai import OpenAIAgent
+from .types import OperatorConfig, OperatorCreateParams
 if TYPE_CHECKING:
     from openai.types.responses.response_computer_tool_call import PendingSafetyCheck
@@ -50,20 +50,6 @@ what they asked.
 """.strip()
-class OperatorConfig(OpenAIConfig):
-    """Configuration model for `OperatorAgent`."""
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-    model_name: str = "Operator"
-    model: str = "computer-use-preview"
-    environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] = "linux"
-class OperatorCreateParams(BaseCreateParams, OperatorConfig):
-    pass
 class OperatorAgent(OpenAIAgent):
     """
     Backwards-compatible Operator agent built on top of OpenAIAgent.

hud_python-0.5.11/hud/agents/types.py ADDED Viewed

@@ -0,0 +1,148 @@
+"""Agent configuration types.
+Config classes are defined here separately from agent implementations
+to allow importing them without requiring SDK dependencies (anthropic, google-genai).
+"""
+from __future__ import annotations
+from typing import Any, Literal
+from pydantic import AliasChoices, BaseModel, ConfigDict, Field
+from hud.types import BaseAgentConfig
+# Alias to accept both 'model' and 'checkpoint_name' (backwards compat)
+_model_alias = AliasChoices("model", "checkpoint_name")
+class BaseCreateParams(BaseModel):
+    """Runtime parameters for agent creation."""
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    ctx: Any = None  # EvalContext or Environment
+    auto_respond: bool = False
+    verbose: bool = False
+# -----------------------------------------------------------------------------
+# Claude
+# -----------------------------------------------------------------------------
+class ClaudeConfig(BaseAgentConfig):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    model_name: str = "Claude"
+    model: str = Field(default="claude-sonnet-4-5", validation_alias=_model_alias)
+    model_client: Any = None  # AsyncAnthropic | AsyncAnthropicBedrock
+    max_tokens: int = 16384
+    use_computer_beta: bool = True
+    validate_api_key: bool = True
+class ClaudeCreateParams(BaseCreateParams, ClaudeConfig):
+    pass
+# -----------------------------------------------------------------------------
+# Gemini
+# -----------------------------------------------------------------------------
+class GeminiConfig(BaseAgentConfig):
+    """Configuration for GeminiAgent."""
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    model_name: str = "Gemini"
+    model: str = Field(default="gemini-3-pro-preview", validation_alias=_model_alias)
+    model_client: Any = None  # genai.Client
+    temperature: float = 1.0
+    top_p: float = 0.95
+    top_k: int = 40
+    max_output_tokens: int = 8192
+    validate_api_key: bool = True
+class GeminiCreateParams(BaseCreateParams, GeminiConfig):
+    pass
+class GeminiCUAConfig(GeminiConfig):
+    """Configuration for GeminiCUAAgent."""
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    model_name: str = "GeminiCUA"
+    model: str = Field(
+        default="gemini-2.5-computer-use-preview-10-2025", validation_alias=_model_alias
+    )
+    excluded_predefined_functions: list[str] = Field(default_factory=list)
+class GeminiCUACreateParams(BaseCreateParams, GeminiCUAConfig):
+    pass
+# -----------------------------------------------------------------------------
+# OpenAI
+# -----------------------------------------------------------------------------
+class OpenAIConfig(BaseAgentConfig):
+    """Configuration for OpenAIAgent."""
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    model_name: str = "OpenAI"
+    model: str = Field(default="gpt-5.1", validation_alias=_model_alias)
+    model_client: Any = None  # AsyncOpenAI
+    max_output_tokens: int | None = None
+    temperature: float | None = None
+    reasoning: Any = None  # openai Reasoning
+    tool_choice: Any = None  # openai ToolChoice
+    truncation: Literal["auto", "disabled"] | None = None
+    parallel_tool_calls: bool | None = None
+    validate_api_key: bool = True
+class OpenAICreateParams(BaseCreateParams, OpenAIConfig):
+    pass
+class OpenAIChatConfig(BaseAgentConfig):
+    """Configuration for OpenAIChatAgent."""
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    model_name: str = "OpenAI Chat"
+    model: str = Field(default="gpt-5-mini", validation_alias=_model_alias)
+    openai_client: Any = None  # AsyncOpenAI
+    api_key: str | None = None
+    base_url: str | None = None
+    completion_kwargs: dict[str, Any] = Field(default_factory=dict)
+class OpenAIChatCreateParams(BaseCreateParams, OpenAIChatConfig):
+    pass
+# -----------------------------------------------------------------------------
+# Operator
+# -----------------------------------------------------------------------------
+class OperatorConfig(OpenAIConfig):
+    """Configuration for OperatorAgent."""
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    model_name: str = "Operator"
+    model: str = Field(default="computer-use-preview", validation_alias=_model_alias)
+    environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] = "linux"
+class OperatorCreateParams(BaseCreateParams, OperatorConfig):
+    pass

{hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/eval.py RENAMED Viewed

@@ -564,7 +564,7 @@ class EvalConfig(BaseModel):
             table.add_row("", "")
             table.add_row(f"[dim]{self.agent_type.value} config[/dim]", "")
-            config_cls = self.agent_type.cls.config_cls
+            config_cls = self.agent_type.config_cls
             defaults = config_cls()
             overrides = self.agent_config.get(self.agent_type.value, {})
             skip = {

{hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/task.py RENAMED Viewed

@@ -287,8 +287,20 @@ class Task(BaseModel):
                 ]
             # Preserve agent_config
+            agent_config: dict[str, Any] = {}
             if data.get("agent_config"):
-                result["agent_config"] = data["agent_config"]
+                agent_config.update(data["agent_config"])
+            # Restore tool filters from Environment (they were extracted during v4 conversion)
+            if self.env is not None:
+                if getattr(self.env, "_agent_include", None) is not None:
+                    agent_config["allowed_tools"] = self.env._agent_include
+                elif "allowed_tools" not in agent_config:
+                    # ["*"] was converted to None, restore it for serialization
+                    agent_config["allowed_tools"] = ["*"]
+                if getattr(self.env, "_agent_exclude", None) is not None:
+                    agent_config["disallowed_tools"] = self.env._agent_exclude
+            if agent_config:
+                result["agent_config"] = agent_config
             # Preserve metadata
             if data.get("metadata"):

{hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/tests/test_task.py RENAMED Viewed

@@ -85,7 +85,11 @@ class TestTaskSerialization:
         task = Task.from_v4(v4_dict)
         data = task.model_dump(mode="json")
-        assert data.get("agent_config") == {"system_prompt": "Custom system prompt"}
+        # agent_config should preserve system_prompt and include default allowed_tools
+        assert data.get("agent_config") == {
+            "system_prompt": "Custom system prompt",
+            "allowed_tools": ["*"],  # Default when no allowed_tools specified
+        }
         # Roundtrip
         task2 = Task(**data)
@@ -250,3 +254,31 @@ class TestV4AgentConfigToolFilters:
         assert "my_setup_tool" not in tool_names
         assert "run_query" in tool_names
+    def test_v4_tool_filters_preserved_in_serialization(self) -> None:
+        """v4 tool filters are preserved when serializing for remote execution."""
+        v4_dict = {
+            "prompt": "Test prompt",
+            "mcp_config": {"server": {"url": "http://localhost"}},
+            "evaluate_tool": {"name": "check", "arguments": {}},
+            "agent_config": {
+                "allowed_tools": ["*"],
+                "disallowed_tools": ["*setup*", "*evaluate*", "*grade*"],
+            },
+        }
+        task = Task.from_v4(v4_dict)
+        # Serialize (this is what gets sent to remote execution)
+        data = task.model_dump(mode="json")
+        # agent_config must include the tool filters for remote execution
+        assert "agent_config" in data
+        assert data["agent_config"]["allowed_tools"] == ["*"]
+        assert data["agent_config"]["disallowed_tools"] == ["*setup*", "*evaluate*", "*grade*"]
+        # Verify roundtrip works (remote worker will deserialize this)
+        task2 = Task(**data)
+        assert task2.env is not None
+        assert task2.env._agent_include is None  # ["*"] → None
+        assert task2.env._agent_exclude == ["*setup*", "*evaluate*", "*grade*"]

hud-python 0.5.9__tar.gz → 0.5.11__tar.gz

hud-python 0.5.9__tar.gz → 0.5.11__tar.gz