PyPI - hud-python - Versions diffs - 0.5.1__py3-none-any.whl → 0.5.13__py3-none-any.whl - Mend

hud-python 0.5.1__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

hud/__init__.py +1 -1
hud/agents/__init__.py +65 -6
hud/agents/base.py +33 -15
hud/agents/claude.py +60 -31
hud/agents/gateway.py +42 -0
hud/agents/gemini.py +15 -26
hud/agents/gemini_cua.py +6 -17
hud/agents/misc/response_agent.py +7 -0
hud/agents/openai.py +16 -29
hud/agents/openai_chat.py +3 -19
hud/agents/operator.py +5 -17
hud/agents/resolver.py +70 -0
hud/agents/tests/test_claude.py +2 -4
hud/agents/tests/test_openai.py +2 -1
hud/agents/tests/test_resolver.py +192 -0
hud/agents/types.py +148 -0
hud/cli/__init__.py +34 -3
hud/cli/build.py +37 -5
hud/cli/dev.py +11 -2
hud/cli/eval.py +51 -39
hud/cli/flows/init.py +1 -1
hud/cli/pull.py +1 -1
hud/cli/push.py +9 -2
hud/cli/tests/test_build.py +2 -2
hud/cli/tests/test_push.py +1 -1
hud/cli/utils/metadata.py +1 -1
hud/cli/utils/tests/test_metadata.py +1 -1
hud/clients/mcp_use.py +6 -1
hud/datasets/loader.py +17 -18
hud/datasets/runner.py +16 -10
hud/datasets/tests/test_loader.py +15 -15
hud/environment/__init__.py +5 -3
hud/environment/connection.py +58 -6
hud/environment/connectors/mcp_config.py +29 -1
hud/environment/environment.py +218 -77
hud/environment/router.py +175 -24
hud/environment/scenarios.py +313 -186
hud/environment/tests/test_connectors.py +10 -23
hud/environment/tests/test_environment.py +432 -0
hud/environment/tests/test_local_connectors.py +81 -40
hud/environment/tests/test_scenarios.py +820 -14
hud/eval/context.py +63 -10
hud/eval/instrument.py +4 -2
hud/eval/manager.py +79 -12
hud/eval/task.py +36 -4
hud/eval/tests/test_eval.py +1 -1
hud/eval/tests/test_task.py +147 -1
hud/eval/types.py +2 -0
hud/eval/utils.py +14 -3
hud/patches/mcp_patches.py +178 -21
hud/telemetry/instrument.py +8 -1
hud/telemetry/tests/test_eval_telemetry.py +8 -8
hud/tools/__init__.py +2 -0
hud/tools/agent.py +223 -0
hud/tools/computer/__init__.py +34 -5
hud/tools/shell.py +3 -3
hud/tools/tests/test_agent_tool.py +355 -0
hud/types.py +62 -34
hud/utils/hud_console.py +30 -17
hud/utils/strict_schema.py +1 -1
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/METADATA +2 -2
{hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/RECORD +67 -61
{hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/WHEEL +0 -0
{hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
{hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0

hud/tools/tests/test_agent_tool.py ADDED Viewed

@@ -0,0 +1,355 @@
+"""Tests for AgentTool - scenario-to-agent composition."""
+from __future__ import annotations
+import inspect
+from unittest.mock import AsyncMock, MagicMock, patch
+import pytest
+from hud.environment import Environment
+from hud.eval.task import Task
+from hud.tools.agent import AgentTool, _is_eval_only
+class TestIsEvalOnly:
+    """Tests for _is_eval_only helper function."""
+    def test_required_param_not_eval_only(self) -> None:
+        """Required params (no default) are not eval-only."""
+        def fn(x: str) -> None:
+            pass
+        sig = inspect.signature(fn)
+        param = sig.parameters["x"]
+        assert not _is_eval_only(param)
+    def test_optional_with_value_not_eval_only(self) -> None:
+        """Optional params with non-None default are not eval-only."""
+        def fn(x: str = "default") -> None:
+            pass
+        sig = inspect.signature(fn)
+        param = sig.parameters["x"]
+        assert not _is_eval_only(param)
+    def test_optional_none_without_union_not_eval_only(self) -> None:
+        """Optional with None default but no None in type is not eval-only."""
+        def fn(x: str = None) -> None:  # type: ignore[assignment]  # noqa: RUF013
+            pass
+        sig = inspect.signature(fn)
+        param = sig.parameters["x"]
+        assert not _is_eval_only(param)
+    def test_optional_none_with_union_is_eval_only(self) -> None:
+        """Params with `X | None = None` pattern are eval-only."""
+        def fn(x: str | None = None) -> None:
+            pass
+        sig = inspect.signature(fn)
+        param = sig.parameters["x"]
+        assert _is_eval_only(param)
+    def test_optional_int_none_is_eval_only(self) -> None:
+        """Works with int | None = None too."""
+        def fn(x: int | None = None) -> None:
+            pass
+        sig = inspect.signature(fn)
+        param = sig.parameters["x"]
+        assert _is_eval_only(param)
+    def test_string_annotation_with_none_union(self) -> None:
+        """Handles string annotations like 'str | None'."""
+        # Simulate string annotation
+        param = inspect.Parameter(
+            "x",
+            inspect.Parameter.POSITIONAL_OR_KEYWORD,
+            default=None,
+            annotation="str | None",
+        )
+        assert _is_eval_only(param)
+    def test_string_annotation_without_none(self) -> None:
+        """String annotations without None are not eval-only."""
+        param = inspect.Parameter(
+            "x",
+            inspect.Parameter.POSITIONAL_OR_KEYWORD,
+            default=None,
+            annotation="str",
+        )
+        assert not _is_eval_only(param)
+class TestAgentToolInit:
+    """Tests for AgentTool initialization."""
+    def test_requires_model_or_agent(self) -> None:
+        """Must provide either model or agent."""
+        task = Task(args={})
+        with pytest.raises(ValueError, match="Must provide either"):
+            AgentTool(task)
+    def test_cannot_provide_both_model_and_agent(self) -> None:
+        """Cannot provide both model and agent."""
+        task = Task(args={})
+        mock_agent = MagicMock()
+        with pytest.raises(ValueError, match="Cannot provide both"):
+            AgentTool(task, model="claude", agent=mock_agent)  # type: ignore[arg-type]
+    def test_accepts_model_string(self) -> None:
+        """Can create with model string."""
+        task = Task(scenario="test", args={})
+        tool = AgentTool(task, model="claude")
+        assert tool._model == "claude"
+        assert tool._agent_cls is None
+    def test_accepts_agent_class(self) -> None:
+        """Can create with custom agent class."""
+        task = Task(scenario="test", args={})
+        mock_agent_cls = MagicMock()
+        tool = AgentTool(task, agent=mock_agent_cls)  # type: ignore[arg-type]
+        assert tool._model is None
+        assert tool._agent_cls is mock_agent_cls
+    def test_name_defaults_to_scenario(self) -> None:
+        """Tool name defaults to scenario name."""
+        task = Task(scenario="investigate", args={})
+        tool = AgentTool(task, model="claude")
+        assert tool.name == "investigate"
+    def test_name_can_be_overridden(self) -> None:
+        """Tool name can be overridden."""
+        task = Task(scenario="investigate", args={})
+        tool = AgentTool(task, model="claude", name="custom_name")
+        assert tool.name == "custom_name"
+class TestAgentToolParamFiltering:
+    """Tests for parameter filtering (eval-only params hidden)."""
+    def test_filters_eval_only_params(self) -> None:
+        """Eval-only params (| None = None) are filtered from visible_params."""
+        env = Environment("test")
+        # Use Union syntax for consistency across Python versions
+        @env.scenario()
+        async def investigate(
+            issue_id: str,
+            include_traces: bool = True,
+            expected_cause: str | None = None,  # Eval only
+        ):
+            yield {"task": f"Investigate {issue_id}"}
+        task = env("investigate")
+        tool = AgentTool(task, model="claude")
+        # visible_params should only have issue_id and include_traces
+        assert "issue_id" in tool._visible_params
+        assert "include_traces" in tool._visible_params
+        assert "expected_cause" not in tool._visible_params
+    def test_all_required_params_visible(self) -> None:
+        """All required params are visible."""
+        env = Environment("test")
+        @env.scenario()
+        async def search(query: str, limit: int):
+            yield {"task": f"Search: {query}"}
+        task = env("search")
+        tool = AgentTool(task, model="claude")
+        assert "query" in tool._visible_params
+        assert "limit" in tool._visible_params
+    def test_optional_with_default_visible(self) -> None:
+        """Optional params with non-None defaults are visible."""
+        env = Environment("test")
+        @env.scenario()
+        async def fetch(url: str, request_timeout: int = 30, retries: int = 3):
+            yield {"task": f"Fetch {url}"}
+        task = env("fetch")
+        tool = AgentTool(task, model="claude")
+        assert "url" in tool._visible_params
+        assert "request_timeout" in tool._visible_params
+        assert "retries" in tool._visible_params
+class TestAgentToolSchema:
+    """Tests for JSON schema generation."""
+    def test_builds_json_schema(self) -> None:
+        """Builds proper JSON schema from visible params."""
+        env = Environment("test")
+        @env.scenario()
+        async def investigate(issue_id: str, verbose: bool = False):
+            yield {"task": f"Investigate {issue_id}"}
+        task = env("investigate")
+        tool = AgentTool(task, model="claude")
+        schema = tool._param_schema
+        assert schema is not None
+        assert schema["type"] == "object"
+        assert "issue_id" in schema["properties"]
+        assert "verbose" in schema["properties"]
+        assert "issue_id" in schema["required"]
+        assert "verbose" not in schema["required"]  # Has default
+    def test_schema_excludes_eval_only(self) -> None:
+        """Schema excludes eval-only params."""
+        env = Environment("test")
+        @env.scenario()
+        async def check(
+            item_id: str,
+            expected_status: str | None = None,  # Eval only
+        ):
+            yield {"task": f"Check {item_id}"}
+        task = env("check")
+        tool = AgentTool(task, model="claude")
+        schema = tool._param_schema
+        assert schema is not None
+        assert "item_id" in schema["properties"]
+        assert "expected_status" not in schema["properties"]
+class TestAgentToolMCP:
+    """Tests for MCP tool integration."""
+    def test_mcp_property_returns_tool(self) -> None:
+        """The mcp property returns a FastMCP FunctionTool."""
+        from fastmcp.tools import FunctionTool
+        env = Environment("test")
+        @env.scenario()
+        async def greet(name: str):
+            yield {"task": f"Greet {name}"}
+        task = env("greet")
+        tool = AgentTool(task, model="claude")
+        mcp_tool = tool.mcp
+        assert isinstance(mcp_tool, FunctionTool)
+    def test_mcp_has_filtered_parameters(self) -> None:
+        """MCP tool has filtered parameter schema."""
+        env = Environment("test")
+        @env.scenario()
+        async def analyze(
+            data: str,
+            expected_result: str | None = None,  # Eval only
+        ):
+            yield {"task": f"Analyze {data}"}
+        task = env("analyze")
+        tool = AgentTool(task, model="claude")
+        mcp_tool = tool.mcp
+        params = mcp_tool.parameters  # FunctionTool uses 'parameters'
+        assert "data" in params["properties"]
+        assert "expected_result" not in params["properties"]
+class TestAgentToolCall:
+    """Tests for AgentTool.__call__."""
+    @pytest.mark.asyncio
+    async def test_filters_kwargs_to_visible_only(self) -> None:
+        """Call filters kwargs to visible params only."""
+        # Import modules first so patches work
+        import hud.agents
+        import hud.eval.manager  # noqa: F401
+        env = Environment("test")
+        @env.scenario()
+        async def process(item: str, expected: str | None = None):
+            yield {"task": f"Process {item}"}
+        task = env("process")
+        tool = AgentTool(task, model="claude")
+        # Mock the eval context and agent
+        with (
+            patch("hud.eval.manager.run_eval") as mock_run_eval,
+            patch("hud.agents.create_agent") as mock_create_agent,
+        ):
+            mock_ctx = AsyncMock()
+            mock_ctx.__aenter__ = AsyncMock(return_value=mock_ctx)
+            mock_ctx.__aexit__ = AsyncMock(return_value=None)
+            mock_run_eval.return_value = mock_ctx
+            mock_agent = MagicMock()
+            mock_agent.run = AsyncMock(return_value=MagicMock(content="result"))
+            mock_create_agent.return_value = mock_agent
+            # Call with both visible and eval-only params
+            await tool(item="test", expected="should_be_filtered")
+            # Check that task was created with filtered args
+            call_args = mock_run_eval.call_args
+            task_arg = call_args[0][0]
+            assert "item" in task_arg.args
+            assert "expected" not in task_arg.args  # Filtered out
+    @pytest.mark.asyncio
+    async def test_merges_template_args(self) -> None:
+        """Call merges kwargs with template args."""
+        # Import modules first so patches work
+        import hud.agents
+        import hud.eval.manager  # noqa: F401
+        env = Environment("test")
+        @env.scenario()
+        async def search(query: str, limit: int = 10):
+            yield {"task": f"Search {query}"}
+        # Create template with some args pre-filled
+        task = env("search", limit=5)
+        tool = AgentTool(task, model="claude")
+        with (
+            patch("hud.eval.manager.run_eval") as mock_run_eval,
+            patch("hud.agents.create_agent") as mock_create_agent,
+        ):
+            mock_ctx = AsyncMock()
+            mock_ctx.__aenter__ = AsyncMock(return_value=mock_ctx)
+            mock_ctx.__aexit__ = AsyncMock(return_value=None)
+            mock_run_eval.return_value = mock_ctx
+            mock_agent = MagicMock()
+            mock_agent.run = AsyncMock(return_value=MagicMock(content="result"))
+            mock_create_agent.return_value = mock_agent
+            # Call with additional arg
+            await tool(query="test query")
+            # Check merged args
+            call_args = mock_run_eval.call_args
+            task_arg = call_args[0][0]
+            assert task_arg.args["query"] == "test query"
+            assert task_arg.args["limit"] == 5  # From template

hud/types.py CHANGED Viewed

@@ -8,7 +8,7 @@ from typing import Any, Literal
 import mcp.types as types
 from mcp.types import CallToolRequestParams, CallToolResult
-from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_validator
+from pydantic import BaseModel, ConfigDict, Field, field_validator
 from hud.settings import settings
 from hud.utils.env import resolve_env_vars as _resolve_env_vars
@@ -31,59 +31,87 @@ class AgentType(str, Enum):
     @property
     def cls(self) -> type:
-        from hud.agents import OpenAIAgent, OperatorAgent
-        from hud.agents.claude import ClaudeAgent
-        from hud.agents.gemini import GeminiAgent
-        from hud.agents.gemini_cua import GeminiCUAAgent
-        from hud.agents.openai_chat import OpenAIChatAgent
+        if self == AgentType.CLAUDE:
+            from hud.agents.claude import ClaudeAgent
-        mapping: dict[AgentType, type] = {
-            AgentType.CLAUDE: ClaudeAgent,
-            AgentType.OPENAI: OpenAIAgent,
-            AgentType.OPERATOR: OperatorAgent,
-            AgentType.GEMINI: GeminiAgent,
-            AgentType.GEMINI_CUA: GeminiCUAAgent,
-            AgentType.OPENAI_COMPATIBLE: OpenAIChatAgent,
-        }
-        if self == AgentType.INTEGRATION_TEST:
+            return ClaudeAgent
+        elif self == AgentType.OPENAI:
+            from hud.agents import OpenAIAgent
+            return OpenAIAgent
+        elif self == AgentType.OPERATOR:
+            from hud.agents import OperatorAgent
+            return OperatorAgent
+        elif self == AgentType.GEMINI:
+            from hud.agents.gemini import GeminiAgent
+            return GeminiAgent
+        elif self == AgentType.GEMINI_CUA:
+            from hud.agents.gemini_cua import GeminiCUAAgent
+            return GeminiCUAAgent
+        elif self == AgentType.OPENAI_COMPATIBLE:
+            from hud.agents.openai_chat import OpenAIChatAgent
+            return OpenAIChatAgent
+        elif self == AgentType.INTEGRATION_TEST:
             from hud.agents.misc.integration_test_agent import IntegrationTestRunner
             return IntegrationTestRunner
-        if self not in mapping:
+        else:
             raise ValueError(f"Unsupported agent type: {self}")
+    @property
+    def config_cls(self) -> type:
+        """Get config class without importing agent (avoids SDK dependency)."""
+        from hud.agents.types import (
+            ClaudeConfig,
+            GeminiConfig,
+            GeminiCUAConfig,
+            OpenAIChatConfig,
+            OpenAIConfig,
+            OperatorConfig,
+        )
+        mapping: dict[AgentType, type] = {
+            AgentType.CLAUDE: ClaudeConfig,
+            AgentType.OPENAI: OpenAIConfig,
+            AgentType.OPERATOR: OperatorConfig,
+            AgentType.GEMINI: GeminiConfig,
+            AgentType.GEMINI_CUA: GeminiCUAConfig,
+            AgentType.OPENAI_COMPATIBLE: OpenAIChatConfig,
+            AgentType.INTEGRATION_TEST: BaseAgentConfig,
+        }
+        if self not in mapping:
+            raise ValueError(f"Unsupported agent type for config: {self}")
         return mapping[self]
 class BaseAgentConfig(BaseModel):
     """Agent configuration for LLM-specific settings.
-    Note: allowed_tools, disallowed_tools, append_setup_output, and initial_screenshot
-    are kept for backwards compatibility with v4 task configs but are no longer applied
-    at the agent level. These should be configured on the Environment/Task instead.
+    Note: allowed_tools, disallowed_tools, response_tool_name, append_setup_output,
+    and initial_screenshot are kept for backwards compatibility with v4 task configs
+    but are no longer applied at the agent level. These should be configured on the
+    Environment/Task instead.
     """
     model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid", populate_by_name=True)
-    # Model identifier - use 'model' (preferred) or 'checkpoint_name' (alias)
-    model: str | None = Field(
-        default=None, validation_alias=AliasChoices("model", "checkpoint_name")
-    )
-    model_name: str = "Agent"  # Human-readable display name
     # LLM-specific setting
     system_prompt: str | None = None
-    # Deprecated: kept for backwards compat with v4 task configs, not applied by agent
+    # Deprecated: kept for backwards compat with v4 task configs
+    # allowed_tools/disallowed_tools are applied at Environment level
+    # append_setup_output is applied by EvalContext -> agent
+    # response_tool_name and initial_screenshot are parsed but NOT implemented
     allowed_tools: list[str] | None = None
     disallowed_tools: list[str] | None = None
-    append_setup_output: bool = True
-    append_setup_tool: bool = True  # Alias for append_setup_output (backwards compat)
-    initial_screenshot: bool = True
-    @property
-    def checkpoint_name(self) -> str | None:
-        """Alias for model (for backwards compatibility)."""
-        return self.model
+    response_tool_name: str | None = None  # Not implemented
+    append_setup_output: bool = False
+    append_setup_tool: bool = False  # Alias for append_setup_output
+    initial_screenshot: bool = False  # Not implemented
 class LegacyTask(BaseModel):

hud/utils/hud_console.py CHANGED Viewed

@@ -21,6 +21,7 @@ import traceback
 from typing import TYPE_CHECKING, Any, Literal, Self
 from rich.console import Console
+from rich.markup import escape
 from rich.panel import Panel
 from rich.table import Table
@@ -95,7 +96,7 @@ class HUDConsole:
             stderr: If True, output to stderr (default), otherwise stdout
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[{GREEN}]✅ {message}[/{GREEN}]")
+        console.print(f"[{GREEN}]✅ {escape(message)}[/{GREEN}]")
     def error(self, message: str, stderr: bool = True) -> None:
         """Print an error message.
@@ -106,10 +107,12 @@ class HUDConsole:
         """
         console = self._stderr_console if stderr else self._stdout_console
         tb = traceback.format_exc()
+        escaped_message = escape(message)
         if "NoneType: None" not in tb:
-            console.print(f"[{RED} not bold]❌ {message}\n{tb}[/{RED} not bold]")
+            escaped_tb = escape(tb)
+            console.print(f"[{RED} not bold]❌ {escaped_message}\n{escaped_tb}[/{RED} not bold]")
         else:
-            console.print(f"[{RED} not bold]❌ {message}[/{RED} not bold]")
+            console.print(f"[{RED} not bold]❌ {escaped_message}[/{RED} not bold]")
     def warning(self, message: str, stderr: bool = True) -> None:
         """Print a warning message.
@@ -119,7 +122,7 @@ class HUDConsole:
             stderr: If True, output to stderr (default), otherwise stdout
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"⚠️  [{YELLOW} not bold]{message}[/{YELLOW} not bold]")
+        console.print(f"⚠️  [{YELLOW} not bold]{escape(message)}[/{YELLOW} not bold]")
     def info(self, message: str, stderr: bool = True) -> None:
         """Print an info message.
@@ -129,7 +132,7 @@ class HUDConsole:
             stderr: If True, output to stderr (default), otherwise stdout
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[{TEXT} not bold]{message}[/{TEXT} not bold]")
+        console.print(f"[{TEXT} not bold]{escape(message)}[/{TEXT} not bold]")
     def print(self, message: str, stderr: bool = True) -> None:
         """Print a message.
@@ -151,7 +154,7 @@ class HUDConsole:
         """
         console = self._stderr_console if stderr else self._stdout_console
         console.print(
-            f"[{DIM} not bold][default]{label}[/default][/{DIM} not bold] [default]{value}[/default]"  # noqa: E501
+            f"[{DIM} not bold][default]{escape(label)}[/default][/{DIM} not bold] [default]{escape(value)}[/default]"  # noqa: E501
         )
     def link(self, url: str, stderr: bool = True) -> None:
@@ -162,7 +165,7 @@ class HUDConsole:
             stderr: If True, output to stderr (default), otherwise stdout
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[{SECONDARY} underline]{url}[/{SECONDARY} underline]")
+        console.print(f"[{SECONDARY} underline]{escape(url)}[/{SECONDARY} underline]")
     def json_config(self, json_str: str, stderr: bool = True) -> None:
         """Print JSON configuration with neutral theme.
@@ -173,7 +176,7 @@ class HUDConsole:
         """
         # Print JSON with neutral grey text
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[{TEXT}]{json_str}[/{TEXT}]")
+        console.print(f"[{TEXT}]{escape(json_str)}[/{TEXT}]")
     def key_value_table(
         self, data: dict[str, str | int | float], show_header: bool = False, stderr: bool = True
@@ -203,7 +206,7 @@ class HUDConsole:
             stderr: If True, output to stderr (default), otherwise stdout
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[{DIM}]{message}[/{DIM}]")
+        console.print(f"[{DIM}]{escape(message)}[/{DIM}]")
     def phase(self, phase_num: int, title: str, stderr: bool = True) -> None:
         """Print a phase header (for debug command).
@@ -236,7 +239,7 @@ class HUDConsole:
             stderr: If True, output to stderr (default), otherwise stdout
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[rgb(181,137,0)]💡 Hint: {hint}[/rgb(181,137,0)]")
+        console.print(f"[rgb(181,137,0)]💡 Hint: {escape(hint)}[/rgb(181,137,0)]")
     def status_item(
         self,
@@ -265,10 +268,14 @@ class HUDConsole:
         indicator = indicators.get(status, indicators["info"])
         console = self._stderr_console if stderr else self._stdout_console
+        escaped_label = escape(label)
+        escaped_value = escape(value)
         if primary:
-            console.print(f"{indicator} {label}: [bold {SECONDARY}]{value}[/bold {SECONDARY}]")
+            console.print(
+                f"{indicator} {escaped_label}: [bold {SECONDARY}]{escaped_value}[/bold {SECONDARY}]"
+            )
         else:
-            console.print(f"{indicator} {label}: [{TEXT}]{value}[/{TEXT}]")
+            console.print(f"{indicator} {escaped_label}: [{TEXT}]{escaped_value}[/{TEXT}]")
     def command_example(
         self, command: str, description: str | None = None, stderr: bool = True
@@ -546,7 +553,12 @@ class HUDConsole:
             except (TypeError, ValueError):
                 args_str = str(arguments)[:60]
-        return f"[{GOLD}]→[/{GOLD}] [bold {TEXT}]{name}[/bold {TEXT}][{DIM}]({args_str})[/{DIM}]"
+        escaped_name = escape(name)
+        escaped_args = escape(args_str)
+        return (
+            f"[{GOLD}]→[/{GOLD}] [bold {TEXT}]{escaped_name}[/bold {TEXT}]"
+            f"[{DIM}]({escaped_args})[/{DIM}]"
+        )
     def format_tool_result(self, content: str, is_error: bool = False) -> str:
         """Format a tool result in compact HUD style.
@@ -562,11 +574,12 @@ class HUDConsole:
         if len(content) > 80:
             content = content[:77] + "..."
+        escaped_content = escape(content)
         # Format with status using HUD colors
         if is_error:
-            return f"  [{RED}]✗[/{RED}] [{DIM}]{content}[/{DIM}]"
+            return f"  [{RED}]✗[/{RED}] [{DIM}]{escaped_content}[/{DIM}]"
         else:
-            return f"  [{GREEN}]✓[/{GREEN}] [{TEXT}]{content}[/{TEXT}]"
+            return f"  [{GREEN}]✓[/{GREEN}] [{TEXT}]{escaped_content}[/{TEXT}]"
     def confirm(self, message: str, default: bool = True) -> bool:
         """Print a confirmation message.
@@ -590,12 +603,12 @@ class HUDConsole:
             stderr: If True, output to stderr
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[{color}]{symbol}[/{color}] {message}")
+        console.print(f"[{color}]{symbol}[/{color}] {escape(message)}")
     def detail(self, message: str, stderr: bool = True) -> None:
         """Print an indented detail line with gold pointer symbol."""
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"  [{GOLD}]{Symbols.ITEM}[/{GOLD}] {message}")
+        console.print(f"  [{GOLD}]{Symbols.ITEM}[/{GOLD}] {escape(message)}")
     def flow(self, message: str, stderr: bool = True) -> None:
         """Print a flow/transition message with wave symbol."""

hud/utils/strict_schema.py CHANGED Viewed

@@ -118,7 +118,7 @@ def _ensure_strict_json_schema(
     if "default" in json_schema:
         json_schema.pop("default")
-    for keyword in ("title", "examples"):
+    for keyword in ("title", "examples", "format"):
         json_schema.pop(keyword, None)
     ref = json_schema.get("$ref")

hud/utils/tests/test_version.py CHANGED Viewed

@@ -5,4 +5,4 @@ def test_import():
     """Test that the package can be imported."""
     import hud
-    assert hud.__version__ == "0.5.1"
+    assert hud.__version__ == "0.5.13"

hud/version.py CHANGED Viewed

@@ -4,4 +4,4 @@ Version information for the HUD SDK.
 from __future__ import annotations
-__version__ = "0.5.1"
+__version__ = "0.5.13"

{hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.5.1
+Version: 0.5.13
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -91,7 +91,7 @@ Requires-Dist: pyright==1.1.407; extra == 'dev'
 Requires-Dist: pytest-asyncio; extra == 'dev'
 Requires-Dist: pytest-cov; extra == 'dev'
 Requires-Dist: pytest-mock; extra == 'dev'
-Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
+Requires-Dist: pytest>=8.1.1; extra == 'dev'
 Requires-Dist: ruff>=0.11.8; extra == 'dev'
 Requires-Dist: tornado>=6.5.2; extra == 'dev'
 Description-Content-Type: text/markdown

hud-python 0.5.1__py3-none-any.whl → 0.5.13__py3-none-any.whl

hud-python 0.5.1__py3-none-any.whl → 0.5.13__py3-none-any.whl