PyPI - hud-python - Versions diffs - 0.5.3__tar.gz → 0.5.5__tar.gz - Mend

hud-python 0.5.3 (tar.gz) → 0.5.5 (tar.gz)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (305) hide show

{hud_python-0.5.3 → hud_python-0.5.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.5.3
+Version: 0.5.5
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues

{hud_python-0.5.3 → hud_python-0.5.5}/hud/agents/claude.py RENAMED Viewed

@@ -76,10 +76,18 @@ class ClaudeAgent(MCPAgent):
         model_client = self.config.model_client
         if model_client is None:
-            api_key = settings.anthropic_api_key
-            if not api_key:
-                raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
-            model_client = AsyncAnthropic(api_key=api_key)
+            # Default to HUD gateway when HUD_API_KEY is available
+            if settings.api_key:
+                from hud.agents.gateway import build_gateway_client
+                model_client = build_gateway_client("anthropic")
+            elif settings.anthropic_api_key:
+                model_client = AsyncAnthropic(api_key=settings.anthropic_api_key)
+            else:
+                raise ValueError(
+                    "No API key found. Set HUD_API_KEY for HUD gateway, "
+                    "or ANTHROPIC_API_KEY for direct Anthropic access."
+                )
         self.anthropic_client = model_client
         self.max_tokens = self.config.max_tokens

{hud_python-0.5.3 → hud_python-0.5.5}/hud/agents/gemini.py RENAMED Viewed

@@ -61,10 +61,18 @@ class GeminiAgent(MCPAgent):
         model_client = self.config.model_client
         if model_client is None:
-            api_key = settings.gemini_api_key
-            if not api_key:
-                raise ValueError("Gemini API key not found. Set GEMINI_API_KEY.")
-            model_client = genai.Client(api_key=api_key)
+            # Default to HUD gateway when HUD_API_KEY is available
+            if settings.api_key:
+                from hud.agents.gateway import build_gateway_client
+                model_client = build_gateway_client("gemini")
+            elif settings.gemini_api_key:
+                model_client = genai.Client(api_key=settings.gemini_api_key)
+            else:
+                raise ValueError(
+                    "No API key found. Set HUD_API_KEY for HUD gateway, "
+                    "or GEMINI_API_KEY for direct Gemini access."
+                )
         if self.config.validate_api_key:
             try:

{hud_python-0.5.3 → hud_python-0.5.5}/hud/agents/openai.py RENAMED Viewed

@@ -79,10 +79,18 @@ class OpenAIAgent(MCPAgent):
         model_client = self.config.model_client
         if model_client is None:
-            api_key = settings.openai_api_key
-            if not api_key:
-                raise ValueError("OpenAI API key not found. Set OPENAI_API_KEY.")
-            model_client = AsyncOpenAI(api_key=api_key)
+            # Default to HUD gateway when HUD_API_KEY is available
+            if settings.api_key:
+                from hud.agents.gateway import build_gateway_client
+                model_client = build_gateway_client("openai")
+            elif settings.openai_api_key:
+                model_client = AsyncOpenAI(api_key=settings.openai_api_key)
+            else:
+                raise ValueError(
+                    "No API key found. Set HUD_API_KEY for HUD gateway, "
+                    "or OPENAI_API_KEY for direct OpenAI access."
+                )
         if self.config.validate_api_key:
             try:

{hud_python-0.5.3 → hud_python-0.5.5}/hud/agents/tests/test_openai.py RENAMED Viewed

@@ -128,8 +128,9 @@ class TestOpenAIAgent:
     async def test_init_without_client_no_api_key(self) -> None:
         """Test agent initialization fails without API key."""
         with patch("hud.agents.openai.settings") as mock_settings:
+            mock_settings.api_key = None
             mock_settings.openai_api_key = None
-            with pytest.raises(ValueError, match="OpenAI API key not found"):
+            with pytest.raises(ValueError, match="No API key found"):
                 OpenAIAgent.create()
     @pytest.mark.asyncio

{hud_python-0.5.3 → hud_python-0.5.5}/hud/environment/environment.py RENAMED Viewed

@@ -129,6 +129,7 @@ class Environment(
         super().__init__(name=name, instructions=instructions, **fastmcp_kwargs)
         self._connections: dict[str, Connector] = {}
         self._router = ToolRouter(conflict_resolution=conflict_resolution)
+        self._routing_built = False  # Track if _build_routing has been called
         self._in_context = False
         # Tool call queues - run after connections established
@@ -224,6 +225,9 @@ class Environment(
         Automatically filters to only connections where the tool exists
         (based on cached_tools from initial discovery).
+        For internal tools (starting with _), tries ALL connections since
+        internal tools are hidden from list_tools() and won't be in cached_tools.
         Args:
             tool_name: Name of the tool to call
             **kwargs: Arguments to pass to the tool
@@ -233,10 +237,13 @@ class Environment(
         """
         import asyncio
-        # Only call connections that have this tool
-        targets = self._connections_with_tool(tool_name)
-        if not targets:
-            return {}
+        # For internal tools (underscore prefix), try ALL connections since
+        # they're hidden from list_tools() and won't appear in cached_tools.
+        # For regular tools, only try connections that advertise the tool.
+        if tool_name.startswith("_"):
+            targets = set(self._connections.keys())
+        else:
+            targets = self._connections_with_tool(tool_name)
         results: dict[str, Any] = {}
@@ -245,7 +252,8 @@ class Environment(
             if not connector or not connector.client:
                 return
             try:
-                results[name] = await connector.client.call_tool(tool_name, **kwargs)
+                # Use connector.call_tool which expects arguments as a dict
+                results[name] = await connector.call_tool(tool_name, kwargs)
                 logger.debug("Broadcast '%s' to '%s' succeeded", tool_name, name)
             except Exception as e:
                 results[name] = e
@@ -361,6 +369,7 @@ class Environment(
         if self._connections:
             await asyncio.gather(*[c.disconnect() for c in self._connections.values()])
         self._router.clear()
+        self._routing_built = False
     async def run_async(
         self,
@@ -389,6 +398,7 @@ class Environment(
             connections=self._connections,
             connection_order=list(self._connections.keys()),
         )
+        self._routing_built = True
         # Populate mock schemas for auto-generated mock values
         self._populate_mock_schemas()
@@ -406,6 +416,8 @@ class Environment(
     async def _env_list_tools(self) -> list[mcp_types.Tool]:
         """Return all tools including those from connectors."""
+        if not self._routing_built:
+            await self._build_routing()
         return self._router.tools
     async def _env_call_tool(self, name: str, arguments: dict[str, Any] | None = None) -> list[Any]:

{hud_python-0.5.3 → hud_python-0.5.5}/hud/environment/scenarios.py RENAMED Viewed

@@ -70,35 +70,17 @@ class ScenarioMixin:
     async def submit(self, scenario: str, answer: str) -> None:
         """Submit the agent's answer for a scenario's evaluate phase.
-        This stores the answer locally and broadcasts to connected hubs
-        that have the _hud_submit tool (auto-detected by Environment).
+        Stores locally and broadcasts to connected hubs with _hud_submit tool.
         Args:
             scenario: Name of the scenario (without env prefix)
             answer: The agent's answer/result to submit
-        Example:
-            # Direct call with scenario name
-            await env.submit("checkout", "Order completed successfully")
-            # Or via EvalContext (knows its own scenario)
-            await ctx.submit("Order completed successfully")
         """
-        # Store locally for our scenarios
         self._scenario_answers[scenario] = answer
-        logger.debug(
-            "Stored answer for scenario '%s': %s...",
-            scenario,
-            answer[:50] if len(answer) > 50 else answer,
-        )
-        # Broadcast to connections that have _hud_submit
-        # Environment._broadcast_tool auto-filters to connections with the tool
-        await self._broadcast_tool(  # type: ignore[attr-defined]
-            "_hud_submit",
-            scenario=scenario,
-            answer=answer,
-        )
+        logger.debug("Stored answer for scenario '%s'", scenario)
+        # Broadcast to all connections (internal tools try all connections)
+        await self._broadcast_tool("_hud_submit", scenario=scenario, answer=answer)  # type: ignore[attr-defined]
     def _register_hud_submit_tool(self) -> None:
         """Register the _hud_submit tool for receiving agent answers.
@@ -178,13 +160,9 @@ class ScenarioMixin:
                 prompt_id = f"{safe_env_name}:{scenario_name}"
                 logger.debug("Remote scenario (adding namespace): prompt_id=%s", prompt_id)
             # Serialize args for MCP prompt (only supports string values)
-            # JSON-encode any non-string values so they can be deserialized on the other side
             serialized_args: dict[str, str] = {}
             for key, value in args.items():
-                if isinstance(value, str):
-                    serialized_args[key] = value
-                else:
-                    serialized_args[key] = json.dumps(value)
+                serialized_args[key] = value if isinstance(value, str) else json.dumps(value)
             try:
                 result = await self.get_prompt(prompt_id, serialized_args)  # type: ignore[attr-defined]
@@ -193,14 +171,26 @@ class ScenarioMixin:
                 try:
                     prompts = await self.list_prompts()  # type: ignore[attr-defined]
                     scenario_prompts = [p.name for p in prompts if ":" in p.name]
-                    available = (
-                        "\n    ".join(scenario_prompts) if scenario_prompts else "(none found)"
-                    )
+                    available = "\n    ".join(scenario_prompts) if scenario_prompts else "(none)"
                 except Exception:
-                    available = "(could not fetch available scenarios)"
+                    available = "(could not fetch)"
+                    scenario_prompts = []
+                original_error = str(e)
+                if prompt_id in scenario_prompts:
+                    raise ValueError(
+                        f"⚠️ ERROR: Scenario '{prompt_id}' exists but failed to execute.\n\n"
+                        f"The scenario was found but encountered an error during setup:\n"
+                        f"  {original_error}\n\n"
+                        f"This could be caused by:\n"
+                        f"  - Missing or invalid scenario arguments\n"
+                        f"  - An error in the scenario's setup function\n"
+                        f"  - Connection or serialization issues\n\n"
+                        f"Check the scenario definition and required arguments."
+                    ) from e
                 raise ValueError(
-                    f"Scenario not found.\n\n"
+                    f"⚠️ ERROR: Scenario not found.\n\n"
                     f"Scenario IDs have the format 'environment_name:scenario_name'.\n"
                     f"If you only specify 'scenario_name', the SDK uses your task's env name "
                     f"as the prefix.\n"
@@ -212,7 +202,7 @@ class ScenarioMixin:
                     f"Fix: Use one of the scenario IDs above in your task JSON."
                 ) from e
-            # Validate the response (outside try/except so errors aren't wrapped)
+            # Validate the response
             if result.messages:
                 first_msg = result.messages[0]
                 content = first_msg.content
@@ -275,23 +265,24 @@ class ScenarioMixin:
                         del self._scenario_latest[scenario_name]
         # Remote scenario - read via MCP resource
-        # If scenario_name already contains ":", it's already namespaced - use directly
         if ":" in scenario_name:
             resource_id = scenario_name
         else:
             env_name = getattr(self, "_source_env_name", None) or self.name
             safe_env_name = env_name.replace("_", "-")
             resource_id = f"{safe_env_name}:{scenario_name}"
         try:
             contents = await self.read_resource(resource_id)  # type: ignore[attr-defined]
             if contents:
-                first_content = contents[0]
-                if hasattr(first_content, "text") and isinstance(first_content.text, str):  # type: ignore[union-attr]
-                    data = json.loads(first_content.text)  # type: ignore[union-attr]
+                first = contents[0]
+                if hasattr(first, "text") and isinstance(first.text, str):  # type: ignore[union-attr]
+                    data = json.loads(first.text)  # type: ignore[union-attr]
                     if "reward" in data:
                         return float(data["reward"])
         except Exception as e:
             logger.warning("Failed to get scenario reward: %s", e)
         return None
     def scenario(
@@ -362,7 +353,7 @@ class ScenarioMixin:
                     # Only include JSON-serializable defaults
                     default_val = p.default
                     if default_val is None or isinstance(
-                        default_val, (str, int, float, bool, list, dict)
+                        default_val, (str | int | float | bool | list | dict)
                     ):
                         arg_info["default"] = default_val
@@ -412,27 +403,50 @@ class ScenarioMixin:
                 from pydantic import TypeAdapter
                 # Deserialize JSON-encoded arguments using Pydantic TypeAdapter
-                # This properly handles: Pydantic models, enums, datetime, lists, dicts
+                # MCP prompts only support string arguments, so complex types are
+                # JSON-serialized on the sending side and deserialized here
                 deserialized_args: dict[str, Any] = {}
                 for arg_name, arg_value in handler_args.items():
                     annotation = param_annotations.get(arg_name)
-                    if (
-                        annotation is not None
-                        and annotation is not str
-                        and isinstance(arg_value, str)
-                    ):
-                        # Try TypeAdapter.validate_json for proper type coercion
+                    # Only attempt deserialization on string values
+                    if not isinstance(arg_value, str):
+                        deserialized_args[arg_name] = arg_value
+                        continue
+                    # If annotation is explicitly str, keep as string
+                    if annotation is str:
+                        deserialized_args[arg_name] = arg_value
+                        continue
+                    # If we have a non-str type annotation, use TypeAdapter
+                    if annotation is not None:
                         try:
                             adapter = TypeAdapter(annotation)
                             deserialized_args[arg_name] = adapter.validate_json(arg_value)
-                        except Exception:
-                            # Fall back to plain json.loads if TypeAdapter fails
-                            try:
-                                deserialized_args[arg_name] = json.loads(arg_value)
-                            except json.JSONDecodeError:
-                                deserialized_args[arg_name] = arg_value
-                    else:
-                        deserialized_args[arg_name] = arg_value
+                            continue
+                        except Exception:  # noqa: S110
+                            pass  # Fall through to generic JSON decode
+                    # Try JSON decode for strings that look like JSON
+                    stripped = arg_value.strip()
+                    if (stripped and stripped[0] in "[{") or stripped in ("true", "false", "null"):
+                        try:
+                            deserialized_args[arg_name] = json.loads(arg_value)
+                            continue
+                        except json.JSONDecodeError:
+                            pass
+                    # Try to decode if it looks like a number
+                    if stripped.lstrip("-").replace(".", "", 1).isdigit():
+                        try:
+                            deserialized_args[arg_name] = json.loads(arg_value)
+                            continue
+                        except json.JSONDecodeError:
+                            pass
+                    # Keep as string
+                    deserialized_args[arg_name] = arg_value
                 # Create generator instance with deserialized args
                 gen = scenario_fn(**deserialized_args)

{hud_python-0.5.3 → hud_python-0.5.5}/hud/utils/hud_console.py RENAMED Viewed

@@ -21,6 +21,7 @@ import traceback
 from typing import TYPE_CHECKING, Any, Literal, Self
 from rich.console import Console
+from rich.markup import escape
 from rich.panel import Panel
 from rich.table import Table
@@ -95,7 +96,7 @@ class HUDConsole:
             stderr: If True, output to stderr (default), otherwise stdout
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[{GREEN}]✅ {message}[/{GREEN}]")
+        console.print(f"[{GREEN}]✅ {escape(message)}[/{GREEN}]")
     def error(self, message: str, stderr: bool = True) -> None:
         """Print an error message.
@@ -106,10 +107,12 @@ class HUDConsole:
         """
         console = self._stderr_console if stderr else self._stdout_console
         tb = traceback.format_exc()
+        escaped_message = escape(message)
         if "NoneType: None" not in tb:
-            console.print(f"[{RED} not bold]❌ {message}\n{tb}[/{RED} not bold]")
+            escaped_tb = escape(tb)
+            console.print(f"[{RED} not bold]❌ {escaped_message}\n{escaped_tb}[/{RED} not bold]")
         else:
-            console.print(f"[{RED} not bold]❌ {message}[/{RED} not bold]")
+            console.print(f"[{RED} not bold]❌ {escaped_message}[/{RED} not bold]")
     def warning(self, message: str, stderr: bool = True) -> None:
         """Print a warning message.
@@ -119,7 +122,7 @@ class HUDConsole:
             stderr: If True, output to stderr (default), otherwise stdout
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"⚠️  [{YELLOW} not bold]{message}[/{YELLOW} not bold]")
+        console.print(f"⚠️  [{YELLOW} not bold]{escape(message)}[/{YELLOW} not bold]")
     def info(self, message: str, stderr: bool = True) -> None:
         """Print an info message.
@@ -129,7 +132,7 @@ class HUDConsole:
             stderr: If True, output to stderr (default), otherwise stdout
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[{TEXT} not bold]{message}[/{TEXT} not bold]")
+        console.print(f"[{TEXT} not bold]{escape(message)}[/{TEXT} not bold]")
     def print(self, message: str, stderr: bool = True) -> None:
         """Print a message.
@@ -151,7 +154,7 @@ class HUDConsole:
         """
         console = self._stderr_console if stderr else self._stdout_console
         console.print(
-            f"[{DIM} not bold][default]{label}[/default][/{DIM} not bold] [default]{value}[/default]"  # noqa: E501
+            f"[{DIM} not bold][default]{escape(label)}[/default][/{DIM} not bold] [default]{escape(value)}[/default]"  # noqa: E501
         )
     def link(self, url: str, stderr: bool = True) -> None:
@@ -162,7 +165,7 @@ class HUDConsole:
             stderr: If True, output to stderr (default), otherwise stdout
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[{SECONDARY} underline]{url}[/{SECONDARY} underline]")
+        console.print(f"[{SECONDARY} underline]{escape(url)}[/{SECONDARY} underline]")
     def json_config(self, json_str: str, stderr: bool = True) -> None:
         """Print JSON configuration with neutral theme.
@@ -173,7 +176,7 @@ class HUDConsole:
         """
         # Print JSON with neutral grey text
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[{TEXT}]{json_str}[/{TEXT}]")
+        console.print(f"[{TEXT}]{escape(json_str)}[/{TEXT}]")
     def key_value_table(
         self, data: dict[str, str | int | float], show_header: bool = False, stderr: bool = True
@@ -203,7 +206,7 @@ class HUDConsole:
             stderr: If True, output to stderr (default), otherwise stdout
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[{DIM}]{message}[/{DIM}]")
+        console.print(f"[{DIM}]{escape(message)}[/{DIM}]")
     def phase(self, phase_num: int, title: str, stderr: bool = True) -> None:
         """Print a phase header (for debug command).
@@ -236,7 +239,7 @@ class HUDConsole:
             stderr: If True, output to stderr (default), otherwise stdout
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[rgb(181,137,0)]💡 Hint: {hint}[/rgb(181,137,0)]")
+        console.print(f"[rgb(181,137,0)]💡 Hint: {escape(hint)}[/rgb(181,137,0)]")
     def status_item(
         self,
@@ -265,10 +268,14 @@ class HUDConsole:
         indicator = indicators.get(status, indicators["info"])
         console = self._stderr_console if stderr else self._stdout_console
+        escaped_label = escape(label)
+        escaped_value = escape(value)
         if primary:
-            console.print(f"{indicator} {label}: [bold {SECONDARY}]{value}[/bold {SECONDARY}]")
+            console.print(
+                f"{indicator} {escaped_label}: [bold {SECONDARY}]{escaped_value}[/bold {SECONDARY}]"
+            )
         else:
-            console.print(f"{indicator} {label}: [{TEXT}]{value}[/{TEXT}]")
+            console.print(f"{indicator} {escaped_label}: [{TEXT}]{escaped_value}[/{TEXT}]")
     def command_example(
         self, command: str, description: str | None = None, stderr: bool = True
@@ -546,7 +553,12 @@ class HUDConsole:
             except (TypeError, ValueError):
                 args_str = str(arguments)[:60]
-        return f"[{GOLD}]→[/{GOLD}] [bold {TEXT}]{name}[/bold {TEXT}][{DIM}]({args_str})[/{DIM}]"
+        escaped_name = escape(name)
+        escaped_args = escape(args_str)
+        return (
+            f"[{GOLD}]→[/{GOLD}] [bold {TEXT}]{escaped_name}[/bold {TEXT}]"
+            f"[{DIM}]({escaped_args})[/{DIM}]"
+        )
     def format_tool_result(self, content: str, is_error: bool = False) -> str:
         """Format a tool result in compact HUD style.
@@ -562,11 +574,12 @@ class HUDConsole:
         if len(content) > 80:
             content = content[:77] + "..."
+        escaped_content = escape(content)
         # Format with status using HUD colors
         if is_error:
-            return f"  [{RED}]✗[/{RED}] [{DIM}]{content}[/{DIM}]"
+            return f"  [{RED}]✗[/{RED}] [{DIM}]{escaped_content}[/{DIM}]"
         else:
-            return f"  [{GREEN}]✓[/{GREEN}] [{TEXT}]{content}[/{TEXT}]"
+            return f"  [{GREEN}]✓[/{GREEN}] [{TEXT}]{escaped_content}[/{TEXT}]"
     def confirm(self, message: str, default: bool = True) -> bool:
         """Print a confirmation message.
@@ -590,12 +603,12 @@ class HUDConsole:
             stderr: If True, output to stderr
         """
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"[{color}]{symbol}[/{color}] {message}")
+        console.print(f"[{color}]{symbol}[/{color}] {escape(message)}")
     def detail(self, message: str, stderr: bool = True) -> None:
         """Print an indented detail line with gold pointer symbol."""
         console = self._stderr_console if stderr else self._stdout_console
-        console.print(f"  [{GOLD}]{Symbols.ITEM}[/{GOLD}] {message}")
+        console.print(f"  [{GOLD}]{Symbols.ITEM}[/{GOLD}] {escape(message)}")
     def flow(self, message: str, stderr: bool = True) -> None:
         """Print a flow/transition message with wave symbol."""

{hud_python-0.5.3 → hud_python-0.5.5}/hud/utils/tests/test_version.py RENAMED Viewed

@@ -5,4 +5,4 @@ def test_import():
     """Test that the package can be imported."""
     import hud
-    assert hud.__version__ == "0.5.3"
+    assert hud.__version__ == "0.5.5"

{hud_python-0.5.3 → hud_python-0.5.5}/hud/version.py RENAMED Viewed

@@ -4,4 +4,4 @@ Version information for the HUD SDK.
 from __future__ import annotations
-__version__ = "0.5.3"
+__version__ = "0.5.5"

{hud_python-0.5.3 → hud_python-0.5.5}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "hud-python"
-version = "0.5.3"
+version = "0.5.5"
 description = "SDK for the HUD platform."
 readme = "README.md"
 requires-python = ">=3.11, <3.13"