PyPI - hud-python - Versions diffs - 0.4.21__py3-none-any.whl → 0.4.23__py3-none-any.whl - Mend

hud-python 0.4.21__py3-none-any.whl → 0.4.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (63)

hud/agents/base.py +37 -37
hud/agents/claude.py +11 -6
hud/agents/grounded_openai.py +282 -0
hud/agents/misc/response_agent.py +3 -2
hud/agents/openai.py +2 -2
hud/agents/openai_chat_generic.py +3 -1
hud/agents/tests/test_client.py +6 -1
hud/agents/tests/test_grounded_openai_agent.py +155 -0
hud/cli/__init__.py +34 -24
hud/cli/analyze.py +27 -26
hud/cli/build.py +50 -46
hud/cli/debug.py +7 -7
hud/cli/dev.py +107 -99
hud/cli/eval.py +33 -31
hud/cli/hf.py +53 -53
hud/cli/init.py +28 -28
hud/cli/list_func.py +22 -22
hud/cli/pull.py +36 -36
hud/cli/push.py +76 -74
hud/cli/remove.py +42 -40
hud/cli/rl/__init__.py +2 -2
hud/cli/rl/init.py +41 -41
hud/cli/rl/pod.py +97 -91
hud/cli/rl/ssh.py +42 -40
hud/cli/rl/train.py +75 -73
hud/cli/rl/utils.py +10 -10
hud/cli/tests/test_analyze.py +1 -1
hud/cli/tests/test_analyze_metadata.py +2 -2
hud/cli/tests/test_pull.py +45 -45
hud/cli/tests/test_push.py +31 -29
hud/cli/tests/test_registry.py +15 -15
hud/cli/utils/environment.py +11 -11
hud/cli/utils/interactive.py +18 -18
hud/cli/utils/logging.py +12 -12
hud/cli/utils/metadata.py +12 -12
hud/cli/utils/registry.py +5 -5
hud/cli/utils/runner.py +23 -23
hud/cli/utils/server.py +16 -16
hud/settings.py +6 -0
hud/shared/hints.py +7 -7
hud/tools/executors/tests/test_base_executor.py +1 -1
hud/tools/executors/xdo.py +1 -1
hud/tools/grounding/__init__.py +13 -0
hud/tools/grounding/config.py +54 -0
hud/tools/grounding/grounded_tool.py +314 -0
hud/tools/grounding/grounder.py +302 -0
hud/tools/grounding/tests/__init__.py +1 -0
hud/tools/grounding/tests/test_grounded_tool.py +196 -0
hud/tools/tests/test_playwright_tool.py +1 -1
hud/tools/tests/test_tools_init.py +1 -1
hud/tools/tests/test_utils.py +2 -2
hud/types.py +4 -4
hud/utils/__init__.py +3 -3
hud/utils/agent_factories.py +86 -0
hud/utils/{design.py → hud_console.py} +39 -33
hud/utils/pretty_errors.py +6 -6
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/METADATA +3 -1
{hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/RECORD +63 -54
{hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/WHEEL +0 -0
{hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/licenses/LICENSE +0 -0

hud/agents/tests/test_grounded_openai_agent.py ADDED Viewed

@@ -0,0 +1,155 @@
+from __future__ import annotations
+import json
+from typing import Any
+import mcp.types as types
+import pytest
+from hud.agents.grounded_openai import GroundedOpenAIChatAgent
+from hud.tools.grounding import GrounderConfig
+from hud.types import MCPToolCall, MCPToolResult
+class DummyOpenAI:
+    class chat:  # type: ignore[no-redef]
+        class completions:
+            @staticmethod
+            async def create(**kwargs: Any) -> Any:
+                # Return a minimal object mimicking OpenAI response
+                class Msg:
+                    def __init__(self) -> None:
+                        self.content = "Thinking..."
+                        self.tool_calls = [
+                            type(
+                                "ToolCall",
+                                (),
+                                {
+                                    "id": "call_1",
+                                    "function": type(
+                                        "Fn",
+                                        (),
+                                        {
+                                            "name": "computer",
+                                            "arguments": json.dumps(
+                                                {
+                                                    "action": "click",
+                                                    "element_description": "blue button",
+                                                }
+                                            ),
+                                        },
+                                    ),
+                                },
+                            )()
+                        ]
+                class Choice:
+                    def __init__(self) -> None:
+                        self.message = Msg()
+                        self.finish_reason = "tool_calls"
+                class Resp:
+                    def __init__(self) -> None:
+                        self.choices = [Choice()]
+                return Resp()
+class FakeMCPClient:
+    def __init__(self) -> None:
+        self.tools: list[types.Tool] = [
+            types.Tool(name="computer", description="", inputSchema={}),
+            types.Tool(name="setup", description="internal functions", inputSchema={}),
+        ]
+        self.called: list[MCPToolCall] = []
+    async def initialize(self, mcp_config: dict[str, dict[str, Any]] | None = None) -> None:
+        return None
+    async def list_tools(self) -> list[types.Tool]:
+        return self.tools
+    async def call_tool(self, tool_call: MCPToolCall) -> MCPToolResult:
+        self.called.append(tool_call)
+        return MCPToolResult(content=[types.TextContent(text="ok", type="text")], isError=False)
+    @property
+    def mcp_config(self) -> dict[str, dict[str, Any]]:
+        return {"local": {"command": "echo", "args": ["ok"]}}
+    async def shutdown(self) -> None:
+        return None
+    async def list_resources(self) -> list[types.Resource]:  # not used here
+        return []
+    async def read_resource(self, uri: str) -> types.ReadResourceResult | None:
+        return None
+class DummyGrounder:
+    async def predict_click(self, *, image_b64: str, instruction: str, max_retries: int = 3):
+        return (7, 9)
+class DummyGroundedTool:
+    def __init__(self) -> None:
+        self.last_args: dict[str, Any] | None = None
+    async def __call__(self, **kwargs: Any):
+        self.last_args = kwargs
+        return [types.TextContent(text="ok", type="text")]
+    def get_openai_tool_schema(self) -> dict:
+        return {
+            "type": "function",
+            "function": {"name": "computer", "parameters": {"type": "object"}},
+        }
+@pytest.mark.asyncio
+async def test_call_tools_injects_screenshot_and_delegates(monkeypatch: pytest.MonkeyPatch) -> None:
+    # Agent with fake OpenAI client and fake MCP client
+    grounder_cfg = GrounderConfig(api_base="http://example", model="qwen")
+    agent = GroundedOpenAIChatAgent(
+        grounder_config=grounder_cfg,
+        openai_client=DummyOpenAI(),
+        model_name="gpt-4o-mini",
+        mcp_client=FakeMCPClient(),
+        initial_screenshot=False,
+    )
+    # Inject a dummy grounded tool to observe args without full initialization
+    dummy_tool = DummyGroundedTool()
+    agent.grounded_tool = dummy_tool  # type: ignore
+    # Seed conversation history with a user image
+    png_b64 = (
+        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGMAAQAABQAB"
+        "J2n0mQAAAABJRU5ErkJggg=="
+    )
+    agent.conversation_history = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{png_b64}"}},
+            ],
+        }
+    ]
+    # Build a tool call as GroundedOpenAIChatAgent.get_response would produce
+    tool_call = MCPToolCall(
+        name="computer", arguments={"action": "click", "element_description": "blue button"}
+    )
+    results = await agent.call_tools(tool_call)
+    # One result returned
+    assert len(results) == 1 and not results[0].isError
+    # Grounded tool received screenshot_b64 injected
+    assert dummy_tool.last_args is not None
+    assert dummy_tool.last_args["action"] == "click"
+    assert dummy_tool.last_args["element_description"] == "blue button"
+    assert "screenshot_b64" in dummy_tool.last_args
+    assert isinstance(dummy_tool.last_args["screenshot_b64"], str)

hud/cli/__init__.py CHANGED Viewed

@@ -184,7 +184,7 @@ def debug(
         hud debug . --max-phase 3               # Stop after phase 3
     """
     # Import here to avoid circular imports
-    from hud.utils.design import HUDDesign
+    from hud.utils.hud_console import HUDConsole
     from .utils.environment import (
         build_environment,
@@ -193,7 +193,7 @@ def debug(
         is_environment_directory,
     )
-    design = HUDDesign()
+    hud_console = HUDConsole()
     # Determine the command to run
     command = None
@@ -227,7 +227,7 @@ def debug(
             image_name, source = get_image_name(directory)
             if source == "auto":
-                design.info(f"Auto-generated image name: {image_name}")
+                hud_console.info(f"Auto-generated image name: {image_name}")
             # Build if requested or if image doesn't exist
             if build or not image_exists(image_name):
@@ -263,20 +263,20 @@ def debug(
     phases_completed = asyncio.run(debug_mcp_stdio(command, logger, max_phase=max_phase))
     # Show summary using design system
-    from hud.utils.design import HUDDesign
+    from hud.utils.hud_console import HUDConsole
-    design = HUDDesign()
+    hud_console = HUDConsole()
-    design.info("")  # Empty line
-    design.section_title("Debug Summary")
+    hud_console.info("")  # Empty line
+    hud_console.section_title("Debug Summary")
     if phases_completed == max_phase:
-        design.success(f"All {max_phase} phases completed successfully!")
+        hud_console.success(f"All {max_phase} phases completed successfully!")
         if max_phase == 5:
-            design.info("Your MCP server is fully functional and ready for production use.")
+            hud_console.info("Your MCP server is fully functional and ready for production use.")
     else:
-        design.warning(f"Completed {phases_completed} out of {max_phase} phases")
-        design.info("Check the errors above for troubleshooting.")
+        hud_console.warning(f"Completed {phases_completed} out of {max_phase} phases")
+        hud_console.info("Check the errors above for troubleshooting.")
     # Exit with appropriate code
     if phases_completed < max_phase:
@@ -831,9 +831,9 @@ def eval(
     ),
 ) -> None:
     """🚀 Run evaluation on datasets or individual tasks with agents."""
-    from hud.utils.design import HUDDesign
+    from hud.utils.hud_console import HUDConsole
-    design = HUDDesign()
+    hud_console = HUDConsole()
     # If no source provided, look for task/eval JSON files in current directory
     if source is None:
@@ -863,30 +863,30 @@ def eval(
         json_files = sorted(set(json_files))
         if not json_files:
-            design.error(
+            hud_console.error(
                 "No source provided and no task/eval JSON files found in current directory"
             )
-            design.info(
+            hud_console.info(
                 "Usage: hud eval <source> or create a task JSON file "
                 "(e.g., task.json, eval_config.json)"
             )
             raise typer.Exit(1)
         elif len(json_files) == 1:
             source = str(json_files[0])
-            design.info(f"Found task file: {source}")
+            hud_console.info(f"Found task file: {source}")
         else:
             # Multiple files found, let user choose
-            design.info("Multiple task files found:")
-            file_choice = design.select(
+            hud_console.info("Multiple task files found:")
+            file_choice = hud_console.select(
                 "Select a task file to run:",
                 choices=[str(f) for f in json_files],
             )
             source = file_choice
-            design.success(f"Selected: {source}")
+            hud_console.success(f"Selected: {source}")
     # If no agent specified, prompt for selection
     if agent is None:
-        agent = design.select(
+        agent = hud_console.select(
             "Select an agent to use:",
             choices=[
                 {"name": "Claude 4 Sonnet", "value": "claude"},
@@ -898,14 +898,14 @@ def eval(
     # Validate agent choice
     valid_agents = ["claude", "openai"]
     if agent not in valid_agents:
-        design.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
+        hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
         raise typer.Exit(1)
     # Import eval_command lazily to avoid importing agent dependencies
     try:
         from .eval import eval_command
     except ImportError as e:
-        design.error(
+        hud_console.error(
             "Evaluation dependencies are not installed. "
             "Please install with: pip install 'hud-python[agent]'"
         )
@@ -962,6 +962,16 @@ def hf(
 def main() -> None:
     """Main entry point for the CLI."""
+    # Handle --version flag before Typer parses args
+    if "--version" in sys.argv:
+        try:
+            from hud import __version__
+            console.print(f"HUD CLI version: [cyan]{__version__}[/cyan]")
+        except ImportError:
+            console.print("HUD CLI version: [cyan]unknown[/cyan]")
+        return
     try:
         # Show header for main help
         if len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] in ["--help", "-h"]):
@@ -995,9 +1005,9 @@ def main() -> None:
         except Exception:
             exit_code = 1
         if exit_code != 0:
-            from hud.utils.design import design
+            from hud.utils.hud_console import hud_console
-            design.info(SUPPORT_HINT)
+            hud_console.info(SUPPORT_HINT)
         raise
     except Exception:
         raise

hud/cli/analyze.py CHANGED Viewed

@@ -13,10 +13,10 @@ from rich.table import Table
 from rich.tree import Tree
 from hud.clients import MCPClient
-from hud.utils.design import HUDDesign
+from hud.utils.hud_console import HUDConsole
 console = Console()
-design = HUDDesign()
+hud_console = HUDConsole()
 def parse_docker_command(docker_cmd: list[str]) -> dict:
@@ -28,14 +28,14 @@ def parse_docker_command(docker_cmd: list[str]) -> dict:
 async def analyze_environment(docker_cmd: list[str], output_format: str, verbose: bool) -> None:
     """Analyze MCP environment and display results."""
-    design.header("MCP Environment Analysis", icon="🔍")
+    hud_console.header("MCP Environment Analysis", icon="🔍")
     # Convert Docker command to MCP config
     mcp_config = parse_docker_command(docker_cmd)
     # Display command being analyzed
-    design.dim_info("Command:", " ".join(docker_cmd))
-    design.info("")  # Empty line
+    hud_console.dim_info("Command:", " ".join(docker_cmd))
+    hud_console.info("")  # Empty line
     # Create client
     with Progress(
@@ -85,9 +85,9 @@ async def analyze_environment(docker_cmd: list[str], output_format: str, verbose
 def display_interactive(analysis: dict) -> None:
     """Display analysis results in interactive format."""
     # Server metadata
-    design.section_title("📊 Environment Overview")
+    hud_console.section_title("📊 Environment Overview")
     meta_table = Table(show_header=False, box=None)
-    meta_table.add_column("Property", style="dim")
+    meta_table.add_column("Property", style="bright_black")
     meta_table.add_column("Value")
     # Check if this is a live analysis (has metadata) or metadata-only analysis
@@ -126,19 +126,19 @@ def display_interactive(analysis: dict) -> None:
     console.print(meta_table)
     # Tools
-    design.section_title("🔧 Available Tools")
-    tools_tree = Tree("Tools")
+    hud_console.section_title("🔧 Available Tools")
+    tools_tree = Tree("[bold bright_white]Tools[/bold bright_white]")
     # Check if we have hub_tools info (live analysis) or not (metadata-only)
     if "hub_tools" in analysis:
         # Live analysis format - separate regular and hub tools
         # Regular tools
-        regular_tools = tools_tree.add("Regular Tools")
+        regular_tools = tools_tree.add("[bright_white]Regular Tools[/bright_white]")
         for tool in analysis["tools"]:
             if tool["name"] not in analysis["hub_tools"]:
-                tool_node = regular_tools.add(f"[default]{tool['name']}[/default]")
+                tool_node = regular_tools.add(f"[bright_white]{tool['name']}[/bright_white]")
                 if tool["description"]:
-                    tool_node.add(f"[dim]{tool['description']}[/dim]")
+                    tool_node.add(f"[bright_black]{tool['description']}[/bright_black]")
                 # Show input schema if verbose
                 if analysis.get("verbose") and tool.get("input_schema"):
@@ -148,17 +148,17 @@ def display_interactive(analysis: dict) -> None:
         # Hub tools
         if analysis["hub_tools"]:
-            hub_tools = tools_tree.add("Hub Tools")
+            hub_tools = tools_tree.add("[bright_white]Hub Tools[/bright_white]")
             for hub_name, functions in analysis["hub_tools"].items():
-                hub_node = hub_tools.add(f"[yellow]{hub_name}[/yellow]")
+                hub_node = hub_tools.add(f"[rgb(181,137,0)]{hub_name}[/rgb(181,137,0)]")
                 for func in functions:
-                    hub_node.add(f"[default]{func}[/default]")
+                    hub_node.add(f"[bright_white]{func}[/bright_white]")
     else:
         # Metadata-only format - just list all tools
         for tool in analysis["tools"]:
-            tool_node = tools_tree.add(f"[default]{tool['name']}[/default]")
+            tool_node = tools_tree.add(f"[bright_white]{tool['name']}[/bright_white]")
             if tool.get("description"):
-                tool_node.add(f"[dim]{tool['description']}[/dim]")
+                tool_node.add(f"[bright_black]{tool['description']}[/bright_black]")
             # Show input schema if verbose
             if tool.get("inputSchema"):
@@ -170,11 +170,11 @@ def display_interactive(analysis: dict) -> None:
     # Resources
     if analysis["resources"]:
-        design.section_title("📚 Available Resources")
+        hud_console.section_title("📚 Available Resources")
         resources_table = Table()
-        resources_table.add_column("URI", style="default")
-        resources_table.add_column("Name", style="white")
-        resources_table.add_column("Type", style="dim")
+        resources_table.add_column("URI", style="bright_white")
+        resources_table.add_column("Name", style="bright_white")
+        resources_table.add_column("Type", style="bright_black")
         for resource in analysis["resources"][:10]:
             resources_table.add_row(
@@ -184,11 +184,12 @@ def display_interactive(analysis: dict) -> None:
         console.print(resources_table)
         if len(analysis["resources"]) > 10:
-            console.print(f"[dim]... and {len(analysis['resources']) - 10} more resources[/dim]")
+            remaining = len(analysis["resources"]) - 10
+            console.print(f"[bright_black]... and {remaining} more resources[/bright_black]")
     # Telemetry (only for live analysis)
     if analysis.get("telemetry"):
-        design.section_title("📡 Telemetry Data")
+        hud_console.section_title("📡 Telemetry Data")
         telemetry_table = Table(show_header=False, box=None)
         telemetry_table.add_column("Key", style="dim")
         telemetry_table.add_column("Value")
@@ -206,7 +207,7 @@ def display_interactive(analysis: dict) -> None:
     # Environment variables (for metadata-only analysis)
     if analysis.get("env_vars"):
-        design.section_title("🔑 Environment Variables")
+        hud_console.section_title("🔑 Environment Variables")
         env_table = Table(show_header=False, box=None)
         env_table.add_column("Type", style="dim")
         env_table.add_column("Variables")
@@ -309,7 +310,7 @@ async def analyze_environment_from_config(
     config_path: Path, output_format: str, verbose: bool
 ) -> None:
     """Analyze MCP environment from a JSON config file."""
-    design.header("MCP Environment Analysis", icon="🔍")
+    hud_console.header("MCP Environment Analysis", icon="🔍")
     # Load config from file
     try:
@@ -327,7 +328,7 @@ async def analyze_environment_from_mcp_config(
     mcp_config: dict[str, Any], output_format: str, verbose: bool
 ) -> None:
     """Analyze MCP environment from MCP config dict."""
-    design.header("MCP Environment Analysis", icon="🔍")
+    hud_console.header("MCP Environment Analysis", icon="🔍")
     await _analyze_with_config(mcp_config, output_format, verbose)

hud-python 0.4.21__py3-none-any.whl → 0.4.23__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.21__py3-none-any.whl → 0.4.23__py3-none-any.whl