PyPI - hud-python - Versions diffs - 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl - Mend

hud-python 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (50)

hud/__init__.py +22 -2
hud/adapters/claude/adapter.py +9 -2
hud/adapters/claude/tests/__init__.py +1 -0
hud/adapters/claude/tests/test_adapter.py +519 -0
hud/adapters/common/types.py +5 -1
hud/adapters/operator/adapter.py +4 -0
hud/adapters/operator/tests/__init__.py +1 -0
hud/adapters/operator/tests/test_adapter.py +370 -0
hud/agent/__init__.py +4 -0
hud/agent/base.py +18 -2
hud/agent/claude.py +20 -17
hud/agent/claude_plays_pokemon.py +282 -0
hud/agent/langchain.py +12 -7
hud/agent/misc/__init__.py +3 -0
hud/agent/misc/response_agent.py +80 -0
hud/agent/operator.py +27 -19
hud/agent/tests/__init__.py +1 -0
hud/agent/tests/test_base.py +202 -0
hud/env/docker_client.py +28 -18
hud/env/environment.py +32 -16
hud/env/local_docker_client.py +83 -42
hud/env/remote_client.py +1 -3
hud/env/remote_docker_client.py +72 -15
hud/exceptions.py +12 -0
hud/gym.py +71 -53
hud/job.py +52 -7
hud/settings.py +6 -0
hud/task.py +45 -33
hud/taskset.py +44 -4
hud/telemetry/__init__.py +21 -0
hud/telemetry/_trace.py +173 -0
hud/telemetry/context.py +193 -0
hud/telemetry/exporter.py +417 -0
hud/telemetry/instrumentation/__init__.py +3 -0
hud/telemetry/instrumentation/mcp.py +498 -0
hud/telemetry/instrumentation/registry.py +59 -0
hud/telemetry/mcp_models.py +331 -0
hud/telemetry/tests/__init__.py +1 -0
hud/telemetry/tests/test_context.py +203 -0
hud/telemetry/tests/test_trace.py +270 -0
hud/types.py +10 -26
hud/utils/common.py +22 -2
hud/utils/misc.py +53 -0
hud/utils/tests/test_version.py +1 -1
hud/version.py +7 -0
{hud_python-0.2.4.dist-info → hud_python-0.2.5.dist-info}/METADATA +90 -22
hud_python-0.2.5.dist-info/RECORD +84 -0
hud_python-0.2.4.dist-info/RECORD +0 -62
{hud_python-0.2.4.dist-info → hud_python-0.2.5.dist-info}/WHEEL +0 -0
{hud_python-0.2.4.dist-info → hud_python-0.2.5.dist-info}/licenses/LICENSE +0 -0

hud/adapters/operator/tests/test_adapter.py ADDED

@@ -0,0 +1,370 @@
+from __future__ import annotations
+import pytest
+from hud.adapters.common.types import (
+    ClickAction,
+    DragAction,
+    MoveAction,
+    PressAction,
+    ResponseAction,
+    ScreenshotFetch,
+    ScrollAction,
+    TypeAction,
+    WaitAction,
+)
+from hud.adapters.operator import OperatorAdapter
+class TestOperatorAdapter:
+    """Test the OperatorAdapter class."""
+    @pytest.fixture
+    def adapter(self):
+        """Fixture providing a clean adapter instance."""
+        return OperatorAdapter()
+    def test_init(self, adapter):
+        """Test adapter initialization."""
+        assert adapter.agent_width == 1024
+        assert adapter.agent_height == 768
+        assert adapter.env_width == 1920  # Inherited from parent
+        assert adapter.env_height == 1080  # Inherited from parent
+    def test_key_map_constants(self, adapter):
+        """Test KEY_MAP constants."""
+        assert adapter.KEY_MAP["return"] == "enter"
+        assert adapter.KEY_MAP["arrowup"] == "up"
+        assert adapter.KEY_MAP["arrowdown"] == "down"
+        assert adapter.KEY_MAP["arrowleft"] == "left"
+        assert adapter.KEY_MAP["arrowright"] == "right"
+    def test_button_map_constants(self, adapter):
+        """Test BUTTON_MAP constants."""
+        assert adapter.BUTTON_MAP["wheel"] == "middle"
+    def test_map_key_mapped(self, adapter):
+        """Test _map_key with mapped keys."""
+        assert adapter._map_key("return") == "enter"
+        assert adapter._map_key("RETURN") == "enter"  # Test case insensitive
+        assert adapter._map_key("arrowup") == "up"
+        assert adapter._map_key("ArrowDown") == "down"
+    def test_map_key_unmapped(self, adapter):
+        """Test _map_key with unmapped keys."""
+        assert adapter._map_key("space") == "space"
+        assert adapter._map_key("CTRL") == "ctrl"
+        assert adapter._map_key("Unknown") == "unknown"
+class TestOperatorAdapterConvert:
+    """Test the convert method of OperatorAdapter."""
+    @pytest.fixture
+    def adapter(self):
+        """Fixture providing a clean adapter instance."""
+        return OperatorAdapter()
+    def test_convert_click_action(self, adapter):
+        """Test converting click action."""
+        data = {"type": "click", "x": 100, "y": 200, "button": "left"}
+        result = adapter.convert(data)
+        assert isinstance(result, ClickAction)
+        assert result.point is not None
+        assert result.point.x == 100
+        assert result.point.y == 200
+        assert result.button == "left"
+    def test_convert_click_action_default_values(self, adapter):
+        """Test converting click action with default values."""
+        data = {"type": "click"}
+        result = adapter.convert(data)
+        assert isinstance(result, ClickAction)
+        assert result.point is not None
+        assert result.point.x == 0
+        assert result.point.y == 0
+        assert result.button == "left"
+    def test_convert_click_action_mapped_button(self, adapter):
+        """Test converting click action with mapped button."""
+        data = {"type": "click", "x": 100, "y": 200, "button": "wheel"}
+        result = adapter.convert(data)
+        assert isinstance(result, ClickAction)
+        assert result.button == "middle"
+    def test_convert_double_click_action(self, adapter):
+        """Test converting double click action."""
+        data = {"type": "double_click", "x": 150, "y": 250}
+        result = adapter.convert(data)
+        assert isinstance(result, ClickAction)
+        assert result.point is not None
+        assert result.point.x == 150
+        assert result.point.y == 250
+        assert result.button == "left"
+        assert result.pattern == [100]  # Double click pattern
+    def test_convert_scroll_action(self, adapter):
+        """Test converting scroll action."""
+        data = {"type": "scroll", "x": 300, "y": 400, "scroll_x": 10, "scroll_y": -20}
+        result = adapter.convert(data)
+        assert isinstance(result, ScrollAction)
+        assert result.point is not None
+        assert result.scroll is not None
+        assert result.point.x == 300
+        assert result.point.y == 400
+        assert result.scroll.x == 10
+        assert result.scroll.y == -20
+    def test_convert_scroll_action_default_values(self, adapter):
+        """Test converting scroll action with default values."""
+        data = {"type": "scroll"}
+        result = adapter.convert(data)
+        assert isinstance(result, ScrollAction)
+        assert result.point is not None
+        assert result.scroll is not None
+        assert result.point.x == 0
+        assert result.point.y == 0
+        assert result.scroll.x == 0
+        assert result.scroll.y == 0
+    def test_convert_type_action(self, adapter):
+        """Test converting type action."""
+        data = {"type": "type", "text": "Hello, World!"}
+        result = adapter.convert(data)
+        assert isinstance(result, TypeAction)
+        assert result.text == "Hello, World!"
+        assert result.enter_after is False
+    def test_convert_type_action_default_text(self, adapter):
+        """Test converting type action with default text."""
+        data = {"type": "type"}
+        result = adapter.convert(data)
+        assert isinstance(result, TypeAction)
+        assert result.text == ""
+        assert result.enter_after is False
+    def test_convert_wait_action(self, adapter):
+        """Test converting wait action."""
+        data = {"type": "wait", "ms": 2000}
+        result = adapter.convert(data)
+        assert isinstance(result, WaitAction)
+        assert result.time == 2000
+    def test_convert_wait_action_default_time(self, adapter):
+        """Test converting wait action with default time."""
+        data = {"type": "wait"}
+        result = adapter.convert(data)
+        assert isinstance(result, WaitAction)
+        assert result.time == 1000
+    def test_convert_move_action(self, adapter):
+        """Test converting move action."""
+        data = {"type": "move", "x": 500, "y": 600}
+        result = adapter.convert(data)
+        assert isinstance(result, MoveAction)
+        assert result.point is not None
+        assert result.point.x == 500
+        assert result.point.y == 600
+    def test_convert_move_action_default_values(self, adapter):
+        """Test converting move action with default values."""
+        data = {"type": "move"}
+        result = adapter.convert(data)
+        assert isinstance(result, MoveAction)
+        assert result.point is not None
+        assert result.point.x == 0
+        assert result.point.y == 0
+    def test_convert_keypress_action(self, adapter):
+        """Test converting keypress action."""
+        data = {"type": "keypress", "keys": ["ctrl", "c"]}
+        result = adapter.convert(data)
+        assert isinstance(result, PressAction)
+        assert result.keys == ["ctrl", "c"]
+    def test_convert_keypress_action_mapped_keys(self, adapter):
+        """Test converting keypress action with mapped keys."""
+        data = {"type": "keypress", "keys": ["return", "arrowup"]}
+        result = adapter.convert(data)
+        assert isinstance(result, PressAction)
+        assert result.keys == ["enter", "up"]
+    def test_convert_keypress_action_default_keys(self, adapter):
+        """Test converting keypress action with default keys."""
+        data = {"type": "keypress"}
+        result = adapter.convert(data)
+        assert isinstance(result, PressAction)
+        assert result.keys == []
+    def test_convert_drag_action(self, adapter):
+        """Test converting drag action."""
+        data = {
+            "type": "drag",
+            "path": [{"x": 100, "y": 200}, {"x": 150, "y": 250}, {"x": 200, "y": 300}],
+        }
+        result = adapter.convert(data)
+        assert isinstance(result, DragAction)
+        assert len(result.path) == 3
+        assert result.path[0].x == 100
+        assert result.path[0].y == 200
+        assert result.path[1].x == 150
+        assert result.path[1].y == 250
+        assert result.path[2].x == 200
+        assert result.path[2].y == 300
+    def test_convert_drag_action_default_path(self, adapter):
+        """Test converting drag action with default path."""
+        data = {"type": "drag"}
+        result = adapter.convert(data)
+        assert isinstance(result, DragAction)
+        assert result.path == []
+    def test_convert_drag_action_path_with_missing_coords(self, adapter):
+        """Test converting drag action with missing coordinates."""
+        data = {
+            "type": "drag",
+            "path": [
+                {"x": 100},  # Missing y
+                {"y": 200},  # Missing x
+                {},  # Missing both
+            ],
+        }
+        result = adapter.convert(data)
+        assert isinstance(result, DragAction)
+        assert len(result.path) == 3
+        assert result.path[0].x == 100
+        assert result.path[0].y == 0  # Default value
+        assert result.path[1].x == 0  # Default value
+        assert result.path[1].y == 200
+        assert result.path[2].x == 0  # Default value
+        assert result.path[2].y == 0  # Default value
+    def test_convert_screenshot_action(self, adapter):
+        """Test converting screenshot action."""
+        data = {"type": "screenshot"}
+        result = adapter.convert(data)
+        assert isinstance(result, ScreenshotFetch)
+    def test_convert_response_action(self, adapter):
+        """Test converting response action."""
+        data = {"type": "response", "text": "Task completed successfully"}
+        result = adapter.convert(data)
+        assert isinstance(result, ResponseAction)
+        assert result.text == "Task completed successfully"
+    def test_convert_response_action_default_text(self, adapter):
+        """Test converting response action with default text."""
+        data = {"type": "response"}
+        result = adapter.convert(data)
+        assert isinstance(result, ResponseAction)
+        assert result.text == ""
+    def test_convert_unsupported_action_type(self, adapter):
+        """Test converting unsupported action type."""
+        data = {"type": "unsupported_action"}
+        with pytest.raises(ValueError) as exc_info:
+            adapter.convert(data)
+        assert "Unsupported action type: unsupported_action" in str(exc_info.value)
+    def test_convert_invalid_data_structure(self, adapter):
+        """Test converting invalid data structure."""
+        # Test with non-dict data
+        with pytest.raises(ValueError) as exc_info:
+            adapter.convert("invalid_data")
+        assert "Invalid action" in str(exc_info.value)
+    def test_convert_missing_type_field(self, adapter):
+        """Test converting data without type field."""
+        data = {"x": 100, "y": 200}  # Missing type
+        with pytest.raises(ValueError) as exc_info:
+            adapter.convert(data)
+        assert "Unsupported action type: None" in str(exc_info.value)
+    def test_convert_none_data(self, adapter):
+        """Test converting None data."""
+        with pytest.raises(ValueError) as exc_info:
+            adapter.convert(None)
+        assert "Invalid action" in str(exc_info.value)
+class TestOperatorAdapterIntegration:
+    """Integration tests for OperatorAdapter."""
+    @pytest.fixture
+    def adapter(self):
+        """Fixture providing a clean adapter instance."""
+        return OperatorAdapter()
+    def test_full_click_pipeline(self, adapter):
+        """Test full click action processing pipeline."""
+        # Set adapter dimensions to avoid scaling
+        adapter.agent_width = 1920
+        adapter.agent_height = 1080
+        adapter.env_width = 1920
+        adapter.env_height = 1080
+        # Test the full adapt method
+        raw_action = {"type": "click", "x": 100, "y": 200, "button": "right"}
+        result = adapter.adapt(raw_action)
+        assert isinstance(result, ClickAction)
+        assert result.point is not None
+        assert result.point.x == 100
+        assert result.point.y == 200
+        assert result.button == "right"
+        # Check that it was added to memory
+        assert len(adapter.memory) == 1
+        assert adapter.memory[0] == result
+    def test_multiple_actions_processing(self, adapter):
+        """Test processing multiple actions."""
+        # Set adapter dimensions to avoid scaling
+        adapter.agent_width = 1920
+        adapter.agent_height = 1080
+        adapter.env_width = 1920
+        adapter.env_height = 1080
+        actions = [
+            {"type": "click", "x": 100, "y": 200},
+            {"type": "type", "text": "hello"},
+            {"type": "keypress", "keys": ["return"]},
+        ]
+        results = adapter.adapt_list(actions)
+        assert len(results) == 3
+        assert isinstance(results[0], ClickAction)
+        assert isinstance(results[1], TypeAction)
+        assert isinstance(results[2], PressAction)
+        # Check memory
+        assert len(adapter.memory) == 3

hud/agent/__init__.py CHANGED

@@ -1,7 +1,9 @@
 from .base import Agent
 from .claude import ClaudeAgent
+from .claude_plays_pokemon import ClaudePlaysPokemon
 from .operator import OperatorAgent
 from .langchain import LangchainAgent
+from .misc import ResponseAgent
 from hud.adapters import OperatorAdapter, ClaudeAdapter
@@ -12,4 +14,6 @@ __all__ = [
     "OperatorAdapter",
     "ClaudeAdapter",
     "LangchainAgent",
+    "ClaudePlaysPokemon",
+    "ResponseAgent",
 ]

hud/agent/base.py CHANGED

@@ -2,7 +2,11 @@ from abc import ABC, abstractmethod
 from typing import Sequence, TypeVar, Generic
 from hud.adapters import Adapter, CLA
+from hud.types import Gym
 from hud.utils.common import Observation
+import logging
+logger = logging.getLogger(__name__)
 # Generic type for different client types (Anthropic, OpenAI, etc.)
 ClientT = TypeVar("ClientT")
@@ -21,7 +25,13 @@ class Agent(Generic[ClientT, ActionT], ABC):
     Subclasses only need to implement the fetch_response method.
     """
-    def __init__(self, client: ClientT | None = None, adapter: Adapter | None = None):
+    transfer_gyms: dict[Gym, Gym] = {}
+    def __init__(
+        self,
+        client: ClientT | None = None,
+        adapter: Adapter | None = None,
+    ):
         """
         Initialize the agent.
@@ -81,7 +91,9 @@ class Agent(Generic[ClientT, ActionT], ABC):
         return self.adapter.adapt_list(actions)
-    async def predict(self, observation: Observation) -> tuple[list[CLA] | list[ActionT], bool]:
+    async def predict(
+        self, observation: Observation, verbose: bool = False
+    ) -> tuple[list[CLA] | list[ActionT], bool]:
         """
         Predict the next action based on the observation.
@@ -94,11 +106,15 @@ class Agent(Generic[ClientT, ActionT], ABC):
             tuple[list[CLA] | list[ActionT], bool]: A tuple containing the list of actions and a boolean
                                                        indicating if the agent believes it has completed the task
         """
+        if verbose:
+            logger.info("Predicting action...")
         # Stage 1: Preprocess the observation
         processed_obs = self.preprocess(observation)
         # Stage 2: Fetch response from the model
         actions, done = await self.fetch_response(processed_obs)
+        if verbose:
+            logger.info("Raw action: %s", actions)
         # Stage 3: Postprocess the actions if we have an adapter
         if self.adapter and actions:

hud/agent/claude.py CHANGED

@@ -13,6 +13,7 @@ from anthropic.types.beta import (
 from hud.adapters import Adapter
 from hud.agent.base import Agent
 from hud.adapters.claude import ClaudeAdapter
+from hud.types import Gym
 from hud.utils.common import Observation
 from hud.settings import settings
@@ -53,6 +54,8 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
     through the ClaudeAdapter which converts actions to the format expected by HUD.
     """
+    transfer_gyms: dict[Gym, Gym] = {"qa": "hud-browser"}
     def __init__(
         self,
         client: AsyncAnthropic | None = None,
@@ -123,20 +126,20 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
         # Add text instruction if present
         if observation.text:
-            logger.info("Adding text to user content: %s", observation.text)
+            # logger.info("Adding text to user content: %s", observation.text)
             user_content.append(text_to_content_block(str(observation.text)))
         # Add screenshot if present
         if observation.screenshot:
-            logger.info("Adding screenshot to user content")
+            # logger.info("Adding screenshot to user content")
             if not self.pending_computer_use_tool_id:
-                logger.info("Adding screenshot to user content, no tool id")
+                # logger.info("Adding screenshot to user content, no tool id")
                 user_content.append(base64_to_content_block(observation.screenshot))
             else:
-                logger.info(
-                    "Adding screenshot to user content, tool id: %s",
-                    self.pending_computer_use_tool_id,
-                )
+                # logger.info(
+                #    "Adding screenshot to user content, tool id: %s",
+                #    self.pending_computer_use_tool_id,
+                # )
                 user_content.append(
                     tool_use_content_block(
                         self.pending_computer_use_tool_id,
@@ -183,9 +186,9 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
         done = True  # Assume we're done unless we find a tool use
         for block in response_content:
-            logger.info("Processing block: %s", block)
+            # logger.info("Processing block: %s", block)
             if block.type == "tool_use":
-                logger.info("Processing tool use: %s", block)
+                # logger.info("Processing tool use: %s", block)
                 assert block.name == "computer"
                 # Store the raw action
@@ -197,20 +200,20 @@ class ClaudeAgent(Agent[AsyncAnthropic, Any]):
                 break
         # If no tool use action was found, check for a final text response
-        if not actions and done:
+        if len(actions) == 0 and done:
             final_text_response = ""
             for block in response_content:
                 if block.type == "text":
                     final_text_response += block.text
             if final_text_response.strip():
-                logger.info(
-                    f"No tool use found. Using final text as response: {final_text_response}"
-                )
+                # logger.info(
+                #    f"No tool use found. Using final text as response: {final_text_response}"
+                # )
                 actions = [{"action": "response", "text": final_text_response.strip()}]
-                # Keep done = True
-            else:
-                logger.info("No tool use and no final text block found.")
-                # Keep done = True, actions remains empty
+                done = True
+            # else:
+            # logger.info("No tool use and no final text block found.")
+            # Keep done = True, actions remains empty
         return actions, done

hud-python 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

Potentially problematic release.

hud-python 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl