PyPI - hud-python - Versions diffs - 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

hud-python 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (46) hide show

hud/__init__.py +16 -12
hud/adapters/__init__.py +4 -2
hud/adapters/claude/adapter.py +0 -1
hud/adapters/common/adapter.py +11 -10
hud/adapters/common/types.py +27 -13
hud/adapters/operator/__init__.py +5 -0
hud/adapters/operator/adapter.py +93 -0
hud/agent/__init__.py +7 -0
hud/agent/base.py +109 -0
hud/agent/claude.py +187 -0
hud/agent/operator.py +190 -0
hud/env/__init__.py +11 -0
hud/env/client.py +35 -0
hud/env/docker_client.py +306 -0
hud/env/environment.py +181 -0
hud/env/local_docker_client.py +249 -0
hud/env/remote_client.py +185 -0
hud/env/remote_docker_client.py +221 -0
hud/evaluators/__init__.py +10 -0
hud/evaluators/base.py +31 -0
hud/evaluators/inspect.py +29 -0
hud/evaluators/judge.py +213 -0
hud/evaluators/match.py +163 -0
hud/evaluators/remote.py +78 -0
hud/gym.py +101 -15
hud/job.py +185 -0
hud/server/__init__.py +2 -2
hud/server/requests.py +87 -0
hud/settings.py +13 -2
hud/task.py +133 -0
hud/taskset.py +95 -0
hud/trajectory.py +90 -0
hud/types.py +65 -0
hud/utils/__init__.py +4 -2
hud/utils/common.py +69 -0
hud/utils/config.py +182 -4
hud/utils/telemetry.py +67 -0
hud_python-0.2.0.dist-info/METADATA +188 -0
hud_python-0.2.0.dist-info/RECORD +44 -0
{hud_python-0.1.5.dist-info → hud_python-0.2.0.dist-info}/licenses/LICENSE +1 -1
hud/client.py +0 -200
hud/environment.py +0 -318
hud/run.py +0 -208
hud_python-0.1.5.dist-info/METADATA +0 -125
hud_python-0.1.5.dist-info/RECORD +0 -21
{hud_python-0.1.5.dist-info → hud_python-0.2.0.dist-info}/WHEEL +0 -0

hud/agent/operator.py ADDED Viewed

@@ -0,0 +1,190 @@
+import json
+import logging
+import os
+from typing import Any, Literal, cast
+from openai import OpenAI
+from openai.types.responses import (
+    ToolParam,
+    ResponseInputParam,
+    ResponseInputItemParam,
+    ResponseOutputMessage,
+    ResponseComputerToolCall
+)
+from hud.agent.base import Agent
+from hud.adapters.operator import OperatorAdapter
+from hud.env.environment import Observation
+from hud.settings import settings
+logger = logging.getLogger(__name__)
+class OperatorAgent(Agent[OpenAI, dict[str, Any]]):
+    """
+    An agent implementation using OpenAI's Computer Use API.
+    This agent interacts with HUD environments using OpenAI's Computer Use API
+    through the OperatorAdapter which converts actions to the format expected by HUD.
+    """
+    def __init__(
+        self,
+        client: OpenAI | None = None,
+        model: str = "computer-use-preview",
+        environment: Literal["windows", "mac", "linux", "browser"] = "windows",
+        adapter: OperatorAdapter | None = None,
+        max_iterations: int = 8
+    ):
+        """
+        Initialize the OperatorAgent.
+        Args:
+            client: The OpenAI client for API calls (optional, created automatically if not provided)
+            model: The model to use for computer use
+            environment: The environment type (windows, mac, linux, browser)
+            adapter: The adapter to use for preprocessing and postprocessing
+            max_iterations: Maximum number of iterations for the agent
+        """
+        # Initialize client if not provided
+        if client is None:
+            # Get API key from settings
+            api_key = settings.openai_api_key
+            if not api_key:
+                raise ValueError("OpenAI API key not found in settings or environment variables. Set OPENAI_API_KEY.")
+            # Create synchronous client
+            client = OpenAI(api_key=api_key)
+        super().__init__(client=client, adapter=adapter)
+        self.model = model
+        self.environment = environment
+        self.max_iterations = max_iterations
+        # Default dimensions
+        self.width = 1024
+        self.height = 768
+        # Update dimensions if adapter is provided
+        if self.adapter:
+            self.width = self.adapter.agent_width
+            self.height = self.adapter.agent_height
+        # Message history and state tracking
+        self.last_response_id = None
+        self.pending_call_id = None
+        self.initial_prompt = None
+    async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
+        """
+        Fetch a response from the model based on the observation.
+        Args:
+            observation: The preprocessed observation
+        Returns:
+            tuple[list[dict[str, Any]], bool]: A tuple containing the list of raw actions and a
+                                             boolean indicating if the agent believes the task is complete
+        """
+        if not self.client:
+            raise ValueError("Client is required")
+        # Define the computer use tool with correct type using cast
+        computer_tool = cast(ToolParam, {
+            "type": "computer_use_preview",
+            "display_width": self.width,
+            "display_height": self.height,
+            "environment": self.environment
+        })
+        # Process the observation based on whether it's the first one or a response to an action
+        if self.pending_call_id is None and self.last_response_id is None:
+            # This is the first observation, store and send the prompt
+            self.initial_prompt = observation.text
+            # Create the initial request following the required structure
+            input_content: list[dict[str, Any]] = [
+                {"type": "input_text", "text": observation.text or ""}
+            ]
+            # Add screenshot if present
+            if observation.screenshot:
+                input_content.append({
+                    "type": "input_image",
+                    "image_url": f"data:image/png;base64,{observation.screenshot}"
+                })
+            # Structure the input correctly for the API using cast
+            input_param = cast(ResponseInputParam, [{
+                "role": "user",
+                "content": input_content
+            }])
+            # Call OpenAI API for the initial prompt (synchronous call)
+            response = self.client.responses.create(
+                model=self.model,
+                tools=[computer_tool],
+                input=input_param,
+                truncation="auto"
+            )
+        else:
+            # This is a response to a previous action
+            if not observation.screenshot:
+                logger.warning("No screenshot provided for response to action")
+                return [], True
+            # Create a response to the previous action with the new screenshot
+            input_param_followup = cast(ResponseInputParam, [
+                    cast(ResponseInputItemParam, {
+                        "call_id": self.pending_call_id,
+                        "type": "computer_call_output",
+                        "output": {
+                            "type": "input_image",
+                            "image_url": f"data:image/png;base64,{observation.screenshot}"
+                        }
+                    })
+                ])
+            # Call OpenAI API for follow-up (synchronous call)
+            response = self.client.responses.create(
+                model=self.model,
+                previous_response_id=self.last_response_id,
+                tools=[computer_tool],
+                input=input_param_followup,
+                truncation="auto"
+            )
+        # Store the response ID for the next call
+        self.last_response_id = response.id
+        # Process the response to extract computer calls
+        actions = []
+        done = True  # Assume we're done unless we find a computer call
+        # Loop through all items in the output to find computer_call items
+        computer_calls = [
+            item for item in response.output
+            if isinstance(item, ResponseComputerToolCall) and item.type == "computer_call"
+        ]
+        if computer_calls:
+            # Extract the computer calls and mark that we're not done
+            done = False
+            # Process all computer calls
+            for computer_call in computer_calls:
+                self.pending_call_id = computer_call.call_id
+                action = computer_call.action
+                actions.append(action.model_dump())
+                # Log the action
+                logger.info(f"Computer call action: {action}")
+        else:
+            # If there are no computer calls, print some debug info
+            logger.info("No computer call found in the response. Either complete or error.")
+            for item in response.output:
+                if isinstance(item, ResponseOutputMessage) and item.type == "message":
+                    logger.info(f"Message: {item.content}")
+        return actions, done

hud/env/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+from __future__ import annotations
+from . import docker_client, environment, local_docker_client, remote_client, remote_docker_client
+# Public surface of the package: exactly the submodules imported above.
+__all__ = [
+     "docker_client",
+     "environment",
+     "local_docker_client",
+     "remote_client",
+     "remote_docker_client",
+]

hud/env/client.py ADDED Viewed

@@ -0,0 +1,35 @@
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Any
+from pydantic import BaseModel
+if TYPE_CHECKING:
+    from hud.types import EnvironmentStatus
+    from hud.utils.config import HudStyleConfig
+class Client(BaseModel, ABC):
+    """
+    Base class for all environment clients.
+    Combines a pydantic ``BaseModel`` with ``ABC``: concrete clients must implement
+    ``invoke``, ``get_status`` and ``close``.
+    """
+    @abstractmethod
+    async def invoke(self, config: HudStyleConfig) -> Any:
+        """
+        Invoke the environment with the given config.
+        Args:
+            config: The configuration describing what to invoke
+        Returns:
+            Any: The result of the invocation; its shape is defined by the implementation
+        """
+    @abstractmethod
+    async def get_status(self) -> EnvironmentStatus:
+        """
+        Get the current status of the environment.
+        Returns:
+            EnvironmentStatus: The current state of the environment
+        """
+    @abstractmethod
+    async def close(self) -> None:
+        """
+        Close the environment and clean up any resources.
+        This method should be called when the environment is no longer needed.
+        """

hud/env/docker_client.py ADDED Viewed

@@ -0,0 +1,306 @@
+from __future__ import annotations
+import abc
+import json
+import logging
+import os
+import uuid
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+import toml
+from hud.env.client import Client
+from hud.types import EnvironmentStatus
+from hud.utils.common import directory_to_tar_bytes
+if TYPE_CHECKING:
+    from hud.utils import ExecuteResult
+    from hud.utils.config import HudStyleConfig
+# Module logger, registered under an explicit name rather than __name__.
+logger = logging.getLogger("hud.env.docker_client")
+# Human-readable phrase for each environment status, keyed by the enum member's `.value`.
+# NOTE(review): nothing in the visible part of this module reads this mapping -
+# presumably concrete clients use it when reporting status; confirm.
+STATUS_MESSAGES = {
+    EnvironmentStatus.RUNNING.value: "is running",
+    EnvironmentStatus.ERROR.value: "had an error initializing",
+    EnvironmentStatus.COMPLETED.value: "completed",
+}
+class InvokeError(Exception):
+    """
+    Error raised when an invoke fails.
+    ``DockerClient.invoke`` raises it with ``(stdout, stderr)`` as its arguments when
+    the result divider never shows up in the command's stdout.
+    """
+def invoke_template(config: HudStyleConfig, package_name: str, divider: str) -> str:
+    """
+    Return a python script to run the given config.
+    """
+    func_parts = config.function.split(".")
+    module_str = ".".join([package_name] + func_parts[:-1])
+    func_str = func_parts[-1]
+    # the reason we call `json.dumps` twice is to escape the json string
+    return f"""import json
+from {module_str} import {func_str}
+args = json.loads({json.dumps(json.dumps(config.args))})
+result = {func_str}(*args)
+result_str = json.dumps(result)
+print("{divider}")
+print(result_str)
+"""
+class DockerClient(Client):
+    """
+    Base class for environment clients.
+    Handles updating the environment when local files change.
+    """
+    _last_pyproject_toml_str: str | None = None
+    _last_update_time: int = 0
+    _last_file_mtimes: dict[str, float] = {} # noqa: RUF012
+    _source_path: Path | None = None
+    _package_name: str | None = None
+    @property
+    def source_path(self) -> Path | None:
+        """Get the source path."""
+        return self._source_path
+    @property
+    def package_name(self) -> str:
+        """Get the package name."""
+        if not self._package_name:
+            raise ValueError("Package name not set")
+        return self._package_name
+    def set_source_path(self, source_path: Path) -> None:
+        """
+        Set the source path for this environment controller.
+        Can only be set once, and cannot be set if source_path is already set.
+        Args:
+            source_path: Path to the source code to use in the environment
+        Raises:
+            ValueError: If source_path has already been set
+        """
+        if self._source_path:
+            raise ValueError("Source path has already been set")
+        # Validate source path
+        if not source_path.exists():
+            raise FileNotFoundError(f"Source path {source_path} does not exist")
+        if not source_path.is_dir():
+            raise NotADirectoryError(f"Source path {source_path} is not a directory")
+        # Parse pyproject.toml to get package name
+        pyproject_path = source_path / "pyproject.toml"
+        if not pyproject_path.exists():
+            raise FileNotFoundError(f"pyproject.toml not found in {source_path}")
+        pyproject_data = toml.load(pyproject_path)
+        self._package_name = pyproject_data.get("project", {}).get("name")
+        if not self._package_name:
+            raise ValueError("Could not find package name in pyproject.toml")
+        self._source_path = source_path
+    @classmethod
+    @abc.abstractmethod
+    async def create(cls, dockerfile: str) -> DockerClient:
+        """
+        Creates an environment client from a dockerfile.
+        Args:
+            dockerfile: The dockerfile content to build the environment
+        Returns:
+            EnvClient: An instance of the environment client
+        """
+    @abc.abstractmethod
+    async def get_status(self) -> EnvironmentStatus:
+        """
+        Get the current status of the environment.
+        Returns:
+            EnvironmentStatus: A status enum indicating the current state of the environment
+        """
+    def _get_all_file_mtimes(self) -> dict[str, float]:
+        """
+        Get modification times for all files in the source path.
+        Returns:
+            Dict[str, float]: Dictionary mapping file paths to modification times
+        """
+        if not self._source_path:
+            return {}
+        file_mtimes = {}
+        for root, _, files in os.walk(self._source_path):
+            for file in files:
+                file_path = Path(root) / file
+                try:
+                    file_mtimes[str(file_path)] = file_path.stat().st_mtime
+                except (FileNotFoundError, PermissionError):
+                    # Skip files that can't be accessed
+                    continue
+        return file_mtimes
+    async def needs_update(self) -> bool:
+        """
+        Check if the environment needs an update by:
+        1. Checking if any file has been modified since the last update
+        Returns:
+            bool: True if the environment needs an update, False otherwise.
+        """
+        # If no source path, no update needed
+        if not self.source_path:
+            return False
+        # Check if any file has been modified since the last update
+        current_mtimes = self._get_all_file_mtimes()
+        # If we don't have previous modification times, we need an update
+        if not self._last_file_mtimes:
+            return True
+        # Check for new or modified files
+        for file_path, mtime in current_mtimes.items():
+            if file_path not in self._last_file_mtimes or mtime > self._last_file_mtimes[file_path]:
+                return True
+        return False
+    async def update(self) -> None:
+        """
+        Base update method for environment controllers.
+        For controllers with no source path, this is a no-op.
+        """
+        # If no source path, nothing to update
+        if not self._source_path:
+            return
+        logger.info("Updating environment")
+        # Save current file modification times
+        self._last_file_mtimes = self._get_all_file_mtimes()
+        # Create tar archive of the source code and send it to the container
+        tar_bytes = directory_to_tar_bytes(self._source_path)
+        await self.execute(["mkdir", "-p", "/root/controller"], timeout=5)
+        await self.put_archive("/root/controller", tar_bytes)
+        # Check if pyproject.toml exists and parse it
+        pyproject_path = self._source_path / "pyproject.toml"
+        if not pyproject_path.exists():
+            raise FileNotFoundError(f"pyproject.toml not found in {self._source_path}")
+        # Read and parse the current content of pyproject.toml
+        current_pyproject_content = pyproject_path.read_text()
+        if (
+            self._last_pyproject_toml_str is None
+            or self._last_pyproject_toml_str != current_pyproject_content
+        ):
+            # Update package name if pyproject.toml changed
+            pyproject_data = toml.loads(current_pyproject_content)
+            self._package_name = pyproject_data.get("project", {}).get("name")
+            if not self._package_name:
+                raise ValueError("Could not find package name in pyproject.toml")
+            logger.info("Installing %s in /root/controller", self._package_name)
+            result = await self.execute(
+                ["bash", "-c", "cd /root/controller && pip install -e ."],
+                timeout=60,
+            )
+            if result["stdout"]:
+                logger.info("STDOUT:\n%s", result["stdout"])
+            if result["stderr"]:
+                logger.warning("STDERR:\n%s", result["stderr"])
+            # Save current pyproject.toml content
+            self._last_pyproject_toml_str = current_pyproject_content
+    @abc.abstractmethod
+    async def execute(
+        self,
+        command: list[str],
+        *,
+        timeout: int | None = None,
+    ) -> ExecuteResult:
+        """
+        Execute a command in the environment. May not be supported by all environments.
+        Args:
+            command: The command to execute
+            workdir: The working directory to execute the command in
+            timeout: The timeout for the command
+        Returns:
+            ExecuteResult: The result of the command
+        """
+    async def invoke(self, config: HudStyleConfig) -> tuple[Any, bytes, bytes]:
+        """
+        Invoke a function in the environment. Supported by all environments.
+        Args:
+            config: The configuration to invoke
+        Returns:
+            tuple[Any, bytes, bytes]: The result of the invocation, stdout, and stderr
+        """
+        if await self.needs_update():
+            logger.info("Environment needs update, updating")
+            await self.update()
+        # generate a random uuid as a divider
+        divider = str(uuid.uuid4())
+        template = invoke_template(config, self.package_name, divider)
+        logger.debug("Invoking template: %s", template)
+        result = await self.execute(["python3", "-c", template])
+        # parse the result
+        # we take the whole stderr as the stderr, and the stdout is the result pre-divider
+        stderr = result["stderr"]
+        stdout_parts = result["stdout"].split(divider.encode())
+        stdout = stdout_parts[0]
+        # parse the json part of the stdout (if it exists)
+        if len(stdout_parts) > 1:
+            result = json.loads(stdout_parts[1])
+        else:
+            raise InvokeError(stdout, stderr)
+        return result, stdout, stderr
+    @abc.abstractmethod
+    async def get_archive(self, path: str) -> bytes:
+        """
+        Get an archive of a path from the environment.
+        May not be supported by all environments. (notably browser environments)
+        Args:
+            path: The path to get the archive of
+        Returns:
+            bytes: The archive of the path
+        """
+    @abc.abstractmethod
+    async def put_archive(self, path: str, data: bytes) -> bool:
+        """
+        Put an archive of data at a path in the environment.
+        May not be supported by all environments. (notably browser environments)
+        Args:
+            path: The path to put the archive at
+            data: The data to put in the archive
+        """

hud-python 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

Potentially problematic release.

hud-python 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl