hud-python 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic; review the changes below before upgrading.

Files changed (59)
  1. hud/__init__.py +5 -3
  2. hud/adapters/__init__.py +2 -1
  3. hud/adapters/claude/adapter.py +13 -17
  4. hud/adapters/common/adapter.py +3 -3
  5. hud/adapters/common/tests/__init__.py +0 -0
  6. hud/adapters/common/tests/test_adapter.py +277 -0
  7. hud/adapters/common/types.py +3 -6
  8. hud/adapters/operator/adapter.py +22 -29
  9. hud/agent/__init__.py +9 -1
  10. hud/agent/base.py +28 -28
  11. hud/agent/claude.py +69 -60
  12. hud/agent/langchain.py +204 -0
  13. hud/agent/operator.py +75 -67
  14. hud/env/__init__.py +5 -5
  15. hud/env/client.py +2 -2
  16. hud/env/docker_client.py +37 -39
  17. hud/env/environment.py +91 -66
  18. hud/env/local_docker_client.py +5 -7
  19. hud/env/remote_client.py +40 -29
  20. hud/env/remote_docker_client.py +13 -3
  21. hud/evaluators/__init__.py +2 -3
  22. hud/evaluators/base.py +4 -3
  23. hud/evaluators/inspect.py +3 -8
  24. hud/evaluators/judge.py +34 -58
  25. hud/evaluators/match.py +42 -49
  26. hud/evaluators/remote.py +13 -26
  27. hud/evaluators/tests/__init__.py +0 -0
  28. hud/evaluators/tests/test_inspect.py +12 -0
  29. hud/evaluators/tests/test_judge.py +231 -0
  30. hud/evaluators/tests/test_match.py +115 -0
  31. hud/evaluators/tests/test_remote.py +98 -0
  32. hud/exceptions.py +167 -0
  33. hud/gym.py +12 -10
  34. hud/job.py +525 -47
  35. hud/server/__init__.py +2 -2
  36. hud/server/requests.py +148 -186
  37. hud/server/tests/__init__.py +0 -0
  38. hud/server/tests/test_requests.py +275 -0
  39. hud/settings.py +3 -2
  40. hud/task.py +12 -22
  41. hud/taskset.py +44 -11
  42. hud/trajectory.py +6 -9
  43. hud/types.py +14 -9
  44. hud/utils/__init__.py +2 -2
  45. hud/utils/common.py +37 -13
  46. hud/utils/config.py +44 -29
  47. hud/utils/progress.py +149 -0
  48. hud/utils/telemetry.py +10 -11
  49. hud/utils/tests/__init__.py +0 -0
  50. hud/utils/tests/test_common.py +52 -0
  51. hud/utils/tests/test_config.py +129 -0
  52. hud/utils/tests/test_progress.py +225 -0
  53. hud/utils/tests/test_telemetry.py +37 -0
  54. hud/utils/tests/test_version.py +8 -0
  55. {hud_python-0.2.1.dist-info → hud_python-0.2.3.dist-info}/METADATA +44 -21
  56. hud_python-0.2.3.dist-info/RECORD +62 -0
  57. hud_python-0.2.1.dist-info/RECORD +0 -44
  58. {hud_python-0.2.1.dist-info → hud_python-0.2.3.dist-info}/WHEEL +0 -0
  59. {hud_python-0.2.1.dist-info → hud_python-0.2.3.dist-info}/licenses/LICENSE +0 -0
hud/agent/operator.py CHANGED
@@ -10,36 +10,37 @@ from openai.types.responses import (
     ResponseInputItemParam,
     ResponseOutputMessage,
     ResponseComputerToolCall,
-    ResponseOutputText
+    ResponseOutputText,
 )
 
 from hud.adapters import Adapter
 from hud.agent.base import Agent
 from hud.adapters.operator import OperatorAdapter
-from hud.env.environment import Observation
+from hud.utils.common import Observation
 from hud.settings import settings
 
 logger = logging.getLogger(__name__)
 
+
 class OperatorAgent(Agent[OpenAI, dict[str, Any]]):
     """
     An agent implementation using OpenAI's Computer Use API.
-
+
     This agent interacts with HUD environments using OpenAI's Computer Use API
     through the OperatorAdapter which converts actions to the format expected by HUD.
     """
-
+
     def __init__(
-        self,
+        self,
         client: OpenAI | None = None,
         model: str = "computer-use-preview",
         environment: Literal["windows", "mac", "linux", "browser"] = "windows",
         adapter: Adapter | None = None,
-        max_iterations: int = 8
+        max_iterations: int = 8,
     ):
         """
         Initialize the OperatorAgent.
-
+
         Args:
             client: The OpenAI client for API calls (optional, created automatically if not provided)
             model: The model to use for computer use
@@ -52,28 +53,30 @@ class OperatorAgent(Agent[OpenAI, dict[str, Any]]):
             # Get API key from settings
             api_key = settings.openai_api_key
             if not api_key:
-                raise ValueError("OpenAI API key not found in settings or environment variables. Set OPENAI_API_KEY.")
-
+                raise ValueError(
+                    "OpenAI API key not found in settings or environment variables. Set OPENAI_API_KEY."
+                )
+
             # Create synchronous client
             client = OpenAI(api_key=api_key)
 
         adapter = adapter or OperatorAdapter()
-
+
         super().__init__(client=client, adapter=adapter)
-
+
         self.model = model
         self.environment = environment
         self.max_iterations = max_iterations
-
+
         # Default dimensions
         self.width = 1024
         self.height = 768
-
+
         # Update dimensions if adapter is provided
        if self.adapter:
             self.width = self.adapter.agent_width
             self.height = self.adapter.agent_height
-
+
         # Message history and state tracking
         self.last_response_id = None
         self.pending_call_id = None
@@ -82,86 +85,91 @@ class OperatorAgent(Agent[OpenAI, dict[str, Any]]):
     async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
         """
         Fetch a response from the model based on the observation.
-
+
         Args:
             observation: The preprocessed observation
-
+
         Returns:
             tuple[list[dict[str, Any]], bool]: A tuple containing the list of raw actions and a
             boolean indicating if the agent believes the task is complete
         """
         if not self.client:
             raise ValueError("Client is required")
-
+
         # Define the computer use tool with correct type using cast
-        computer_tool = cast(ToolParam, {
-            "type": "computer_use_preview",
-            "display_width": self.width,
-            "display_height": self.height,
-            "environment": self.environment
-        })
-
+        computer_tool = cast(
+            ToolParam,
+            {
+                "type": "computer_use_preview",
+                "display_width": self.width,
+                "display_height": self.height,
+                "environment": self.environment,
+            },
+        )
+
         # Process the observation based on whether it's the first one or a response to an action
         if self.pending_call_id is None and self.last_response_id is None:
             # This is the first observation, store and send the prompt
             self.initial_prompt = observation.text
-
+
             # Create the initial request following the required structure
             input_content: list[dict[str, Any]] = [
                 {"type": "input_text", "text": observation.text or ""}
             ]
-
+
             # Add screenshot if present
             if observation.screenshot:
-                input_content.append({
-                    "type": "input_image",
-                    "image_url": f"data:image/png;base64,{observation.screenshot}"
-                })
-
+                input_content.append(
+                    {
+                        "type": "input_image",
+                        "image_url": f"data:image/png;base64,{observation.screenshot}",
+                    }
+                )
+
             # Structure the input correctly for the API using cast
-            input_param = cast(ResponseInputParam, [{
-                "role": "user",
-                "content": input_content
-            }])
-
+            input_param = cast(ResponseInputParam, [{"role": "user", "content": input_content}])
+
             # Call OpenAI API for the initial prompt (synchronous call)
             response = self.client.responses.create(
-                model=self.model,
-                tools=[computer_tool],
-                input=input_param,
-                truncation="auto"
+                model=self.model, tools=[computer_tool], input=input_param, truncation="auto"
             )
-
+
         else:
             # This is a response to a previous action
             if not observation.screenshot:
                 logger.warning("No screenshot provided for response to action")
                 return [], True
-
+
             # Create a response to the previous action with the new screenshot
-            input_param_followup = cast(ResponseInputParam, [
-                cast(ResponseInputItemParam, {
-                    "call_id": self.pending_call_id,
-                    "type": "computer_call_output",
-                    "output": {
-                        "type": "input_image",
-                        "image_url": f"data:image/png;base64,{observation.screenshot}"
-                    }
-                })
-            ])
-
+            input_param_followup = cast(
+                ResponseInputParam,
+                [
+                    cast(
+                        ResponseInputItemParam,
+                        {
+                            "call_id": self.pending_call_id,
+                            "type": "computer_call_output",
+                            "output": {
+                                "type": "input_image",
+                                "image_url": f"data:image/png;base64,{observation.screenshot}",
+                            },
+                        },
+                    )
+                ],
+            )
+
             # Call OpenAI API for follow-up (synchronous call)
             response = self.client.responses.create(
                 model=self.model,
                 previous_response_id=self.last_response_id,
                 tools=[computer_tool],
                 input=input_param_followup,
-                truncation="auto"
+                truncation="auto",
             )
-
+
         # Store the response ID for the next call
         self.last_response_id = response.id
-
+
         # Process the response to extract actions or final text
         actions = []
         done = True  # Assume done unless a computer call is found
@@ -169,17 +177,18 @@ class OperatorAgent(Agent[OpenAI, dict[str, Any]]):
 
         # Check for computer calls first
         computer_calls = [
-            item for item in response.output
+            item
+            for item in response.output
             if isinstance(item, ResponseComputerToolCall) and item.type == "computer_call"
         ]
-
+
         if computer_calls:
             # If computer calls exist, process them and set done=False
             done = False
             for computer_call in computer_calls:
                 self.pending_call_id = computer_call.call_id
                 action = computer_call.action
-                actions.append(action.model_dump()) # Convert Pydantic model to dict
+                actions.append(action.model_dump())  # Convert Pydantic model to dict
                 logger.info(f"Computer call action: {action}")
         else:
             # No computer calls, check for a final text message
@@ -188,21 +197,20 @@ class OperatorAgent(Agent[OpenAI, dict[str, Any]]):
             for item in response.output:
                 if isinstance(item, ResponseOutputMessage) and item.type == "message":
                     # Extract text from content blocks within the message
-                    full_text = "".join([c.text for c in item.content if isinstance(c, ResponseOutputText)])
+                    full_text = "".join(
+                        [c.text for c in item.content if isinstance(c, ResponseOutputText)]
+                    )
                     if full_text:
                         final_text_response = full_text
                         logger.info(f"Final text message: {final_text_response}")
-                        break # Stop after finding the first text message
-
+                        break  # Stop after finding the first text message
+
             # If we found final text, package it as a 'response' action
             if final_text_response:
-                actions = [{
-                    "type": "response",
-                    "text": final_text_response
-                }]
+                actions = [{"type": "response", "text": final_text_response}]
                 # Keep done = True
             else:
                 logger.info("No computer calls and no final text message found.")
                 # Keep done = True, actions remains empty
 
-        return actions, done
+        return actions, done
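
For orientation, here is a minimal usage sketch matching the 0.2.3 signatures shown above (constructor keywords and the new Observation import path come from the diff; the task text and the keyword-style Observation construction are illustrative assumptions, and OPENAI_API_KEY is assumed to be set):

# Sketch only: mirrors the OperatorAgent signature visible in this diff; not official usage docs.
import asyncio

from hud.agent.operator import OperatorAgent
from hud.utils.common import Observation  # import location changed from hud.env.environment in 0.2.3


async def main() -> None:
    agent = OperatorAgent(
        model="computer-use-preview",
        environment="browser",
        max_iterations=8,
    )
    # The first call sends the task prompt; later calls feed screenshots back to pending tool calls.
    # Constructing Observation with a text keyword is an assumption for illustration.
    actions, done = await agent.fetch_response(Observation(text="Open the settings page"))
    print(done, actions)


asyncio.run(main())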
hud/env/__init__.py CHANGED
@@ -3,9 +3,9 @@ from __future__ import annotations
 from . import docker_client, environment, local_docker_client, remote_client, remote_docker_client
 
 __all__ = [
-    "docker_client",
-    "environment",
-    "local_docker_client",
-    "remote_client",
-    "remote_docker_client",
+    "docker_client",
+    "environment",
+    "local_docker_client",
+    "remote_client",
+    "remote_docker_client",
 ]
hud/env/client.py CHANGED
@@ -7,7 +7,7 @@ from pydantic import BaseModel
 
 if TYPE_CHECKING:
     from hud.types import EnvironmentStatus
-    from hud.utils.config import HudStyleConfig
+    from hud.utils.config import FunctionConfig
 
 
 class Client(BaseModel, ABC):
@@ -16,7 +16,7 @@ class Client(BaseModel, ABC):
     """
 
     @abstractmethod
-    async def invoke(self, config: HudStyleConfig) -> Any:
+    async def invoke(self, config: FunctionConfig) -> Any:
         """
         Invoke the environment with the given config.
         """
hud/env/docker_client.py CHANGED
@@ -16,7 +16,7 @@ from hud.utils.common import directory_to_tar_bytes
 
 if TYPE_CHECKING:
     from hud.utils import ExecuteResult
-    from hud.utils.config import HudStyleConfig
+    from hud.utils.config import FunctionConfig
 
 logger = logging.getLogger("hud.env.docker_client")
 
@@ -33,7 +33,7 @@ class InvokeError(Exception):
     """
 
 
-def invoke_template(config: HudStyleConfig, package_name: str, divider: str) -> str:
+def invoke_template(config: FunctionConfig, package_name: str, divider: str) -> str:
     """
     Return a python script to run the given config.
     """
@@ -51,16 +51,17 @@ print("{divider}")
 print(result_str)
 """
 
+
 class DockerClient(Client):
     """
     Base class for environment clients.
-
+
     Handles updating the environment when local files change.
     """
-
+
     _last_pyproject_toml_str: str | None = None
     _last_update_time: int = 0
-    _last_file_mtimes: dict[str, float] = {}  # noqa: RUF012
+    _last_file_mtimes: dict[str, float] = {}  # noqa: RUF012 - Not recognized as Pydantic model
     _source_path: Path | None = None
     _package_name: str | None = None
 
@@ -68,47 +69,46 @@ class DockerClient(Client):
     def source_path(self) -> Path | None:
         """Get the source path."""
         return self._source_path
-
+
     @property
     def package_name(self) -> str:
         """Get the package name."""
         if not self._package_name:
             raise ValueError("Package name not set")
         return self._package_name
-
 
     def set_source_path(self, source_path: Path) -> None:
         """
         Set the source path for this environment controller.
         Can only be set once, and cannot be set if source_path is already set.
-
+
         Args:
             source_path: Path to the source code to use in the environment
-
+
         Raises:
             ValueError: If source_path has already been set
         """
         if self._source_path:
             raise ValueError("Source path has already been set")
-
+
         # Validate source path
         if not source_path.exists():
             raise FileNotFoundError(f"Source path {source_path} does not exist")
         if not source_path.is_dir():
             raise NotADirectoryError(f"Source path {source_path} is not a directory")
-
+
         # Parse pyproject.toml to get package name
         pyproject_path = source_path / "pyproject.toml"
         if not pyproject_path.exists():
             raise FileNotFoundError(f"pyproject.toml not found in {source_path}")
-
+
         pyproject_data = toml.load(pyproject_path)
         self._package_name = pyproject_data.get("project", {}).get("name")
         if not self._package_name:
             raise ValueError("Could not find package name in pyproject.toml")
-
+
         self._source_path = source_path
-
+
     @classmethod
     @abc.abstractmethod
     async def create(cls, dockerfile: str) -> DockerClient:
@@ -121,26 +121,26 @@ class DockerClient(Client):
         Returns:
             EnvClient: An instance of the environment client
         """
-
+
     @abc.abstractmethod
     async def get_status(self) -> EnvironmentStatus:
         """
         Get the current status of the environment.
-
+
         Returns:
             EnvironmentStatus: A status enum indicating the current state of the environment
         """
-
+
     def _get_all_file_mtimes(self) -> dict[str, float]:
         """
         Get modification times for all files in the source path.
-
+
         Returns:
             Dict[str, float]: Dictionary mapping file paths to modification times
         """
         if not self._source_path:
             return {}
-
+
         file_mtimes = {}
         for root, _, files in os.walk(self._source_path):
             for file in files:
@@ -151,12 +151,12 @@ class DockerClient(Client):
                     # Skip files that can't be accessed
                    continue
         return file_mtimes
-
+
     async def needs_update(self) -> bool:
         """
         Check if the environment needs an update by:
         1. Checking if any file has been modified since the last update
-
+
         Returns:
             bool: True if the environment needs an update, False otherwise.
         """
@@ -166,18 +166,18 @@ class DockerClient(Client):
 
         # Check if any file has been modified since the last update
         current_mtimes = self._get_all_file_mtimes()
-
+
         # If we don't have previous modification times, we need an update
         if not self._last_file_mtimes:
             return True
-
+
         # Check for new or modified files
         for file_path, mtime in current_mtimes.items():
             if file_path not in self._last_file_mtimes or mtime > self._last_file_mtimes[file_path]:
                 return True
-
+
         return False
-
+
     async def update(self) -> None:
         """
         Base update method for environment controllers.
@@ -186,22 +186,22 @@ class DockerClient(Client):
         # If no source path, nothing to update
         if not self._source_path:
             return
-
+
         logger.info("Updating environment")
 
         # Save current file modification times
         self._last_file_mtimes = self._get_all_file_mtimes()
-
+
         # Create tar archive of the source code and send it to the container
         tar_bytes = directory_to_tar_bytes(self._source_path)
         await self.execute(["mkdir", "-p", "/root/controller"], timeout=5)
         await self.put_archive("/root/controller", tar_bytes)
-
+
         # Check if pyproject.toml exists and parse it
         pyproject_path = self._source_path / "pyproject.toml"
         if not pyproject_path.exists():
             raise FileNotFoundError(f"pyproject.toml not found in {self._source_path}")
-
+
         # Read and parse the current content of pyproject.toml
         current_pyproject_content = pyproject_path.read_text()
         if (
@@ -224,8 +224,7 @@ class DockerClient(Client):
                 logger.warning("STDERR:\n%s", result["stderr"])
             # Save current pyproject.toml content
             self._last_pyproject_toml_str = current_pyproject_content
-
-
+
     @abc.abstractmethod
     async def execute(
         self,
@@ -235,20 +234,20 @@ class DockerClient(Client):
     ) -> ExecuteResult:
         """
         Execute a command in the environment. May not be supported by all environments.
-
+
         Args:
             command: The command to execute
             workdir: The working directory to execute the command in
             timeout: The timeout for the command
-
+
         Returns:
             ExecuteResult: The result of the command
         """
-
-    async def invoke(self, config: HudStyleConfig) -> tuple[Any, bytes, bytes]:
+
+    async def invoke(self, config: FunctionConfig) -> tuple[Any, bytes, bytes]:
         """
         Invoke a function in the environment. Supported by all environments.
-
+
         Args:
             config: The configuration to invoke
 
@@ -289,11 +288,11 @@ class DockerClient(Client):
         May not be supported by all environments. (notably browser environments)
         Args:
             path: The path to get the archive of
-
+
         Returns:
             bytes: The archive of the path
         """
-
+
     @abc.abstractmethod
     async def put_archive(self, path: str, data: bytes) -> bool:
         """
@@ -303,4 +302,3 @@ class DockerClient(Client):
             path: The path to put the archive at
             data: The data to put in the archive
         """
-
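
The _get_all_file_mtimes/needs_update pair shown above implements a simple modification-time snapshot comparison. A standalone sketch of the same idea follows; the function names here are illustrative and not part of the hud API:

# Standalone illustration of mtime-based change detection, independent of hud.
import os
from pathlib import Path


def snapshot_mtimes(source_path: Path) -> dict[str, float]:
    """Map each file under source_path to its last-modification time."""
    mtimes: dict[str, float] = {}
    for root, _, files in os.walk(source_path):
        for name in files:
            full_path = os.path.join(root, name)
            try:
                mtimes[full_path] = os.path.getmtime(full_path)
            except OSError:
                continue  # skip files that vanish or cannot be read
    return mtimes


def needs_update(previous: dict[str, float], current: dict[str, float]) -> bool:
    """True when there is no previous snapshot, or when any file is new or newer."""
    if not previous:
        return True
    return any(path not in previous or mtime > previous[path] for path, mtime in current.items())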