PyPI - hud-python - Versions diffs - 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

hud-python 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (46) hide show

hud/__init__.py +16 -12
hud/adapters/__init__.py +4 -2
hud/adapters/claude/adapter.py +9 -2
hud/adapters/common/adapter.py +11 -10
hud/adapters/common/types.py +34 -13
hud/adapters/operator/__init__.py +5 -0
hud/adapters/operator/adapter.py +97 -0
hud/agent/__init__.py +7 -0
hud/agent/base.py +109 -0
hud/agent/claude.py +207 -0
hud/agent/operator.py +208 -0
hud/env/__init__.py +11 -0
hud/env/client.py +35 -0
hud/env/docker_client.py +306 -0
hud/env/environment.py +354 -0
hud/env/local_docker_client.py +251 -0
hud/env/remote_client.py +185 -0
hud/env/remote_docker_client.py +221 -0
hud/evaluators/__init__.py +10 -0
hud/evaluators/base.py +31 -0
hud/evaluators/inspect.py +29 -0
hud/evaluators/judge.py +213 -0
hud/evaluators/match.py +163 -0
hud/evaluators/remote.py +78 -0
hud/gym.py +101 -15
hud/job.py +185 -0
hud/server/__init__.py +2 -2
hud/server/requests.py +87 -0
hud/settings.py +13 -2
hud/task.py +144 -0
hud/taskset.py +103 -0
hud/trajectory.py +90 -0
hud/types.py +65 -0
hud/utils/__init__.py +4 -2
hud/utils/common.py +96 -0
hud/utils/config.py +91 -4
hud/utils/telemetry.py +67 -0
hud_python-0.2.1.dist-info/METADATA +181 -0
hud_python-0.2.1.dist-info/RECORD +44 -0
{hud_python-0.1.5.dist-info → hud_python-0.2.1.dist-info}/licenses/LICENSE +1 -1
hud/client.py +0 -200
hud/environment.py +0 -318
hud/run.py +0 -208
hud_python-0.1.5.dist-info/METADATA +0 -125
hud_python-0.1.5.dist-info/RECORD +0 -21
{hud_python-0.1.5.dist-info → hud_python-0.2.1.dist-info}/WHEEL +0 -0

hud/env/environment.py ADDED Viewed

@@ -0,0 +1,354 @@
+"""Base classes for environment implementations."""
+from __future__ import annotations
+import logging
+from typing import TYPE_CHECKING, Any
+from pydantic import BaseModel
+from hud.env.client import Client
+from hud.env.remote_client import RemoteClient
+from hud.task import Task
+from hud.utils.common import HudStyleConfig, HudStyleConfigs
+from hud.utils.config import REMOTE_EVALUATE, REMOTE_FUNCTION_PREFIX, REMOTE_SETUP, expand_config
+# Module-level logger; the name is the fixed string "hud.environment" rather than __name__.
+logger = logging.getLogger("hud.environment")
+if TYPE_CHECKING:
+    # Imported for type annotations only, so it is not imported at runtime.
+    from hud.adapters.common import CLA
+class Observation(BaseModel):
+    """
+    Observation from the environment.
+    Both fields are optional and default to None.
+    Attributes:
+        screenshot: Base64 encoded PNG string of the screen
+        text: Text observation, if available
+    """
+    screenshot: str | None = None  # base64 string png
+    text: str | None = None  # text observation, if available
+class Environment(BaseModel):
+    """
+    Environment base class that provides common functionality for all environment implementations.
+    This class uses the primitives provided by EnvClient to implement core environment operations.
+    """
+    metadata: dict[str, Any]
+    client: Client
+    url: str | None = None
+    live_url: str | None = None
+    # The task id to use for the environment reset
+    task: Task | None = None
+    build_data: dict[str, Any]
+    # final response
+    final_response: str | None = None
+    async def _invoke_all(self, configs: HudStyleConfigs) -> list[Any]:
+        # Execute each config and collect results
+        configs_all = [configs] if not isinstance(configs, list) else configs
+        results = []
+        for config in configs_all:
+            for expanded_config in expand_config(config):
+                result, stdout, stderr = await self.client.invoke(expanded_config)
+                results.append(result)
+                if stdout:
+                    logger.info(
+                        "%s produced stdout:\n%s",
+                        expanded_config.function,
+                        stdout.decode(),
+                    )
+                if stderr:
+                    logger.warning(
+                        "%s produced stderr:\n%s",
+                        expanded_config.function,
+                        stderr.decode(),
+                    )
+        return results
+    async def _setup(self, config: HudStyleConfigs | None = None) -> None:
+        """
+        Setup the environment.
+        Args:
+            config: The configuration to use for the setup
+        """
+        if isinstance(self.client, RemoteClient):
+            await self._invoke_all(create_remote_config(self, config, REMOTE_SETUP))
+        else:
+            if config is not None:
+                await self._invoke_all(config)
+            elif self.task and self.task.config is not None:
+                await self._invoke_all(self.task.config)
+            else:
+                raise ValueError("No config or task provided for local environment")
+    async def evaluate(self, config: HudStyleConfigs | None = None) -> Any:
+        """
+        Evaluate the environment.
+        Args:
+            config: The configuration to use for the evaluation
+        Returns:
+            Any: Result of the evaluation
+        """
+        if isinstance(self.client, RemoteClient):
+            results = await self._invoke_all(
+                create_remote_config(self, config, REMOTE_EVALUATE))
+        else:
+            if config is not None:
+                results = await self._invoke_all(config)
+            elif self.task and self.task.config is not None:
+                results = await self._invoke_all(self.task.config)
+            else:
+                raise ValueError("No config or task provided for local environment")
+        if len(results) == 1:
+            return results[0]
+        else:
+            return results
+    async def reset(self, configs: HudStyleConfigs | None = None) -> tuple[
+        Observation, dict[str, Any]
+    ]:
+        """
+        Reset the environment.
+        Args:
+            configs: The configuration to use for the reset
+        Returns:
+            Observation: The first observation from the environment
+            info: Dictionary of information about the environment
+        """
+        #await self._setup(configs)
+        obs, _, _, info = await self.step()
+        if self.task and self.task.prompt:
+            obs.text = self.task.prompt
+        return obs, info
+    async def step(self, actions: list[CLA] | None = None) -> tuple[
+        Observation, float, bool, dict[str, Any]
+    ]:
+        """Execute a step in the environment.
+        Args:
+            action: The action to execute
+        Returns:
+            Any: Result of the step execution
+        """
+        if actions is None or len(actions) == 0:
+            actions = []
+        args = [[action.model_dump() for action in actions]]
+        # TODO: Move this into the server side
+        if self._maybe_store_response(actions):
+            return Observation(text=self.final_response), 0, False, {}
+        result, stdout, stderr = await self.client.invoke(
+            HudStyleConfig(function="step", args=args)
+        )
+        if stdout:
+            logger.info("Step produced stdout: %s", stdout.decode())
+        if stderr:
+            logger.warning("Step produced stderr: %s", stderr.decode())
+        observation = Observation.model_validate(result["observation"], strict=True)
+        return observation, 0, False, {}
+    def _maybe_store_response(self, actions: list[CLA]) -> bool:
+        """Store the final response into the environment.
+        Args:
+            actions: The action(s) to check
+        Returns:
+            bool: True if the response was submitted, False otherwise
+        """
+        if len(actions) > 0 and actions[-1].type == "response":
+            self.final_response = actions[-1].text
+            return True
+        return False
+    async def get_urls(self) -> dict[str, Any]:
+        """Get URLs for the environment.
+        Returns:
+            dict: Dictionary of URLs for accessing the environment
+        """
+        data, _, _ = await self.client.invoke(HudStyleConfig(function="get_urls", args=[]))
+        self.url = data.get("url")
+        self.live_url = data.get("live_url")
+        return {
+            "url": self.url,
+            "live_url": self.live_url,
+        }
+    async def close(self) -> None:
+        """Close the environment.
+        This should release any resources and clean up the environment.
+        """
+        await self.client.close()
+def create_remote_config(
+    env: Environment | None = None,
+    config: HudStyleConfigs | None = None,
+    function: str | None = None,
+) -> list[HudStyleConfig]:
+    """
+    Create a remote configuration for setup or evaluate, determining the final
+    function call structure based on the provided task or explicit config.
+    This function orchestrates how setup and evaluate steps defined in a Task
+    or passed directly are prepared for remote execution via `env._invoke_all`.
+    Args:
+        env: Environment object, potentially containing a task definition.
+             Used to access `env.task` and `env.final_response`.
+        config: Direct configuration override (e.g., passed to `env.evaluate(config=...)`).
+                Can be in various HudStyleConfigs formats.
+        function: The top-level function context, typically "setup" or "evaluate".
+    Returns:
+        list[HudStyleConfig]: A list containing a single HudStyleConfig object
+                              ready for remote invocation via `client.invoke`.
+                              The specific function/arguments are chosen based on this priority:
+                              1. Explicit `config` parameter (if provided).
+                              2. Specific `task` attribute (e.g., `task.evaluate`).
+                              3. General `task.config` dictionary.
+                              4. Default private function using `task.id`
+                              (e.g., `private_evaluate(task.id)`).
+                              5. Base `function` name with minimal/default arguments.
+    Logic & Examples (Assuming `function="evaluate"` for examples):
+        1) Explicit `config` provided: The `config` is expanded and becomes the `args`
+           for the top-level `function` call. If the environment has a final_response,
+           it's appended to these args.
+           - Example Input:
+             `env` (with `final_response="Paris"`)
+             `config=("contains_text", "Paris")`
+             `function="evaluate"`
+           - Example Output:
+             `[HudStyleConfig(function='evaluate', args=[
+                HudStyleConfig(function='contains_text', args=['Paris', 'Paris'])
+             ])]`
+        2) No explicit `config`, Task has the attribute (e.g., `task.evaluate`):
+           The Task's attribute value (e.g., `task.evaluate`) is expanded and becomes the `args`
+           for the top-level `function` call. Task ID is added if present. `final_response` is
+           appended if present.
+           - Example Input:
+             `env` (`task=Task(id="t1", evaluate=("check_answer",), ...)`, `final_response="42"`)
+             `config=None`
+             `function="evaluate"`
+           - Example Output:
+             `[HudStyleConfig(function='evaluate', args=[HudStyleConfig(function='check_answer',
+                args=['42'], id='t1')])]`
+        3) No explicit `config`, no specific Task attribute, Task has `task.config`:
+           The `task.config` dictionary becomes the single argument for the top-level
+           `function` call. Task ID is added to the config dict if present. `final_response` is
+           appended if present.
+           - Example Input:
+             `env` (with `task=Task(id="t2", config={"expected": "val"}, ...)`)
+             `config=None`
+             `function="evaluate"`
+           - Example Output:
+             `[HudStyleConfig(function='evaluate', args=[{"expected": "val", "id": "t2"}])]`
+        4) No explicit `config`, no specific Task attribute, no `task.config`, Task has `task.id`:
+           Calls a private function (`private_<function>`) on the remote end, passing
+           the `task.id` as the only argument.
+           - Example Input:
+             `env` (with `task=Task(id="t3", ...)`)
+             `config=None`
+             `function="evaluate"`
+           - Example Output:
+             `[HudStyleConfig(function='private_evaluate', args=['t3'])]`
+        5) No explicit `config` and no relevant Task info:
+           Calls the top-level `function` with empty args.
+           - Example Input:
+             `env` (with `task=Task(...)`)
+             `config=None`
+             `function="evaluate"`
+           - Example Output:
+             `[HudStyleConfig(function='evaluate', args=[])]`
+    """
+    # If no function provided, just expand the config and return it directly
+    if function is None:
+        if config:
+            return expand_config(config)
+        raise ValueError("Either function or config must be provided")
+    # Case 1: Explicit config provided
+    if config:
+        expanded_configs = expand_config(config)
+        if env and env.final_response:
+            # Ensure args is a list before appending
+            if not isinstance(expanded_configs[0].args, list):
+                 expanded_configs[0].args = [expanded_configs[0].args]
+            expanded_configs[0].args.append(env.final_response) # for remote responses
+        return [HudStyleConfig(function=function, args=expanded_configs)]
+    # Otherwise, use the environment's task
+    task = env.task if env else None
+    # Must have a task for the remaining cases
+    if task is None:
+        raise ValueError("Either task or config must be provided")
+    # Case 2: Task has the specified function attribute
+    task_config = getattr(task, function, None)
+    if task_config:
+        expanded_configs = expand_config(task_config)
+        if task.id:
+            expanded_configs[0].id = task.id # for remote IDs
+        elif env and env.final_response:
+            # Ensure args is a list before appending
+            if not isinstance(expanded_configs[0].args, list):
+                 expanded_configs[0].args = [expanded_configs[0].args]
+            expanded_configs[0].args.append(env.final_response) # for remote responses
+        return [HudStyleConfig(function=function, args=expanded_configs)]
+    # Case 3: Check for task.config
+    if hasattr(task, "config") and task.config:
+        # Ensure task.config is a dictionary before adding id
+        final_args = task.config.copy() if isinstance(task.config, dict) else {}
+        if task.id:
+            final_args["id"] = task.id # for remote IDs
+        if env and env.final_response:
+            # Append response, ensuring args exists and is a list
+            if "args" not in final_args:
+                final_args["args"] = []
+            if not isinstance(final_args["args"], list):
+                final_args["args"] = [final_args["args"]]
+            final_args["args"].append(env.final_response)
+        return [HudStyleConfig(function=function, args=[final_args])]
+    # Case 4: Use task.id
+    if task.id:
+        args_list = [task.id]
+        if env and env.final_response:
+             args_list.append(env.final_response) # Append final response
+        return [HudStyleConfig(function=f"{REMOTE_FUNCTION_PREFIX}{function}", args=args_list)]
+    # Case 5: No valid configuration found
+    args_list = []
+    if env and env.final_response:
+        args_list.append(env.final_response)
+    return [HudStyleConfig(function=function, args=args_list)]

hud/env/local_docker_client.py ADDED Viewed

@@ -0,0 +1,251 @@
+from __future__ import annotations
+import io
+import logging
+import tarfile
+import tempfile
+import uuid
+from typing import TYPE_CHECKING, Any
+import aiodocker
+from aiohttp import ClientTimeout
+from hud.env.docker_client import DockerClient, EnvironmentStatus
+from hud.utils import ExecuteResult
+if TYPE_CHECKING:
+    # Needed for type annotations only, so these are not imported at runtime.
+    from aiodocker.containers import DockerContainer
+    from aiodocker.stream import Stream
+# NOTE: the logger name is a fixed string and differs from this module's file name.
+logger = logging.getLogger("hud.env.docker_env_client")
+class LocalDockerClient(DockerClient):
+    """
+    Docker-based environment client implementation.
+    """
+    @classmethod
+    async def create(cls, dockerfile: str, ports: list[int] | None = None) -> tuple[
+            LocalDockerClient, dict[str, Any]
+        ]:
+        """
+        Creates a Docker environment client from a dockerfile.
+        Args:
+            dockerfile: The dockerfile content to build the Docker image
+        Returns:
+            DockerClient: An instance of the Docker environment client
+        """
+        # Create a unique image tag
+        image_tag = f"hud-env-{uuid.uuid4().hex[:8]}"
+        # Initialize Docker client
+        docker_client = aiodocker.Docker()
+        # Create fileobj for the Dockerfile
+        dockerfile_fileobj = io.BytesIO(dockerfile.encode("utf-8"))
+        if ports is None:
+            ports = []
+        # Create a tar file from the dockerfile
+        with tempfile.NamedTemporaryFile() as f:
+            with tarfile.open(mode="w:gz", fileobj=f) as t:
+                dfinfo = tarfile.TarInfo("Dockerfile")
+                dfinfo.size = len(dockerfile_fileobj.getvalue())
+                dockerfile_fileobj.seek(0)
+                t.addfile(dfinfo, dockerfile_fileobj)
+            # Reset the file pointer to the beginning of the file
+            f.seek(0)
+            # Build the image
+            build_stream = await docker_client.images.build(
+                fileobj=f,
+                encoding="gzip",
+                tag=image_tag,
+                rm=True,
+                pull=True,
+                forcerm=True,
+            )
+        # Print build output
+        output = ""
+        for chunk in build_stream:
+            if "stream" in chunk:
+                logger.info(chunk["stream"])
+                output += chunk["stream"]
+        # Create and start the container
+        container_config = {
+            "Image": image_tag,
+            "Tty": True,
+            "OpenStdin": True,
+            "Cmd": None,
+            "HostConfig": {
+                "PublishAllPorts": True,
+            },
+            "ExposedPorts": {
+                f"{port}/tcp": {} for port in ports
+            },
+        }
+        container = await docker_client.containers.create(config=container_config)
+        await container.start()
+        # Return the controller instance
+        return cls(docker_client, container.id), {"build_output": output}
+    def __init__(self, docker_conn: aiodocker.Docker, container_id: str) -> None:
+        """
+        Initialize the DockerClient.
+        Args:
+            docker_conn: Docker client connection
+            container_id: ID of the Docker container to control
+        """
+        super().__init__()
+        # Store container ID instead of container object
+        self._container_id = container_id
+        # Docker client will be initialized when needed
+        self._docker = docker_conn
+    @property
+    def container_id(self) -> str:
+        """Get the container ID."""
+        return self._container_id
+    @container_id.setter
+    def container_id(self, value: str) -> None:
+        """Set the container ID."""
+        self._container_id = value
+    async def _get_container(self) -> DockerContainer:
+        """Get the container object from aiodocker."""
+        return await self._docker.containers.get(self.container_id)
+    async def get_status(self) -> EnvironmentStatus:
+        """
+        Get the current status of the Docker environment.
+        Returns:
+            EnvironmentStatus: The current status of the environment
+        """
+        try:
+            container = await self._get_container()
+            container_data = await container.show()
+            # Check the container state
+            state = container_data.get("State", {})
+            status = state.get("Status", "").lower()
+            if status == "running":
+                return EnvironmentStatus.RUNNING
+            elif status == "created" or status == "starting":
+                return EnvironmentStatus.INITIALIZING
+            elif status in ["exited", "dead", "removing", "paused"]:
+                return EnvironmentStatus.COMPLETED
+            else:
+                # Any other state is considered an error
+                return EnvironmentStatus.ERROR
+        except Exception:
+            # If we can't connect to the container or there's any other error
+            return EnvironmentStatus.ERROR
+    async def execute(
+        self,
+        command: list[str],
+        *,
+        timeout: int | None = None,
+    ) -> ExecuteResult:
+        """
+        Execute a command in the container.
+        Args:
+            command: Command to execute
+            workdir: Working directory for the command
+        Returns:
+            ExecuteResult: Result of the command execution
+        """
+        container = await self._get_container()
+        exec_result = await container.exec(
+            cmd=command,
+        )
+        output: Stream = exec_result.start(timeout=ClientTimeout(timeout), detach=False)
+        stdout_data = bytearray()
+        stderr_data = bytearray()
+        while True:
+            message = await output.read_out()
+            if message is None:
+                break
+            if message.stream == 1:  # stdout
+                stdout_data.extend(message.data)
+            elif message.stream == 2:  # stderr
+                stderr_data.extend(message.data)
+        return ExecuteResult(
+            stdout=bytes(stdout_data),
+            stderr=bytes(stderr_data),
+            # TODO: Get the exit code from the output
+            exit_code=0,
+        )
+    async def get_archive(self, path: str) -> bytes:
+        """
+        Get an archive of a path from the container.
+        Args:
+            path: Path in the container to archive
+        Returns:
+            bytes: Tar archive containing the path contents
+        """
+        container = await self._get_container()
+        tarfile = await container.get_archive(path)
+        # we know tarfile has fileobj BytesIO
+        # read the tarfile into a bytes object
+        fileobj = tarfile.fileobj
+        if not isinstance(fileobj, io.BytesIO):
+            raise TypeError("fileobj is not a BytesIO object")
+        return fileobj.getvalue()
+    async def put_archive(self, path: str, data: bytes) -> None:
+        """
+        Put an archive of data at a path in the container.
+        Args:
+            path: Path in the container to extract the archive to
+            data: Bytes of the tar archive to extract
+        Returns:
+            bool: True if successful
+        """
+        container = await self._get_container()
+        # Convert bytes to a file-like object for aiodocker
+        file_obj = io.BytesIO(data)
+        await container.put_archive(path=path, data=file_obj)
+    async def close(self) -> None:
+        """
+        Close the Docker environment by stopping and removing the container.
+        """
+        try:
+            container = await self._get_container()
+            await container.stop()
+            await container.delete()
+        except Exception as e:
+            # Log the error but don't raise it since this is cleanup
+            logger.warning("Error during Docker container cleanup: %s", e)
+        finally:
+            await self._docker.close()

hud-python 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl

Potentially problematic release.

hud-python 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl