PyPI - hud-python - Versions diffs - 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

hud-python 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (46) hide show

hud/__init__.py +16 -12
hud/adapters/__init__.py +4 -2
hud/adapters/claude/adapter.py +0 -1
hud/adapters/common/adapter.py +11 -10
hud/adapters/common/types.py +27 -13
hud/adapters/operator/__init__.py +5 -0
hud/adapters/operator/adapter.py +93 -0
hud/agent/__init__.py +7 -0
hud/agent/base.py +109 -0
hud/agent/claude.py +187 -0
hud/agent/operator.py +190 -0
hud/env/__init__.py +11 -0
hud/env/client.py +35 -0
hud/env/docker_client.py +306 -0
hud/env/environment.py +181 -0
hud/env/local_docker_client.py +249 -0
hud/env/remote_client.py +185 -0
hud/env/remote_docker_client.py +221 -0
hud/evaluators/__init__.py +10 -0
hud/evaluators/base.py +31 -0
hud/evaluators/inspect.py +29 -0
hud/evaluators/judge.py +213 -0
hud/evaluators/match.py +163 -0
hud/evaluators/remote.py +78 -0
hud/gym.py +101 -15
hud/job.py +185 -0
hud/server/__init__.py +2 -2
hud/server/requests.py +87 -0
hud/settings.py +13 -2
hud/task.py +133 -0
hud/taskset.py +95 -0
hud/trajectory.py +90 -0
hud/types.py +65 -0
hud/utils/__init__.py +4 -2
hud/utils/common.py +69 -0
hud/utils/config.py +182 -4
hud/utils/telemetry.py +67 -0
hud_python-0.2.0.dist-info/METADATA +188 -0
hud_python-0.2.0.dist-info/RECORD +44 -0
{hud_python-0.1.5.dist-info → hud_python-0.2.0.dist-info}/licenses/LICENSE +1 -1
hud/client.py +0 -200
hud/environment.py +0 -318
hud/run.py +0 -208
hud_python-0.1.5.dist-info/METADATA +0 -125
hud_python-0.1.5.dist-info/RECORD +0 -21
{hud_python-0.1.5.dist-info → hud_python-0.2.0.dist-info}/WHEEL +0 -0

hud/env/environment.py ADDED Viewed

@@ -0,0 +1,181 @@
+"""Base classes for environment implementations."""
+from __future__ import annotations
+import logging
+from typing import TYPE_CHECKING, Any
+from pydantic import BaseModel
+from hud.env.client import Client
+from hud.env.remote_client import RemoteClient
+from hud.task import Task
+from hud.utils import HudStyleConfigs, expand_config
+from hud.utils.config import REMOTE_EVALUATE, REMOTE_SETUP, HudStyleConfig, create_remote_config
+if TYPE_CHECKING:
+    from hud.adapters.common import CLA
+logger = logging.getLogger("hud.environment")
+class Observation(BaseModel):
+    """
+    Observation from the environment.
+    Both fields are optional and default to None.
+    Attributes:
+        screenshot: Base64 encoded PNG string of the screen
+        text: Text observation, if available
+    """
+    screenshot: str | None = None  # base64 string png
+    text: str | None = None
+class Environment(BaseModel):
+    """
+    Environment base class that provides common functionality for all environment implementations.
+    This class uses the primitives provided by its ``client`` (a ``Client``) to implement core
+    environment operations.
+    """
+    # Free-form metadata attached to this environment
+    metadata: dict[str, Any]
+    # Client through which every function invocation and the final close are performed
+    client: Client
+    # Both URLs stay None until get_urls() fills them in
+    url: str | None = None
+    live_url: str | None = None
+    # The task bound to this environment; supplies the fallback config and the prompt
+    task: Task | None = None
+    # NOTE(review): presumably the build data returned when the client was created - confirm
+    build_data: dict[str, Any]
+    async def _invoke_all(self, configs: HudStyleConfigs) -> list[Any]:
+        # Execute each config and collect results
+        configs_all = [configs] if not isinstance(configs, list) else configs
+        results = []
+        for config in configs_all:
+            for expanded_config in expand_config(config):
+                result, stdout, stderr = await self.client.invoke(expanded_config)
+                results.append(result)
+                if stdout:
+                    logger.info(
+                        "%s produced stdout:\n%s",
+                        expanded_config.function,
+                        stdout.decode(),
+                    )
+                if stderr:
+                    logger.warning(
+                        "%s produced stderr:\n%s",
+                        expanded_config.function,
+                        stderr.decode(),
+                    )
+        return results
+    async def _setup(self, config: HudStyleConfigs | None = None) -> None:
+        """
+        Setup the environment.
+        Args:
+            config: The configuration to use for the setup
+        """
+        if isinstance(self.client, RemoteClient):
+            await self._invoke_all(create_remote_config(self.task, config, REMOTE_SETUP))
+        else:
+            if config is not None:
+                await self._invoke_all(config)
+            elif self.task and self.task.config is not None:
+                await self._invoke_all(self.task.config)
+            else:
+                raise ValueError("No config or task provided for local environment")
+    async def evaluate(self, config: HudStyleConfigs | None = None) -> Any:
+        """
+        Evaluate the environment.
+        Args:
+            config: The configuration to use for the evaluation
+        Returns:
+            Any: Result of the evaluation
+        """
+        if isinstance(self.client, RemoteClient):
+            results = await self._invoke_all(
+                create_remote_config(self.task, config, REMOTE_EVALUATE))
+        else:
+            if config is not None:
+                results = await self._invoke_all(config)
+            elif self.task and self.task.config is not None:
+                results = await self._invoke_all(self.task.config)
+            else:
+                raise ValueError("No config or task provided for local environment")
+        if len(results) == 1:
+            return results[0]
+        else:
+            return results
+    async def reset(self, configs: HudStyleConfigs | None = None) -> tuple[
+        Observation, dict[str, Any]
+    ]:
+        """
+        Reset the environment.
+        Takes an action-less step to obtain the first observation and, when the
+        bound task has a prompt, stores that prompt as the observation text.
+        Args:
+            configs: The configuration to use for the reset (currently unused
+                because the setup call below is commented out)
+        Returns:
+            Observation: The first observation from the environment
+            info: Dictionary of information about the environment
+        """
+        # NOTE(review): setup is disabled here, so `configs` has no effect - confirm intended
+        #await self._setup(configs)
+        # step() with no actions only fetches an observation; reward and done are discarded
+        obs, _, _, info = await self.step()
+        if self.task and self.task.prompt:
+            obs.text = self.task.prompt
+        return obs, info
+    async def step(self, actions: list[CLA] | None = None) -> tuple[
+        Observation, float, bool, dict[str, Any]
+    ]:
+        """Execute a step in the environment.
+        Args:
+            action: The action to execute
+        Returns:
+            Any: Result of the step execution
+        """
+        if actions is None or len(actions) == 0:
+            actions = []
+        result, stdout, stderr = await self.client.invoke(
+            HudStyleConfig(function="step", args=[[action.model_dump() for action in actions]])
+        )
+        if stdout:
+            logger.info("Step produced stdout: %s", stdout.decode())
+        if stderr:
+            logger.warning("Step produced stderr: %s", stderr.decode())
+        observation = Observation.model_validate(result["observation"], strict=True)
+        return observation, 0, False, {}
+    async def get_urls(self) -> dict[str, Any]:
+        """Get URLs for the environment.
+        Returns:
+            dict: Dictionary of URLs for accessing the environment
+        """
+        data, _, _ = await self.client.invoke(HudStyleConfig(function="get_urls", args=[]))
+        self.url = data.get("url")
+        self.live_url = data.get("live_url")
+        return {
+            "url": self.url,
+            "live_url": self.live_url,
+        }
+    async def close(self) -> None:
+        """Close the environment.
+        This should release any resources and clean up the environment.
+        The work is delegated entirely to the client's ``close``.
+        """
+        await self.client.close()

hud/env/local_docker_client.py ADDED Viewed

@@ -0,0 +1,249 @@
+from __future__ import annotations
+import io
+import logging
+import tarfile
+import tempfile
+import uuid
+from typing import TYPE_CHECKING, Any
+import aiodocker
+from aiohttp import ClientTimeout
+from hud.env.docker_client import DockerClient, EnvironmentStatus
+from hud.utils import ExecuteResult
+if TYPE_CHECKING:
+    from aiodocker.containers import DockerContainer
+    from aiodocker.stream import Stream
+logger = logging.getLogger("hud.env.docker_env_client")
+class LocalDockerClient(DockerClient):
+    """
+    Docker-based environment client implementation.
+    Talks to Docker through an ``aiodocker.Docker`` connection and controls a
+    single container identified by its ID.
+    """
+    @classmethod
+    async def create(cls, dockerfile: str, ports: list[int] | None = None) -> tuple[LocalDockerClient, dict[str, Any]]:
+        """
+        Creates a Docker environment client from a dockerfile.
+        Args:
+            dockerfile: The dockerfile content to build the Docker image
+        Returns:
+            DockerClient: An instance of the Docker environment client
+        """
+        # Create a unique image tag
+        image_tag = f"hud-env-{uuid.uuid4().hex[:8]}"
+        # Initialize Docker client
+        docker_client = aiodocker.Docker()
+        # Create fileobj for the Dockerfile
+        dockerfile_fileobj = io.BytesIO(dockerfile.encode("utf-8"))
+        if ports is None:
+            ports = []
+        # Create a tar file from the dockerfile
+        with tempfile.NamedTemporaryFile() as f:
+            with tarfile.open(mode="w:gz", fileobj=f) as t:
+                dfinfo = tarfile.TarInfo("Dockerfile")
+                dfinfo.size = len(dockerfile_fileobj.getvalue())
+                dockerfile_fileobj.seek(0)
+                t.addfile(dfinfo, dockerfile_fileobj)
+            # Reset the file pointer to the beginning of the file
+            f.seek(0)
+            # Build the image
+            build_stream = await docker_client.images.build(
+                fileobj=f,
+                encoding="gzip",
+                tag=image_tag,
+                rm=True,
+                pull=True,
+                forcerm=True,
+            )
+        # Print build output
+        output = ""
+        for chunk in build_stream:
+            if "stream" in chunk:
+                logger.info(chunk["stream"])
+                output += chunk["stream"]
+        # Create and start the container
+        container_config = {
+            "Image": image_tag,
+            "Tty": True,
+            "OpenStdin": True,
+            "Cmd": None,
+            "HostConfig": {
+                "PublishAllPorts": True,
+            },
+            "ExposedPorts": {
+                f"{port}/tcp": {} for port in ports
+            },
+        }
+        container = await docker_client.containers.create(config=container_config)
+        await container.start()
+        # Return the controller instance
+        return cls(docker_client, container.id), {"build_output": output}
+    def __init__(self, docker_conn: aiodocker.Docker, container_id: str) -> None:
+        """
+        Initialize the DockerClient.
+        Args:
+            docker_conn: Docker client connection
+            container_id: ID of the Docker container to control
+        """
+        super().__init__()
+        # Store container ID instead of container object; the object is looked up on demand
+        self._container_id = container_id
+        # Keep the connection that was passed in; close() shuts it down
+        self._docker = docker_conn
+    @property
+    def container_id(self) -> str:
+        """Get the container ID."""
+        return self._container_id
+    @container_id.setter
+    def container_id(self, value: str) -> None:
+        """Set the container ID; later container lookups use the new value."""
+        self._container_id = value
+    async def _get_container(self) -> DockerContainer:
+        """Get the container object from aiodocker, looked up by the current container ID."""
+        return await self._docker.containers.get(self.container_id)
+    async def get_status(self) -> EnvironmentStatus:
+        """
+        Get the current status of the Docker environment.
+        Returns:
+            EnvironmentStatus: The current status of the environment
+        """
+        try:
+            container = await self._get_container()
+            container_data = await container.show()
+            # Check the container state
+            state = container_data.get("State", {})
+            status = state.get("Status", "").lower()
+            if status == "running":
+                return EnvironmentStatus.RUNNING
+            elif status == "created" or status == "starting":
+                return EnvironmentStatus.INITIALIZING
+            elif status in ["exited", "dead", "removing", "paused"]:
+                return EnvironmentStatus.COMPLETED
+            else:
+                # Any other state is considered an error
+                return EnvironmentStatus.ERROR
+        except Exception:
+            # If we can't connect to the container or there's any other error
+            return EnvironmentStatus.ERROR
+    async def execute(
+        self,
+        command: list[str],
+        *,
+        timeout: int | None = None,
+    ) -> ExecuteResult:
+        """
+        Execute a command in the container.
+        Args:
+            command: Command to execute
+            workdir: Working directory for the command
+        Returns:
+            ExecuteResult: Result of the command execution
+        """
+        container = await self._get_container()
+        exec_result = await container.exec(
+            cmd=command,
+        )
+        output: Stream = exec_result.start(timeout=ClientTimeout(timeout), detach=False)
+        stdout_data = bytearray()
+        stderr_data = bytearray()
+        while True:
+            message = await output.read_out()
+            if message is None:
+                break
+            if message.stream == 1:  # stdout
+                stdout_data.extend(message.data)
+            elif message.stream == 2:  # stderr
+                stderr_data.extend(message.data)
+        return ExecuteResult(
+            stdout=bytes(stdout_data),
+            stderr=bytes(stderr_data),
+            # TODO: Get the exit code from the output
+            exit_code=0,
+        )
+    async def get_archive(self, path: str) -> bytes:
+        """
+        Get an archive of a path from the container.
+        Args:
+            path: Path in the container to archive
+        Returns:
+            bytes: Tar archive containing the path contents
+        """
+        container = await self._get_container()
+        tarfile = await container.get_archive(path)
+        # we know tarfile has fileobj BytesIO
+        # read the tarfile into a bytes object
+        fileobj = tarfile.fileobj
+        if not isinstance(fileobj, io.BytesIO):
+            raise TypeError("fileobj is not a BytesIO object")
+        return fileobj.getvalue()
+    async def put_archive(self, path: str, data: bytes) -> None:
+        """
+        Put an archive of data at a path in the container.
+        Args:
+            path: Path in the container to extract the archive to
+            data: Bytes of the tar archive to extract
+        Returns:
+            bool: True if successful
+        """
+        container = await self._get_container()
+        # Convert bytes to a file-like object for aiodocker
+        file_obj = io.BytesIO(data)
+        await container.put_archive(path=path, data=file_obj)
+    async def close(self) -> None:
+        """
+        Close the Docker environment by stopping and removing the container.
+        Cleanup errors are logged as warnings rather than raised, and the Docker
+        connection is closed in every case.
+        """
+        try:
+            container = await self._get_container()
+            await container.stop()
+            await container.delete()
+        except Exception as e:
+            # Log the error but don't raise it since this is cleanup
+            logger.warning("Error during Docker container cleanup: %s", e)
+        finally:
+            # Always release the Docker connection, even when cleanup failed
+            await self._docker.close()

hud/env/remote_client.py ADDED Viewed

@@ -0,0 +1,185 @@
+from __future__ import annotations
+import logging
+from base64 import b64decode
+from typing import TYPE_CHECKING, Any
+from hud.env.client import Client
+from hud.server import make_request
+from hud.settings import settings
+from hud.types import EnvironmentStatus
+from hud.utils import ExecuteResult
+if TYPE_CHECKING:
+    from hud.utils.config import HudStyleConfig
+logger = logging.getLogger("hud.env.remote_env_client")
+class RemoteClient(Client):
+    """
+    Remote environment client implementation.
+    Uses the HUD API to manage a remote environment.
+    Every operation is a request sent with ``make_request`` to ``settings.base_url``.
+    """
+    @classmethod
+    async def create(
+        cls,
+        *,
+        gym_id: str | None = None,
+        job_id: str | None = None,
+        task_id: str | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> tuple[RemoteClient, dict[str, Any]]:
+        """
+        Creates a remote environment client from a dockerfile or gym_id.
+        Args:
+            dockerfile: The dockerfile content to build the environment
+            gym_id: The gym_id of the environment to create
+            metadata: Metadata to associate with the environment
+        Returns:
+            RemoteClient: An instance of the remote environment client
+        """
+        # Validate arguments
+        if metadata is None:
+            metadata = {}
+        request_data = {
+            # still named run_id for backwards compatibility
+            "run_id": job_id,
+            "metadata": metadata,
+            "gym_id": gym_id,
+            "task_id": task_id,
+        }
+        # Create a new environment via the HUD API
+        response = await make_request(
+            method="POST",
+            url=f"{settings.base_url}/v2/create_environment",
+            json=request_data,
+            api_key=settings.api_key,
+        )
+        # Get the environment ID from the response
+        env_id = response.get("id")
+        if not env_id:
+            raise ValueError("Failed to create remote environment: No ID returned")
+        # Create the controller instance
+        controller = cls(env_id)
+        build_data = response.get("metadata", {})
+        return controller, build_data
+    def __init__(self, env_id: str) -> None:
+        """
+        Initialize the RemoteClient.
+        Args:
+            env_id: ID of the remote environment to control
+        """
+        super().__init__()
+        # Exposed read-only through the env_id property
+        self._env_id = env_id
+    @property
+    def env_id(self) -> str:
+        """The ID of the remote environment (read-only; no setter is defined)."""
+        return self._env_id
+    async def get_status(self) -> EnvironmentStatus:
+        """
+        Get the current status of the remote environment.
+        Returns:
+            EnvironmentStatus: The current status of the environment
+        """
+        try:
+            response = await make_request(
+                method="GET",
+                url=f"{settings.base_url}/v2/environments/{self.env_id}/state",
+                api_key=settings.api_key,
+            )
+            logger.debug("Environment status response: %s", response)
+            status = response.get("state", "").lower()
+            if status == "running":
+                return EnvironmentStatus.RUNNING
+            elif status == "initializing" or status == "pending":
+                return EnvironmentStatus.INITIALIZING
+            elif status == "completed" or status == "terminated":
+                return EnvironmentStatus.COMPLETED
+            else:
+                # Any other status is considered an error
+                logger.warning("Abnormal environment status response: %s", response)
+                return EnvironmentStatus.ERROR
+        except Exception:
+            # If we can't connect to the API or there's any other error
+            logger.info("(potentially transient) Error getting environment status")
+            return EnvironmentStatus.ERROR
+    async def execute(
+        self,
+        command: list[str],
+        *,
+        workdir: str | None = None,
+        timeout: float | None = None,
+    ) -> ExecuteResult:
+        """
+        Execute a command in the environment.
+        No-op in some environments (like browser use).
+        Args:
+            command: Command to execute
+            workdir: Working directory for the command (ignored for remote environments)
+        Returns:
+            ExecuteResult: Result of the command execution
+        """
+        data = await make_request(
+            method="POST",
+            url=f"{settings.base_url}/v2/environments/{self.env_id}/execute",
+            json={
+               "command": command,
+               "workdir": workdir,
+               "timeout": timeout,
+            },
+            api_key=settings.api_key,
+        )
+        return ExecuteResult(
+            stdout=b64decode(data["stdout"]),
+            stderr=b64decode(data["stderr"]),
+            exit_code=data["exit_code"]
+        )
+    async def invoke(self, config: HudStyleConfig) -> tuple[Any, bytes, bytes]:
+        """
+        Invoke a function in the environment.
+        """
+        data = await make_request(
+            method="POST",
+            url=f"{settings.base_url}/v2/environments/{self.env_id}/invoke",
+            json=config.model_dump(),
+            api_key=settings.api_key,
+        )
+        return data["result"], b64decode(data["stdout"]), b64decode(data["stderr"])
+    async def close(self) -> None:
+        """
+        Close the remote environment by making a request to the server.
+        Errors from the request are not caught here and propagate to the caller.
+        """
+        await make_request(
+            method="POST",
+            url=f"{settings.base_url}/v2/environments/{self.env_id}/close",
+            api_key=settings.api_key,
+        )

hud-python 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

Potentially problematic release.

hud-python 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl