PyPI - hud-python - Versions diffs - 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

hud-python 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (46) hide show

hud/__init__.py +16 -12
hud/adapters/__init__.py +4 -2
hud/adapters/claude/adapter.py +0 -1
hud/adapters/common/adapter.py +11 -10
hud/adapters/common/types.py +27 -13
hud/adapters/operator/__init__.py +5 -0
hud/adapters/operator/adapter.py +93 -0
hud/agent/__init__.py +7 -0
hud/agent/base.py +109 -0
hud/agent/claude.py +187 -0
hud/agent/operator.py +190 -0
hud/env/__init__.py +11 -0
hud/env/client.py +35 -0
hud/env/docker_client.py +306 -0
hud/env/environment.py +181 -0
hud/env/local_docker_client.py +249 -0
hud/env/remote_client.py +185 -0
hud/env/remote_docker_client.py +221 -0
hud/evaluators/__init__.py +10 -0
hud/evaluators/base.py +31 -0
hud/evaluators/inspect.py +29 -0
hud/evaluators/judge.py +213 -0
hud/evaluators/match.py +163 -0
hud/evaluators/remote.py +78 -0
hud/gym.py +101 -15
hud/job.py +185 -0
hud/server/__init__.py +2 -2
hud/server/requests.py +87 -0
hud/settings.py +13 -2
hud/task.py +133 -0
hud/taskset.py +95 -0
hud/trajectory.py +90 -0
hud/types.py +65 -0
hud/utils/__init__.py +4 -2
hud/utils/common.py +69 -0
hud/utils/config.py +182 -4
hud/utils/telemetry.py +67 -0
hud_python-0.2.0.dist-info/METADATA +188 -0
hud_python-0.2.0.dist-info/RECORD +44 -0
{hud_python-0.1.5.dist-info → hud_python-0.2.0.dist-info}/licenses/LICENSE +1 -1
hud/client.py +0 -200
hud/environment.py +0 -318
hud/run.py +0 -208
hud_python-0.1.5.dist-info/METADATA +0 -125
hud_python-0.1.5.dist-info/RECORD +0 -21
{hud_python-0.1.5.dist-info → hud_python-0.2.0.dist-info}/WHEEL +0 -0

hud/taskset.py ADDED Viewed

@@ -0,0 +1,95 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING
+from pydantic import BaseModel
+from hud.server import make_request
+from hud.settings import settings
+from hud.task import Task
+if TYPE_CHECKING:
+    from inspect_ai.dataset import Dataset
+class TaskSet(BaseModel):
+    """
+    An ordered group of tasks that are benchmarked together.
+    ``len(taskset)`` and ``taskset[i]`` are forwarded to the ``tasks`` list.
+    Attributes:
+        id: Identifier of the taskset, when it has one
+        description: Free-form description of the taskset
+        tasks: The Task objects that make up the taskset
+    """
+    id: str | None = None
+    description: str | None = None
+    tasks: list[Task] = []
+    def __len__(self) -> int:
+        """
+        Counts the tasks held by this taskset.
+        Returns:
+            int: How many tasks the taskset contains
+        """
+        task_count = len(self.tasks)
+        return task_count
+    def __getitem__(self, index: int) -> Task:
+        """
+        Looks up a task by position, enabling ``taskset[index]``.
+        Args:
+            index: Position of the wanted task
+        Returns:
+            Task: The task stored at that position
+        Raises:
+            IndexError: If no task exists at that position
+        """
+        selected = self.tasks[index]
+        return selected
+async def load_taskset(taskset_id: str, api_key: str | None = None) -> TaskSet:
+    """
+    Fetches the tasks of a taskset from the server and wraps them in a TaskSet.
+    Args:
+        taskset_id: The ID of the taskset to fetch
+        api_key: API key for the request; settings.api_key is used when None
+    Returns:
+        TaskSet: Built from the "evalset" entry of the response, with id set to taskset_id
+    """
+    effective_key = settings.api_key if api_key is None else api_key
+    endpoint = f"{settings.base_url}/v2/tasksets/{taskset_id}/tasks"
+    response = await make_request(method="GET", url=endpoint, api_key=effective_key)
+    payload = {"id": taskset_id, "tasks": response["evalset"]}
+    return TaskSet.model_validate(payload)
+def load_from_inspect(dataset: Dataset) -> TaskSet:
+    """
+    Builds a TaskSet out of an inspect-ai dataset.
+    Every sample is converted with Task.from_inspect_sample; the dataset's
+    name becomes the taskset description and the taskset has no id.
+    Args:
+        dataset: The inspect-ai dataset to convert
+    Returns:
+        TaskSet: A freshly created TaskSet
+    """
+    converted: list[Task] = []
+    for sample in dataset:
+        converted.append(Task.from_inspect_sample(sample))
+    return TaskSet(id=None, tasks=converted, description=dataset.name)

hud/trajectory.py ADDED Viewed

@@ -0,0 +1,90 @@
+# ruff: noqa: T201
+from __future__ import annotations
+import datetime
+from IPython.display import HTML, Markdown, display
+from pydantic import BaseModel, Field
+class TrajectoryStep(BaseModel):
+    """Model representing a single step within a task run's trajectory."""
+    # Location of the step's observation image; Trajectory.display() embeds it in an <img> tag
+    observation_url: str | None = None
+    # Textual observation for the step; Trajectory.display() prints it when set
+    observation_text: str | None = None
+    # Actions recorded for the step (printed as-is; the dict schema is not defined here)
+    actions: list[dict]
+    # Step start/end times as strings; Trajectory.display() parses them with
+    # datetime.fromisoformat() after rewriting "Z" to "+00:00"
+    start_timestamp: str | None = None
+    end_timestamp: str | None = None
+class Trajectory(BaseModel):
+    """
+    Model representing a single task run's trajectory information.
+    Attributes:
+        id: Identifier of the trajectory
+        reward: Optional reward value
+        logs: Optional log text
+        error: Optional error text
+        trajectory: The recorded steps, in order (empty by default)
+    """
+    id: str
+    reward: float | None = None
+    logs: str | None = None
+    error: str | None = None
+    trajectory: list[TrajectoryStep] = Field(default_factory=list)
+    def display(self) -> None:
+        """
+        Render every step of the trajectory in a Jupyter/IPython frontend.
+        For each step this shows the observation image and/or text, the actions,
+        the step's own duration and the time elapsed since the first step started.
+        A duration prints as "N/A" when the timestamps it needs are missing; the
+        step duration prints as "Error parsing timestamps" when they cannot be
+        parsed or compared. An empty trajectory renders nothing.
+        """
+        def parse_timestamp(raw: str) -> datetime.datetime:
+            # A trailing "Z" is only understood by fromisoformat() on Python 3.11+
+            return datetime.datetime.fromisoformat(raw.replace("Z", "+00:00"))
+        def format_duration(delta: datetime.timedelta) -> str:
+            elapsed = delta.total_seconds()
+            return f"{int(elapsed // 60)}m {elapsed % 60:.2f}s"
+        # Start of the whole run; stays None when unknown or unparseable so that
+        # a bad first timestamp degrades to "N/A" instead of aborting the display
+        t_start_dt = None
+        if self.trajectory and self.trajectory[0].start_timestamp:
+            try:
+                t_start_dt = parse_timestamp(self.trajectory[0].start_timestamp)
+            except ValueError:
+                t_start_dt = None
+        for i, step in enumerate(self.trajectory):
+            # Use Markdown for better step separation in Jupyter
+            display(Markdown(f"### Step {i + 1}"))
+            # Observation Image
+            if step.observation_url:
+                try:
+                    # Display in Jupyter/IPython environment using HTML
+                    display(Markdown("**Observation Image:**"))
+                    display(HTML(f'<img src="{step.observation_url}" style="max-width:100%;"/>'))
+                    display(Markdown(f"[Image Link]({step.observation_url})"))
+                except Exception as e:
+                    print(f"    [Error processing image: {e}]")
+            elif not step.observation_text:  # Only print if no image AND no text
+                print("    No visual or text observation provided.")
+            # Observation Text
+            if step.observation_text:
+                print(f"    Observation Text: {step.observation_text}")
+            # Actions
+            print(f"\n    Actions: {step.actions}")  # Added newline for spacing
+            # Durations are reset for every step so a step without usable
+            # timestamps never reports the previous step's values
+            duration_str = "N/A"
+            total_duration_str = "N/A"
+            if step.start_timestamp and step.end_timestamp:
+                try:
+                    start_dt = parse_timestamp(step.start_timestamp)
+                    end_dt = parse_timestamp(step.end_timestamp)
+                    duration_str = format_duration(end_dt - start_dt)
+                    # Total duration up to the end of this step
+                    if t_start_dt is not None:
+                        total_duration_str = format_duration(end_dt - t_start_dt)
+                except (ValueError, TypeError):
+                    # ValueError: not ISO format; TypeError: naive and aware values mixed
+                    duration_str = "Error parsing timestamps"
+            print(f"    Step Duration: {duration_str}")
+            print(f"    Total Duration: {total_duration_str}")
+            display(Markdown("---"))  # Use Markdown horizontal rule

hud/types.py ADDED Viewed

@@ -0,0 +1,65 @@
+from __future__ import annotations
+import enum
+from pathlib import Path
+from typing import Any, Literal
+from pydantic import BaseModel
+class CustomGym(BaseModel):
+    """
+    Public environment specification with a dockerfile and controller.
+    If the location is remote, the env will be created on the server.
+    If the location is local, the env will be created locally via docker.
+    The dockerfile can be specified directly or automatically found in the controller_source_dir.
+    If neither is provided, an error will be raised during validation.
+    """
+    type: Literal["public"] = "public"
+    dockerfile: str | None = None
+    location: Literal["local", "remote"]
+    ports: list[int] | None = None
+    # If path, then it is a development environment on the local computer
+    # If none, then the controller must be installed in the environment through the dockerfile
+    # Can be provided as a string or Path object
+    controller_source_dir: str | Path | None = None
+    def model_post_init(self, __context: Any, /) -> None:
+        """
+        Validate and set up dockerfile if not explicitly provided.
+        Normalizes controller_source_dir to a Path and, when no dockerfile text
+        was given, loads it from "<controller_source_dir>/Dockerfile".
+        Raises:
+            ValueError: If neither dockerfile nor controller_source_dir is set,
+                or if no Dockerfile file exists in controller_source_dir
+        """
+        # Convert string path to Path object if needed
+        if isinstance(self.controller_source_dir, str):
+            self.controller_source_dir = Path(self.controller_source_dir)
+        if self.dockerfile is not None:
+            return
+        if self.controller_source_dir is None:
+            raise ValueError("Either dockerfile or controller_source_dir must be provided")
+        # Look for Dockerfile in the controller_source_dir
+        dockerfile_path = self.controller_source_dir / "Dockerfile"
+        # is_file() also rejects a directory that happens to be named "Dockerfile"
+        if not dockerfile_path.is_file():
+            raise ValueError(f"Dockerfile not found in {self.controller_source_dir}")
+        # Explicit encoding so the content does not depend on the platform locale
+        self.dockerfile = dockerfile_path.read_text(encoding="utf-8")
+# A gym is either a CustomGym specification or a plain string.
+# Strings are identifiers for gyms on the HUD server
+Gym = CustomGym | str
+class EnvironmentStatus(str, enum.Enum):
+    """
+    Status of the environment.
+    Members are also ``str`` instances, so each one compares equal to its
+    lowercase string value (e.g. ``EnvironmentStatus.RUNNING == "running"``).
+    Attributes:
+        INITIALIZING: The environment is initializing
+        RUNNING: The environment is running
+        COMPLETED: The environment is completed
+        ERROR: The environment is in an error state
+    """
+    INITIALIZING = "initializing"
+    RUNNING = "running"
+    COMPLETED = "completed"
+    ERROR = "error"

hud/utils/__init__.py CHANGED Viewed

@@ -1,5 +1,7 @@
 from __future__ import annotations
-from .config import configuration
+from .common import ExecuteResult
+from .config import HudStyleConfig, HudStyleConfigs, expand_config
+from .telemetry import stream
-__all__ = ["configuration"]
+__all__ = ["ExecuteResult", "HudStyleConfig", "HudStyleConfigs", "expand_config", "stream"]

hud/utils/common.py ADDED Viewed

@@ -0,0 +1,69 @@
+from __future__ import annotations
+import io
+import logging
+import tarfile
+from typing import TYPE_CHECKING, TypedDict
+from hud.server.requests import make_request
+from hud.settings import settings
+if TYPE_CHECKING:
+    from pathlib import Path
+logger = logging.getLogger("hud.utils.common")
+class ExecuteResult(TypedDict):
+    """
+    Result of an execute command.
+    Attributes:
+        stdout: Standard output from the command, as raw bytes
+        stderr: Standard error from the command, as raw bytes
+        exit_code: Exit code of the command
+    """
+    stdout: bytes
+    stderr: bytes
+    exit_code: int
+def directory_to_tar_bytes(directory_path: Path) -> bytes:
+    """
+    Converts a directory to a tar archive and returns it as bytes.
+    This function creates an uncompressed tar archive of the specified directory
+    in memory, without writing to a temporary file on disk. Only files are added
+    (directories themselves are not recorded), each under its path relative to
+    ``directory_path``. Entries are added in sorted path order so the member
+    order does not depend on the filesystem's listing order.
+    Args:
+        directory_path: Path to the directory to convert
+    Returns:
+        Bytes of the tar archive
+    """
+    output = io.BytesIO()
+    with tarfile.open(fileobj=output, mode="w") as tar:
+        # Walk through the directory
+        for file_path in sorted(directory_path.rglob("*")):
+            if not file_path.is_file():
+                continue
+            # Calculate relative path for the archive
+            rel_path = file_path.relative_to(directory_path)
+            logger.debug("Adding %s to tar archive", rel_path)
+            tar.add(file_path, arcname=str(rel_path))
+    # getvalue() returns the whole buffer regardless of the stream position
+    return output.getvalue()
+async def get_gym_id(gym_name_or_id: str) -> str:
+    """
+    Resolve a gym name or ID to the gym's ID by asking the server.
+    Args:
+        gym_name_or_id: Name or ID of the gym, placed in the request URL
+    Returns:
+        The "id" entry of the server's response
+    """
+    endpoint = f"{settings.base_url}/v1/gyms/{gym_name_or_id}"
+    response = await make_request(method="GET", url=endpoint, api_key=settings.api_key)
+    return response["id"]

hud/utils/config.py CHANGED Viewed

@@ -1,7 +1,185 @@
 from __future__ import annotations
-from hud.settings import settings
+import logging
+import re
+from typing import TYPE_CHECKING, Any
+from pydantic import BaseModel
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from hud.task import Task
+logger = logging.getLogger("hud.utils.config")
+# Prepended to the function name when create_remote_config() falls back to
+# addressing a task purely by its id
+REMOTE_FUNCTION_PREFIX = "private_"
+# NOTE(review): presumably the function names passed to create_remote_config()
+# for the setup and evaluate phases; not referenced in the visible part of this
+# module - confirm against callers
+REMOTE_SETUP = "setup"
+REMOTE_EVALUATE = "evaluate"
+class HudStyleConfig(BaseModel):
+    # One function call description: a dotted function path plus its arguments.
+    # len(), indexing and iteration all operate on ``args``.
+    function: str  # Format: "x.y.z"
+    args: list[Any]  # Must be json serializable
+    id: str | None = None  # Optional id for remote execution
+    def __str__(self) -> str:
+        """Render as "<function>: <arg>, <arg>, ..."."""
+        rendered_args = ", ".join(map(str, self.args))
+        return f"{self.function}: {rendered_args}"
+    def __iter__(self) -> Iterator[Any]:
+        """Iterate over the arguments (not over the model's fields)."""
+        return iter(self.args)
+    def __getitem__(self, index: int) -> Any:
+        """Return the argument stored at ``index``."""
+        wanted = self.args[index]
+        return wanted
+    def __len__(self) -> int:
+        """Return the number of arguments."""
+        arg_count = len(self.args)
+        return arg_count
+# Type alias for the shorthand config, which just converts to function name and args:
+# expand_config() reads element 0 as the function name and the remaining elements as args
+ShorthandConfig = tuple[str | dict[str, Any] | list[str] | list[dict[str, Any]], ...]
+# Type alias for multiple config formats, i.e. the input shapes expand_config() accepts
+HudStyleConfigs = ShorthandConfig | HudStyleConfig | list[HudStyleConfig] | dict[str, Any] | str
+def _is_valid_python_name(name: str) -> bool:
+    """
+    Check if a string is a valid ASCII Python identifier.
+    The whole string must consist of a letter or underscore followed by
+    letters, digits or underscores. Reserved keywords are not rejected.
+    Args:
+        name: The candidate identifier
+    Returns:
+        bool: True if the entire string has identifier shape
+    """
+    # fullmatch() rather than match() with "$": "$" also matches just before a
+    # trailing newline, which would accept "name\n"
+    return re.fullmatch(r"[a-zA-Z_][a-zA-Z0-9_]*", name) is not None
+def _validate_hud_config(config: dict) -> HudStyleConfig:
+    """
+    Validate and convert a dictionary to an HudStyleConfig.
+    Args:
+        config: Dictionary with a "function" key and optional "args" and "id" keys
+    Returns:
+        HudStyleConfig: The equivalent config object
+    Raises:
+        ValueError: If "function" is not a string or is not a valid dotted path
+    """
+    function = config.get("function")
+    if not isinstance(function, str):
+        raise ValueError("function must be a string")
+    # Validate function path components
+    _split_and_validate_path(function)
+    # A missing "args" key means "no arguments" instead of a KeyError; a value
+    # that is present but not a list (including None) is wrapped as one argument
+    if "args" not in config:
+        args = []
+    else:
+        raw_args = config["args"]
+        args = raw_args if isinstance(raw_args, list) else [raw_args]
+    return HudStyleConfig(function=function, args=args, id=config.get("id"))
+def _split_and_validate_path(path: str) -> None:
+    """
+    Validate every dot-separated component of a function path.
+    Args:
+        path: Dotted function path such as "x.y.z"
+    Raises:
+        ValueError: If the path is empty or any component is not a valid identifier
+    """
+    # str.split() never returns an empty list, so emptiness has to be checked
+    # on the string itself
+    if not path:
+        raise ValueError("Empty function path")
+    # Validate each part
+    for part in path.split("."):
+        if not _is_valid_python_name(part):
+            raise ValueError(f"Invalid Python identifier in path: {part}")
+def expand_config(config: HudStyleConfigs) -> list[HudStyleConfig]:
+    """
+    Process a config into a standardized list of HudStyleConfig objects.
+    Args:
+        config: Can be:
+            - A HudStyleConfig object
+            - A list of HudStyleConfig objects (returned as is, not copied)
+            - A dictionary with "function" and "args" keys
+            - A string naming a function that is called without args
+            - A tuple where first element is function name and rest are args
+    Returns:
+        list[HudStyleConfig]: List of standardized configurations
+    Raises:
+        ValueError: If the configuration format is invalid
+    """
+    logger.debug("Processing config: %s", config)
+    # If it's already a HudStyleConfig, just wrap it in a list
+    if isinstance(config, HudStyleConfig):
+        return [config]
+    # If it's a list of HudStyleConfigs, return as is
+    if isinstance(config, list) and all(isinstance(item, HudStyleConfig) for item in config):
+        return config
+    # Handle dictionary configuration
+    if isinstance(config, dict):
+        return [_validate_hud_config(config)]
+    # A bare string is a function name without args
+    if isinstance(config, str):
+        return [HudStyleConfig(function=config, args=[])]
+    # Handle tuple format
+    if isinstance(config, tuple):
+        if len(config) < 1 or not isinstance(config[0], str):
+            # Single f-string expression; the tuple itself is reported because
+            # its type is always "tuple" in this branch
+            error_msg = (
+                "Invalid tuple configuration. "
+                f"Expected tuple[str, ...], got: {config!r}"
+            )
+            logger.error(error_msg)
+            raise ValueError(error_msg)
+        # First element is the function name, rest are args
+        function_name = config[0]
+        args = list(config[1:])
+        return [HudStyleConfig(function=function_name, args=args)]
+    # Unknown configuration type
+    error_msg = f"Unknown configuration type: {type(config)}"
+    logger.error(error_msg)
+    raise ValueError(error_msg)
+def _has_config(candidate: Any) -> bool:
+    """Tell whether a config value was actually supplied."""
+    # HudStyleConfig defines __len__, so an instance with no args is falsy even
+    # though it names a function to call; treat every instance as supplied
+    return candidate is not None and (
+        isinstance(candidate, HudStyleConfig) or bool(candidate)
+    )
+def create_remote_config(
+    task: Task | None = None,
+    config: HudStyleConfigs | None = None,
+    function: str | None = None,
+) -> list[HudStyleConfig]:
+    """
+    Create a configuration based on provided inputs.
+    The task and its config objects are never modified; when a task id has to
+    be attached, it is set on a copy.
+    Args:
+        task: Task object with configuration
+        config: Direct configuration (expanded or not)
+        function: Function name to use
+    Returns:
+        list[HudStyleConfig]: List of standardized configurations
+    Raises:
+        ValueError: If neither function nor config is given, or if function is
+            given without a config or a task
+    Logic:
+        0) If no function: expand the explicit config and return it directly
+        1) If explicit config: expand and return HudStyleConfig with func of the function,
+        and args of expanded config
+        2) If task has the specified function defined: use that
+        3) If no task function: check for task.config and use that
+        4) If no config: use task.id and create private_[function]
+        5) Otherwise: return the function with no args
+    """
+    # If no function provided, just expand the config and return it directly
+    if function is None:
+        if _has_config(config):
+            return expand_config(config)
+        raise ValueError("Either function or config must be provided")
+    # Case 1: Explicit config provided
+    if _has_config(config):
+        return [HudStyleConfig(function=function, args=expand_config(config))]
+    # Must have a task for the remaining cases
+    if task is None:
+        raise ValueError("Either task or config must be provided")
+    # Case 2: Task has the specified function attribute
+    task_config = getattr(task, function, None)
+    if _has_config(task_config):
+        # New list: expand_config() may hand back the task's own list object
+        expanded_configs = list(expand_config(task_config))
+        if task.id:
+            # for remote IDs; set on a copy so the task's config keeps its id
+            expanded_configs[0] = expanded_configs[0].model_copy(update={"id": task.id})
+        return [HudStyleConfig(function=function, args=expanded_configs)]
+    # Case 3: Check for task.config
+    if hasattr(task, "config") and task.config:
+        remote_args = dict(task.config)
+        if task.id:
+            remote_args["id"] = task.id  # for remote IDs
+        return [HudStyleConfig(function=function, args=[remote_args])]
+    # Case 4: Use task.id
+    if task.id:
+        return [HudStyleConfig(function=f"{REMOTE_FUNCTION_PREFIX}{function}", args=[task.id])]
+    # No valid configuration found: call the function without args
+    return [HudStyleConfig(function=function, args=[])]
-# For backwards compatibility, keep 'configuration'
-# but have it point to the settings instance
-configuration = settings

hud/utils/telemetry.py ADDED Viewed

@@ -0,0 +1,67 @@
+from __future__ import annotations
+import logging
+logger = logging.getLogger(__name__)
+def stream(live_url: str | None = None) -> str:
+    """
+    Display a stream in the HUD system.
+    Embeds ``live_url`` in a 1920x1080 iframe scaled to half size and shows it
+    through IPython. Displaying is best effort: a failure (including IPython
+    not being importable) is logged as a warning and the HTML is still returned.
+    Args:
+        live_url: URL of the stream to embed
+    Returns:
+        The HTML string used to display the stream
+    Raises:
+        ValueError: If live_url is None
+    """
+    if live_url is None:
+        raise ValueError("live_url cannot be None")
+    from html import escape
+    # Escape the URL so quotes or "&" inside it cannot break out of the src attribute
+    safe_url = escape(live_url, quote=True)
+    html_content = f"""
+    <div style="width: 960px; height: 540px; overflow: hidden;">
+        <div style="transform: scale(0.5); transform-origin: top left;">
+            <iframe src="{safe_url}" width="1920" height="1080" style="border: 1px solid #ddd;">
+            </iframe>
+        </div>
+    </div>
+    """
+    try:
+        # Imported inside the try so a missing IPython only costs the inline display
+        from IPython.display import HTML, display
+        display(HTML(html_content))
+    except Exception as e:
+        logger.warning(e)
+    return html_content
+def display_screenshot(base64_image: str, width: int = 960, height: int = 540) -> str:
+    """
+    Display a base64-encoded screenshot image.
+    Args:
+        base64_image: Base64-encoded image string; a "data:image" URI prefix is
+            optional and "data:image/png;base64," is added when it is absent
+        width: Display width in pixels
+        height: Display height in pixels
+    Returns:
+        The HTML string used to display the image
+    Note:
+        This function will both display the image in IPython environments
+        and return the HTML string for other contexts. Displaying is best
+        effort: a failure (including IPython not being importable) is logged
+        as a warning.
+    """
+    from html import escape
+    # Ensure the base64 image doesn't already have the data URI prefix
+    if base64_image.startswith("data:image"):
+        img_src = base64_image
+    else:
+        img_src = f"data:image/png;base64,{base64_image}"
+    # Escape so quotes in a caller-supplied data URI cannot break out of the src attribute
+    img_src = escape(img_src, quote=True)
+    html_content = f"""
+    <div style="width: {width}px; height: {height}px; overflow: hidden; margin: 10px 0; border: 1px solid #ddd;">
+        <img src="{img_src}" style="max-width: 100%; max-height: 100%;">
+    </div>
+    """  # noqa: E501
+    # Display in IPython environments
+    try:
+        # Imported inside the try so the HTML is still returned without IPython
+        from IPython.display import HTML, display
+        display(HTML(html_content))
+    except Exception as e:
+        logger.warning(e)
+    return html_content

hud-python 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

Potentially problematic release.

hud-python 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl