PyPI - hud-python - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.0b2__py3-none-any.whl - Mend

hud-python 0.1.0py3-none-any.whl → 0.1.0b2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (9) hide show

hud/__init__.py +3 -3
hud/client.py +19 -4
hud/{env.py → environment.py} +41 -2
hud/run.py +62 -9
hud/server/requests.py +98 -11
{hud_python-0.1.0.dist-info → hud_python-0.1.0b2.dist-info}/METADATA +32 -20
{hud_python-0.1.0.dist-info → hud_python-0.1.0b2.dist-info}/RECORD +9 -9
{hud_python-0.1.0.dist-info → hud_python-0.1.0b2.dist-info}/WHEEL +0 -0
{hud_python-0.1.0.dist-info → hud_python-0.1.0b2.dist-info}/licenses/LICENSE +0 -0

hud/__init__.py CHANGED Viewed

@@ -5,14 +5,14 @@ HUD Gym SDK - A Python SDK for interacting with HUD environments.
 from __future__ import annotations
 from hud.client import HUDClient
-from hud.env import Env, EvalSet, Observation, TaskResult
+from hud.environment import Environment, EvalSet, Observation, TaskResult
 from hud.gym import Gym
 from hud.run import Run
-__version__ = "0.1.0"
+__version__ = "0.1.0b2"
 __all__ = [
-    "Env",
+    "Environment",
     "EvalSet",
     "Gym",
     "HUDClient",

hud/client.py CHANGED Viewed

@@ -8,7 +8,7 @@ import json
 from typing import Any
 from .adapters.common import Adapter
-from .env import EvalSet
+from .environment import EvalSet
 from .gym import Gym
 from .run import Run, RunResponse
 from .server import make_request, make_sync_request
@@ -23,15 +23,15 @@ class HUDClient:
     evalsets, and create runs.
     """
-    def __init__(self, api_key: str) -> None:
+    def __init__(self, api_key: str | None = None) -> None:
         """
         Initialize the HUD client with an API key.
         Args:
             api_key: API key for authentication with the HUD API
         """
-        self.api_key = api_key
-        settings.api_key = api_key  # Set global config
+        self.api_key = api_key or settings.api_key
+        settings.api_key = self.api_key
     async def load_gym(self, id: str) -> Gym:
         """
@@ -182,3 +182,18 @@ class HUDClient:
             config=config,
             metadata=metadata,
         )
+    def display_stream(self, live_url: str) -> None:
+        """
+        Display a live stream inline using IPython's display machinery.
+        The stream is embedded as a 1920x1080 iframe scaled by 0.5 inside a
+        960x540 container.
+        Args:
+            live_url: URL of the stream to embed in the iframe
+        """
+        # Imported inside the method so IPython is not needed at module import time.
+        from html import escape
+        from IPython.display import HTML, display
+        # Escape the URL (quotes included) so it cannot break out of the src
+        # attribute and inject markup into the rendered output.
+        safe_url = escape(live_url, quote=True)
+        html_content = f"""
+        <div style="width: 960px; height: 540px; overflow: hidden;">
+            <div style="transform: scale(0.5); transform-origin: top left;">
+                <iframe src="{safe_url}" width="1920" height="1080" style="border: 1px solid #ddd;">
+                </iframe>
+            </div>
+        </div>
+        """
+        display(HTML(html_content))

hud/{env.py → environment.py} RENAMED Viewed

@@ -1,5 +1,8 @@
 from __future__ import annotations
+import asyncio
+import enum
+import logging
 from typing import TYPE_CHECKING, Any
 from pydantic import BaseModel
@@ -10,6 +13,7 @@ from hud.settings import settings
 if TYPE_CHECKING:
     from .adapters.common import Adapter
+logger = logging.getLogger("hud.environment")
 class Observation(BaseModel):
     """
@@ -38,8 +42,29 @@ class TaskResult(BaseModel):
     terminated: bool
     info: dict[str, Any]
+class EnvironmentStatus(str, enum.Enum):
+    """
+    Status of the environment.
+    Members also subclass str, so each member compares equal to its string value.
+    Attributes:
+        INITIALIZING: The environment is initializing
+        RUNNING: The environment is running
+        COMPLETED: The environment is completed
+        ERROR: The environment is in an error state
+    """
+    INITIALIZING = "initializing"
+    RUNNING = "running"
+    COMPLETED = "completed"
+    ERROR = "error"
+# Log-message suffixes keyed by status value; looked up by wait_for_ready()
+# once the environment reports one of these states. INITIALIZING has no entry.
+status_messages = {
+    EnvironmentStatus.RUNNING.value: "is running",
+    EnvironmentStatus.ERROR.value: "had an error initializing",
+    EnvironmentStatus.COMPLETED.value: "completed",
+}
-class Env:
+class Environment:
     """
     Environment interface for agent interactions.
@@ -192,7 +217,9 @@ class Env:
             api_key=settings.api_key,
         )
-    async def reset(self, task_id: str, metadata: dict[str, Any] | None = None) -> Observation:
+    async def reset(
+        self, task_id: str, metadata: dict[str, Any] | None = None
+    ) -> Observation:
         """
         Reset the environment to the task.
@@ -213,6 +240,18 @@ class Env:
         )
         return Observation(**data["observation"])
+    async def wait_for_ready(
+        self, timeout: float | None = None, poll_interval: float = 10.0
+    ) -> None:
+        """
+        Wait for the environment to leave the initializing state.
+        Polls get_env_state() until it reports running, error or completed, logs
+        that state and returns. An error state ends the wait without raising.
+        Args:
+            timeout: Maximum seconds to wait; None (the default) waits indefinitely
+            poll_interval: Seconds to sleep between polls
+        Raises:
+            TimeoutError: If timeout elapses before a final state is reported
+        """
+        final_states = (
+            EnvironmentStatus.RUNNING.value,
+            EnvironmentStatus.ERROR.value,
+            EnvironmentStatus.COMPLETED.value,
+        )
+        # Use the loop's monotonic clock so wall-clock changes cannot skew the deadline.
+        loop = asyncio.get_running_loop()
+        deadline = None if timeout is None else loop.time() + timeout
+        while True:
+            state = await self.get_env_state()
+            if state in final_states:
+                logger.info("Environment %s %s", self.id, status_messages.get(state))
+                return
+            delay = poll_interval
+            if deadline is not None:
+                remaining = deadline - loop.time()
+                if remaining <= 0:
+                    raise TimeoutError(
+                        f"Environment {self.id} was not ready after {timeout} seconds"
+                    )
+                # Never sleep past the deadline.
+                delay = min(poll_interval, remaining)
+            await asyncio.sleep(delay)
 class EvalSet:
     """

hud/run.py CHANGED Viewed

@@ -1,16 +1,17 @@
 from __future__ import annotations
+import datetime
 from typing import TYPE_CHECKING, Any
 from pydantic import BaseModel, Field
 from .adapters.common import Adapter
-from .env import Env, EvalSet
+from .environment import Environment, EvalSet
 from .server import make_request
 from .settings import settings
 if TYPE_CHECKING:
-    from datetime import datetime
+    import datetime
     from .gym import Gym
@@ -61,11 +62,63 @@ class RunAnalyticsResponse(BaseModel):
     total_tasks: int
     completed_tasks: int
     running_time: float | None = None  # runtime in seconds if available
-    created_at: datetime
+    created_at: datetime.datetime
     raw_data: dict[str, list[dict[str, Any]]] = Field(
         default_factory=lambda: {"tasks": [], "environments": []}
     )
+    def __str__(self) -> str:
+        """Return the ASCII visualization produced by visualize()."""
+        return self.visualize()
+    def visualize(self) -> str:
+        """
+        Generate an ASCII bar chart visualization of run analytics.
+        The report contains a header, a progress summary, one bar per entry in
+        status_counts and, when available, the average score and total runtime.
+        Returns:
+            A string containing an ASCII visualization
+        """
+        max_width = 50
+        completion_rate = (
+            self.completion_rate if self.completion_rate is not None else 0
+        )
+        result = [
+            f"Run: {self.name} (ID: {self.id})",
+            f"Created: {self.created_at.strftime('%Y-%m-%d %H:%M:%S')}",
+            "-" * 60,
+            # Adjacent literals keep the summary on a single output line; a
+            # triple-quoted string would embed a newline and source indentation.
+            f"Progress: {self.completed_tasks}/{self.total_tasks} tasks completed "
+            f"({completion_rate:.1f}% completion rate)",
+            "",
+            "Status Distribution:",
+        ]
+        total = sum(self.status_counts.values())
+        for status, count in self.status_counts.items():
+            # Guard against an all-zero distribution to avoid ZeroDivisionError.
+            fraction = count / total if total else 0.0
+            bar = "█" * int(fraction * max_width)
+            result.append(
+                f"{status.ljust(10)}: {bar} {count} ({fraction * 100:.1f}%)"
+            )
+        if self.avg_score is not None:
+            result.append("")
+            result.append(f"Average Score: {self.avg_score:.2f}")
+            # NOTE(review): the bar is scaled as if the score ranged 0-100 while
+            # the label below prints "/1.00" - confirm the intended score range.
+            score_bar_length = int((self.avg_score / 100) * max_width)
+            score_bar = "█" * score_bar_length
+            result.append(f"Score: {score_bar} {self.avg_score:.2f}/1.00")
+        if self.running_time is not None:
+            # running_time is in seconds; break it down into h/m/s.
+            hours, remainder = divmod(self.running_time, 3600)
+            minutes, seconds = divmod(remainder, 60)
+            runtime_str = f"{int(hours)}h {int(minutes)}m {int(seconds)}s"
+            result.append(f"Total Runtime: {runtime_str}")
+        return "\n".join(result)
 class Run:
     """
@@ -109,7 +162,7 @@ class Run:
         self.adapter = adapter
         self.config = config
         self.metadata = metadata
-        self.envs: list[Env] = []
+        self.environments: list[Environment] = []
     async def fetch_task_ids(self) -> list[str]:
         """
@@ -120,7 +173,7 @@ class Run:
         """
         return await self.evalset.fetch_tasks()
-    async def make(self, metadata: dict[str, Any]) -> Env:
+    async def make(self, metadata: dict[str, Any] | None = None) -> Environment:
         """
         Create a new environment for this run.
@@ -128,17 +181,17 @@ class Run:
             metadata: Metadata for the environment
         Returns:
-            Env: The created environment
+            Environment: The created environment
         """
         # Make the env class
-        env = Env(
+        env = Environment(
             run_id=self.id,
             config=self.config,
             adapter=self.adapter,
-            metadata=metadata,
+            metadata=metadata or {},
         )
         await env.create_environment()
-        self.envs.append(env)
+        self.environments.append(env)
         return env
     async def get_analytics(self) -> RunAnalyticsResponse:

hud/server/requests.py CHANGED Viewed

@@ -4,16 +4,87 @@ HTTP request utilities for the HUD API.
 from __future__ import annotations
+import logging
 from typing import Any
 import httpx
+logger = logging.getLogger("hud.http")
 class RequestError(Exception):
-    """
-    Custom exception for API request errors.
-    """
+    """Custom exception for API request errors"""
+    def __init__(
+        self,
+        message: str,
+        status_code: int | None = None,
+        response_text: str | None = None,
+        response_json: dict[str, Any] | None = None,
+        response_headers: dict[str, str] | None = None
+    ) -> None:
+        """
+        Store the error message and any details of the failed response.
+        Args:
+            message: Human-readable description of the failure
+            status_code: HTTP status code of the response, if any
+            response_text: Raw response body, if any
+            response_json: Parsed JSON response body, if any
+            response_headers: Response headers, if any
+        """
+        self.message = message
+        self.status_code = status_code
+        self.response_text = response_text
+        self.response_json = response_json
+        self.response_headers = response_headers
+        # Only the message is passed on as the base Exception's args.
+        super().__init__(message)
+    def __str__(self) -> str:
+        """Render the message followed by each populated detail, separated by ' | '."""
+        labelled = (
+            ("Status", self.status_code),
+            ("Response Text", self.response_text),
+            ("Response JSON", self.response_json),
+            ("Headers", self.response_headers),
+        )
+        # Falsy details (None, empty string, empty dict) are left out.
+        details = [f"{label}: {value}" for label, value in labelled if value]
+        return " | ".join([self.message, *details])
+    @classmethod
+    def from_http_error(cls, error: httpx.HTTPStatusError) -> RequestError:
+        """
+        Create a RequestError from an HTTP error response.
+        The message uses the JSON body's "detail" field when it is present and
+        truthy. Otherwise it reports the status code and, for a JSON object with
+        at most five keys, appends the object itself. The failure is also logged
+        at error level with the response body truncated to 500 characters.
+        Args:
+            error: The httpx status error whose response is described
+        Returns:
+            A RequestError carrying the status code, body text, parsed JSON and headers
+        """
+        response = error.response
+        status_code = response.status_code
+        response_text = response.text
+        response_headers = dict(response.headers)
+        # Start from the generic message and refine it if the body allows.
+        message = f"Request failed with status {status_code}"
+        # Best effort: a body that is not valid JSON simply yields no parsed payload.
+        try:
+            response_json = response.json()
+        except Exception:
+            response_json = None
+        # Only a JSON object can carry "detail"; any other payload keeps the
+        # generic message.
+        if isinstance(response_json, dict):
+            detail = response_json.get("detail")
+            if detail:
+                message = f"Request failed: {detail}"
+            elif len(response_json) <= 5:
+                # Small objects are cheap to show inline.
+                message += f" - JSON response: {response_json}"
+        truncation_marker = "..." if len(response_text) > 500 else ""
+        logger.error(
+            "HTTP error from HUD SDK: %s | URL: %s | Status: %s | Response: %s%s",
+            message,
+            response.url,
+            status_code,
+            response_text[:500],
+            truncation_marker,
+        )
+        return cls(
+            message=message,
+            status_code=status_code,
+            response_text=response_text,
+            response_json=response_json,
+            response_headers=response_headers,
+        )
 async def make_request(
     method: str, url: str, json: Any | None = None, api_key: str | None = None
@@ -40,11 +111,19 @@ async def make_request(
     async with httpx.AsyncClient(timeout=240.0) as client:
         try:
-            response = await client.request(method=method, url=url, json=json, headers=headers)
+            response = await client.request(
+                method=method, url=url, json=json, headers=headers
+            )
             response.raise_for_status()
-            return response.json()
-        except httpx.HTTPError as e:
-            raise RequestError(f"Request failed: {e!s}") from None
+            result = response.json()
+            return result
+        except httpx.HTTPStatusError as e:
+            raise RequestError.from_http_error(e) from None
+        except httpx.RequestError as e:
+            raise RequestError(f"Network error: {e!s}") from None
+        except Exception as e:
+            # Catch-all for unexpected errors
+            raise RequestError(f"Unexpected error: {e!s}") from None
 def make_sync_request(
@@ -72,8 +151,16 @@ def make_sync_request(
     with httpx.Client(timeout=240.0) as client:
         try:
-            response = client.request(method=method, url=url, json=json, headers=headers)
+            response = client.request(
+                method=method, url=url, json=json, headers=headers
+            )
             response.raise_for_status()
-            return response.json()
-        except httpx.HTTPError as e:
-            raise RequestError(f"Request failed: {e!s}") from None
+            result = response.json()
+            return result
+        except httpx.HTTPStatusError as e:
+            raise RequestError.from_http_error(e) from None
+        except httpx.RequestError as e:
+            raise RequestError(f"Network error: {e!s}") from None
+        except Exception as e:
+            # Catch-all for unexpected errors
+            raise RequestError(f"Unexpected error: {e!s}") from None

{hud_python-0.1.0.dist-info → hud_python-0.1.0b2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.1.0
+Version: 0.1.0b2
 Summary: SDK for the HUD evaluation platform.
 Project-URL: Homepage, https://github.com/Human-Data/hud-sdk
 Project-URL: Bug Tracker, https://github.com/Human-Data/hud-sdk/issues
@@ -44,6 +44,7 @@ Requires-Dist: pydantic-settings<3,>=2
 Requires-Dist: pydantic<3,>=2
 Provides-Extra: dev
 Requires-Dist: anthropic; extra == 'dev'
+Requires-Dist: dotenv; extra == 'dev'
 Requires-Dist: ipykernel; extra == 'dev'
 Requires-Dist: ipython<9; extra == 'dev'
 Requires-Dist: jupyter-client; extra == 'dev'
@@ -54,38 +55,40 @@ Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
 Requires-Dist: ruff==0.9.8; extra == 'dev'
 Description-Content-Type: text/markdown
-# HUD SDK (Alpha Release)
+# HUD
-A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models.
+A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models. Visit [hud.so](https://hud.so).
-Visit [hud.so](https://hud.so) for more information about HUD.
-> **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0-alpha). The API is still evolving and may change in future releases as we gather feedback and improve functionality.
+> **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0-alpha). The API is evolving and may change in future releases as we gather feedback and improve functionality.
 [![PyPI version](https://img.shields.io/pypi/v/hud-python)](https://pypi.org/project/hud-python/)
-[📚 Documentation](https://docs.hud.so) | [🏠 Homepage](https://hud.so)
+[📚 Documentation](https://documentation.hud.so) | [🏠 Homepage](https://hud.so)
+## Quick start
-## Quick Start
+[RECOMMENDED] To get started with an agent, see the [Claude Computer use example](https://github.com/Human-Data/hud-sdk/tree/main/examples).
+Otherwise, install the package with Python>=3.9:
 ```bash
-# Install the latest stable release
 pip install hud-python
+```
-# Install the latest alpha release (may include breaking changes)
-pip install --pre hud-python
-# Install a specific alpha version
-pip install hud-python==0.1.0-alpha
+Make sure to set up your account [here](https://hud.so/settings) and add your API key to the environment variables:
+```bash
+HUD_API_KEY=<your-api-key>
 ```
+Load in your agent and create a run! Go to the [examples](https://github.com/Human-Data/hud-sdk/tree/main/examples) folder for more examples.
 ```python
 import asyncio
+import os
 from hud import HUDClient
 async def main():
     # Initialize client with API key
-    client = HUDClient(api_key="your-api-key")
+    client = HUDClient(api_key=os.getenv("HUD_API_KEY"))
     # Load a gym and evaluation set
     gym = await client.load_gym(id="OSWorld-Ubuntu")
@@ -93,24 +96,33 @@ async def main():
     # Create a run and environment
     run = client.create_run(name="example-run", gym=gym, evalset=evalset)
-    env = await run.make(metadata={"agent_id": "example"})
+    env = await run.make(metadata={"agent_id": "OSWORLD-1"})
+    await env.wait_for_ready()
+    ###
+    ### Agent loop goes here, see example in /examples
+    ###
-    # Agent loop goes here
-    # For complete examples and usage guides, see our documentation
+    # Evaluate the environment
+    result = await env.evaluate()
     # Close the environment when done
     await env.close()
+    # Get analytics for the run such as rewards, task completions, etc.
+    analytics = await run.get_analytics()
+    print(analytics)
 if __name__ == "__main__":
     asyncio.run(main())
 ```
-## Key Features
+## Features
 - Connect to HUD evaluation environments
 - Run benchmarks across various tasks
 - Support for different agent adapters
-- Asynchronous API for efficient interaction
+- Asynchronous API
 ## Documentation

{hud_python-0.1.0.dist-info → hud_python-0.1.0b2.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
-hud/__init__.py,sha256=8o5QRfwUHXHoXvjzLM1HLZ3-DWpViMTumNexqVcXgRA,390
-hud/client.py,sha256=JNJGsuzBNDRShZ8OKSGeuABZZYpKwE_XC1lVbRMaUE8,5262
-hud/env.py,sha256=mgVbOpRX8ilG4BiIAN4ZFhzY5TAJBB9HX5LUh0FIcII,7383
+hud/__init__.py,sha256=GmX-LujM2oZR6_tP_mOW09BY8HeK41lLF-P0sMW_1pY,416
+hud/client.py,sha256=ztWPiAJyJUdJxdxGqDmsQnVK-_jccinWQUUXmq0OOmY,5843
+hud/environment.py,sha256=R-t-21V0gveuHL6LlVLnBp0gYGm8tn5FbLcq_rRlH9g,8587
 hud/gym.py,sha256=dKmf0Ol0-XyLhji034pF_5dXnhW1IgIr-dJUg4KfslE,475
-hud/run.py,sha256=rxWtw1Pgm-KysxB2b_aqW4hVjBn5X0FkhD1489P3g8Y,4633
+hud/run.py,sha256=_K7POPjJyqcJ_DVLAO7hRmvLUcg9gg2KrLHw_26DB9I,6570
 hud/settings.py,sha256=FbZHI1q6bDHe7Awl32SDPb-syqtkLI3C7gIIXuMXCiQ,1045
 hud/adapters/__init__.py,sha256=y3H7yMl7rC-rrXG2WvePdSojoNFSui02eYTH17Xd7OY,87
 hud/adapters/claude/__init__.py,sha256=GsMxaBL5ZuKV6-jJsLfw23n_Ml9e88SXIddYDGkIUKE,101
@@ -11,11 +11,11 @@ hud/adapters/common/__init__.py,sha256=BjdZWJVs_AKtpFrt-tNsdQRjnz7D97DFEQirJ-r0m
 hud/adapters/common/adapter.py,sha256=SCtOuRjW5Szzd45LXCaqDEaKr2lhA-nIqSEMJ9KLsKI,5799
 hud/adapters/common/types.py,sha256=LlWxH9sWucYgnIv6DKrgqToh3k7Bu-xdTxNFU4L8Xg8,1962
 hud/server/__init__.py,sha256=HeIXBGb-bxtq3xF20jP4IrOy77PlsqhClOf3bZ9wrwI,169
-hud/server/requests.py,sha256=kEMWt3k1DrvWa4iO1RyzD7PI0tEW29vkQzElAxMjHsQ,2240
+hud/server/requests.py,sha256=M_pK1oCd4QjIE0yguD6iaybJ_mempOWDQYEpdOkophU,5522
 hud/utils/__init__.py,sha256=0m8klSLnMLeIJT23ipBXfFACk4hNWPsA6ZNqZDpv6oY,99
 hud/utils/config.py,sha256=dze0BGE4q14omjj9822kL9BeiIgWQvJyuU29A2wa1SE,193
 hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hud_python-0.1.0.dist-info/METADATA,sha256=F0D0V3taE7Bvtdzg_AgvaxWQrDppQm4pX_406SeQWXo,4663
-hud_python-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hud_python-0.1.0.dist-info/licenses/LICENSE,sha256=IVdfcZ8xq5apYGJS5GzRLLbm9r03Aecxd03isi-3P9k,1075
-hud_python-0.1.0.dist-info/RECORD,,
+hud_python-0.1.0b2.dist-info/METADATA,sha256=5skHs5IfSJP4DQAGzpuo_yjO7l65XxHIPWGitHQt0Ug,5140
+hud_python-0.1.0b2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hud_python-0.1.0b2.dist-info/licenses/LICENSE,sha256=IVdfcZ8xq5apYGJS5GzRLLbm9r03Aecxd03isi-3P9k,1075
+hud_python-0.1.0b2.dist-info/RECORD,,

{hud_python-0.1.0.dist-info → hud_python-0.1.0b2.dist-info}/WHEEL RENAMED Viewed

File without changes

{hud_python-0.1.0.dist-info → hud_python-0.1.0b2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hud-python 0.1.0__py3-none-any.whl → 0.1.0b2__py3-none-any.whl

Potentially problematic release.

hud-python 0.1.0py3-none-any.whl → 0.1.0b2py3-none-any.whl