PyPI - hud-python - Versions diffs - 0.1.0b1__tar.gz → 0.1.0b3__tar.gz - Mend

hud-python 0.1.0b1__tar.gz → 0.1.0b3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (50) hide show

{hud_python-0.1.0b1 → hud_python-0.1.0b3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.1.0b1
+Version: 0.1.0b3
 Summary: SDK for the HUD evaluation platform.
 Project-URL: Homepage, https://github.com/Human-Data/hud-sdk
 Project-URL: Bug Tracker, https://github.com/Human-Data/hud-sdk/issues
@@ -95,7 +95,7 @@ async def main():
     evalset = await client.load_evalset(id="OSWorld-Ubuntu")
     # Create a run and environment
-    run = client.create_run(name="example-run", gym=gym, evalset=evalset)
+    run = await client.create_run(name="example-run", gym=gym, evalset=evalset)
     env = await run.make(metadata={"agent_id": "OSWORLD-1"})
     await env.wait_for_ready()

{hud_python-0.1.0b1 → hud_python-0.1.0b3}/README.md RENAMED Viewed

@@ -38,7 +38,7 @@ async def main():
     evalset = await client.load_evalset(id="OSWorld-Ubuntu")
     # Create a run and environment
-    run = client.create_run(name="example-run", gym=gym, evalset=evalset)
+    run = await client.create_run(name="example-run", gym=gym, evalset=evalset)
     env = await run.make(metadata={"agent_id": "OSWORLD-1"})
     await env.wait_for_ready()

{hud_python-0.1.0b1 → hud_python-0.1.0b3}/agent/response_agent.py RENAMED Viewed

@@ -1,4 +1,3 @@
-import json
 import os
 import openai
 from typing import Literal, Optional
@@ -11,7 +10,7 @@ class ResponseAgent:
         if not self.api_key:
             raise ValueError("OpenAI API key must be provided or set as OPENAI_API_KEY environment variable")
-        self.client = openai.Client(api_key=self.api_key)
+        self.async_client = openai.AsyncClient(api_key=self.api_key)
         self.system_prompt = """
         You are an assistant that helps determine the appropriate response to an agent's message.
@@ -28,9 +27,9 @@ class ResponseAgent:
         Respond ONLY with one of these two options.
         """
-    def determine_response(self, agent_message: str) -> ResponseType:
+    async def determine_response(self, agent_message: str) -> ResponseType:
         try:
-            response = self.client.chat.completions.create(
+            response = await self.async_client.chat.completions.create(
                 model="gpt-4o",
                 messages=[
                     {"role": "system", "content": self.system_prompt},

{hud_python-0.1.0b1 → hud_python-0.1.0b3}/docs/api-reference/client.mdx RENAMED Viewed

@@ -125,7 +125,7 @@ if run:
 ### create_run
 ```python
-create_run(
+async create_run(
     name: str,
     gym: Gym,
     evalset: EvalSet,
@@ -150,7 +150,7 @@ Creates a new run.
 **Example:**
 ```python
-run = client.create_run(
+run = await client.create_run(
     name="example-run",
     gym=gym,
     evalset=evalset,

{hud_python-0.1.0b1 → hud_python-0.1.0b3}/docs/concepts/environment.mdx RENAMED Viewed

@@ -17,7 +17,7 @@ from hud import HUDClient
 client = HUDClient(api_key="your-api-key")
 gym = await client.load_gym(id="OSWorld-Ubuntu")
 evalset = await client.load_evalset(id="OSWorld-Ubuntu")
-run = client.create_run(name="example-run", gym=gym, evalset=evalset)
+run = await client.create_run(name="example-run", gym=gym, evalset=evalset)
 # Create environment
 env = await run.make(metadata={"agent_id": "example"})

{hud_python-0.1.0b1 → hud_python-0.1.0b3}/docs/concepts/gym.mdx RENAMED Viewed

@@ -43,7 +43,7 @@ Each gym has the following properties:
 Gyms are used when creating a run:
 ```python
-run = client.create_run(name="my-run", gym=gym, evalset=evalset)
+run = await client.create_run(name="my-run", gym=gym, evalset=evalset)
 ```
 This associates the run with the specific environment defined by the gym.

{hud_python-0.1.0b1 → hud_python-0.1.0b3}/docs/examples/basic.mdx RENAMED Viewed

@@ -59,7 +59,7 @@ Create a run to execute tasks:
 ```python
 # Create a run
-run = client.create_run(
+run = await client.create_run(
     name="example-run",
     gym=gym,
     evalset=evalset,
@@ -133,7 +133,7 @@ async def main():
     evalset = await client.load_evalset(id="OSWorld-Ubuntu")
     # Create a run and get tasks
-    run = client.create_run(name="example-run", gym=gym, evalset=evalset)
+    run = await client.create_run(name="example-run", gym=gym, evalset=evalset)
     tasks = await run.fetch_task_ids()
     # Create environment and wait for it to be ready

{hud_python-0.1.0b1 → hud_python-0.1.0b3}/docs/examples/custom-agent.mdx RENAMED Viewed

@@ -96,7 +96,7 @@ async def main():
     evalset = await client.load_evalset(id="OSWorld-Ubuntu")
     # Create the run
-    run = client.create_run(name="simple-agent-run", gym=gym, evalset=evalset)
+    run = await client.create_run(name="simple-agent-run", gym=gym, evalset=evalset)
     tasks = await run.fetch_task_ids()
     # Initialize the agent and adapter

{hud_python-0.1.0b1 → hud_python-0.1.0b3}/docs/introduction.mdx RENAMED Viewed

@@ -45,7 +45,7 @@ async def main():
     evalset = await client.load_evalset(id="OSWorld-Ubuntu")
     # Create a run
-    run = client.create_run(name="example-run", gym=gym, evalset=evalset)
+    run = await client.create_run(name="example-run", gym=gym, evalset=evalset)
     # And more...

{hud_python-0.1.0b1 → hud_python-0.1.0b3}/docs/quickstart.mdx RENAMED Viewed

@@ -31,7 +31,7 @@ async def main():
     evalset = await client.load_evalset(id="OSWorld-Ubuntu")
     # Create a run and get tasks
-    run = client.create_run(name="example-run", gym=gym, evalset=evalset)
+    run = await client.create_run(name="example-run", gym=gym, evalset=evalset)
     tasks = await run.fetch_task_ids()
     # Create environment and wait for it to be ready

{hud_python-0.1.0b1 → hud_python-0.1.0b3}/examples/claude_osworld.ipynb RENAMED Viewed

@@ -19,7 +19,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -28,7 +28,6 @@
     "\n",
     "# initalize Claude Computer Use agent\n",
     "anthropic = Anthropic(api_key=os.getenv(\"ANTHROPIC_API_KEY\"))\n",
-    "agent = ClaudeAgent(anthropic)\n",
     "\n",
     "# initialize adapter to interact with the environment\n",
     "cua_adapter = ClaudeAdapter()"
@@ -36,9 +35,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total tasks in OSWorld: 368\n"
+     ]
+    }
+   ],
    "source": [
     "# load OSWorld environment\n",
     "gym = await client.load_gym(id=\"OSWorld-Ubuntu\")\n",
@@ -47,7 +54,7 @@
     "evalset = await client.load_evalset(id=\"OSWorld-Ubuntu\")\n",
     "\n",
     "# create a run that will host all evaluations\n",
-    "run = client.create_run(name=\"Claude-test-OSWorld\", gym=gym, evalset=evalset)\n",
+    "run = await client.create_run(name=\"Claude-test-OSWorld\", gym=gym, evalset=evalset)\n",
     "\n",
     "# fetch all task ids from the run\n",
     "tasks = await run.fetch_task_ids()\n",
@@ -56,11 +63,38 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Task description: Can you make my computer bring back the last tab I shut down?\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "        <div style=\"width: 960px; height: 540px; overflow: hidden;\">\n",
+       "            <div style=\"transform: scale(0.5); transform-origin: top left;\">\n",
+       "                <iframe src=\"http://18.212.230.156:5910/vnc.html\" width=\"1920\" height=\"1080\" style=\"border: 1px solid #ddd;\">\n",
+       "                </iframe>\n",
+       "            </div>\n",
+       "        </div>\n",
+       "        "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
-    "# it may take around 3 minutes to initialize the OSWorld platform and reset to a task\n",
+    "# it may take around 1-2 minutes to initialize the OSWorld platform and reset to a task\n",
     "\n",
     "# make a HUD environment\n",
     "env = await run.make()\n",
@@ -77,11 +111,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Agent's action: {'action': 'key', 'text': 'ctrl+shift+t'}\n",
+      "Step 1 completed\n"
+     ]
+    }
+   ],
    "source": [
     "# agent loop\n",
+    "agent = ClaudeAgent(anthropic)\n",
+    "\n",
     "for i in range(8):\n",
     "    # rescale screenshot to Claude's resolution\n",
     "    screenshot = cua_adapter.rescale(obs.screenshot)\n",
@@ -107,9 +152,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Evaluation result: 1.0\n"
+     ]
+    }
+   ],
    "source": [
     "# evaluate environment state\n",
     "result = await env.evaluate()\n",
@@ -118,6 +171,34 @@
     "# close environment\n",
     "await env.close()"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Run: Claude-test-OSWorld (ID: 7c0df152-e799-4ec6-ac2e-8a4e0aaa5b99)\n",
+      "Created: 2025-03-10 02:45:28\n",
+      "------------------------------------------------------------\n",
+      "Progress: 1/1 tasks completed (\n",
+      "            100.0% completion rate)\n",
+      "\n",
+      "Status Distribution:\n",
+      "completed : ██████████████████████████████████████████████████ 1 (100.0%)\n",
+      "\n",
+      "Average Score: 1.00\n",
+      "Score:  1.00/1.00\n"
+     ]
+    }
+   ],
+   "source": [
+    "analytics = await run.get_analytics()\n",
+    "print(analytics)"
+   ]
   }
  ],
  "metadata": {

{hud_python-0.1.0b1 → hud_python-0.1.0b3}/hud/__init__.py RENAMED Viewed

@@ -9,7 +9,7 @@ from hud.environment import Environment, EvalSet, Observation, TaskResult
 from hud.gym import Gym
 from hud.run import Run
-__version__ = "0.1.0b1"
+__version__ = "0.1.0b3"
 __all__ = [
     "Environment",

{hud_python-0.1.0b1 → hud_python-0.1.0b3}/hud/adapters/claude/__init__.py RENAMED Viewed

@@ -3,4 +3,3 @@ from __future__ import annotations
 from .adapter import ClaudeAdapter
 __all__ = ["ClaudeAdapter"]

hud_python-0.1.0b3/hud/adapters/common/types.py ADDED Viewed

@@ -0,0 +1,293 @@
+from __future__ import annotations
+from typing import Annotated, Literal, Union
+from pydantic import BaseModel, Field
+# Base class for all actions
+class CLAAction(BaseModel):
+    type: str
+# Basic Point model for coordinates
+class Point(BaseModel):
+    x: int
+    y: int
+# CLICK ACTION (supports extra options)
+class ClickAction(CLAAction):
+    type: Literal["click"] = "click"
+    point: Point | None = None
+    selector: str | None = None
+    button: Literal["left", "right", "wheel", "back", "forward"] = "left"
+    pattern: list[int] | None = None  # [delay_1, delay_2, ...]
+    hold_keys: list[CLAKey] | None = None
+# PRESS ACTION for key presses/hotkeys
+class PressAction(CLAAction):
+    type: Literal["press"] = "press"
+    keys: list[CLAKey]
+# TYPE ACTION for text typing
+class TypeAction(CLAAction):
+    type: Literal["type"] = "type"
+    text: str
+    enter_after: bool | None = False
+# SCROLL ACTION
+class ScrollAction(CLAAction):
+    type: Literal["scroll"] = "scroll"
+    point: Point | None = None
+    scroll: Point | None = None
+    hold_keys: list[CLAKey] | None = None
+# MOVE ACTION for mouse movement
+class MoveAction(CLAAction):
+    type: Literal["move"] = "move"
+    point: Point | None = None
+    selector: str | None = None
+    offset: Point | None = None
+# WAIT ACTION
+class WaitAction(CLAAction):
+    type: Literal["wait"] = "wait"
+    time: int  # in milliseconds
+# DRAG ACTION
+class DragAction(CLAAction):
+    type: Literal["drag"] = "drag"
+    path: list[Point]
+    pattern: list[int] | None = None  # [delay_1, delay_2, ...]
+    hold_keys: list[CLAKey] | None = None
+# SCREENSHOT ACTION
+class ScreenshotFetch(CLAAction):
+    type: Literal["screenshot"] = "screenshot"
+class PositionFetch(CLAAction):
+    type: Literal["position"] = "position"
+# Union of all possible actions
+CLA = Annotated[
+    Union[
+        ClickAction,
+        PressAction,
+        TypeAction,
+        ScrollAction,
+        MoveAction,
+        WaitAction,
+        DragAction,
+        ScreenshotFetch,
+        PositionFetch,
+    ],
+    Field(discriminator="type"),
+]
+CLAKey = Literal[
+    # Control keys
+    "backspace",
+    "tab",
+    "enter",
+    "shift",
+    "shiftleft",
+    "shiftright",
+    "ctrl",
+    "ctrlleft",
+    "ctrlright",
+    "alt",
+    "altleft",
+    "altright",
+    "pause",
+    "capslock",
+    "esc",
+    "escape",
+    "space",
+    "pageup",
+    "pagedown",
+    "end",
+    "home",
+    "left",
+    "up",
+    "right",
+    "down",
+    "select",
+    "print",
+    "execute",
+    "printscreen",
+    "prtsc",
+    "insert",
+    "delete",
+    "help",
+    "sleep",
+    # Special keys
+    "numlock",
+    "scrolllock",
+    "clear",
+    "separator",
+    "modechange",
+    "apps",
+    "browserback",
+    "browserfavorites",
+    "browserforward",
+    "browserhome",
+    "browserrefresh",
+    "browsersearch",
+    "browserstop",
+    "launchapp1",
+    "launchapp2",
+    "launchmail",
+    "launchmediaselect",
+    "playpause",
+    "stop",
+    "prevtrack",
+    "nexttrack",
+    "volumemute",
+    "volumeup",
+    "volumedown",
+    "zoom",
+    # Modifier keys
+    "win",
+    "winleft",
+    "winright",
+    "command",
+    "option",
+    "optionleft",
+    "optionright",
+    "fn",
+    # Numpad keys
+    "num0",
+    "num1",
+    "num2",
+    "num3",
+    "num4",
+    "num5",
+    "num6",
+    "num7",
+    "num8",
+    "num9",
+    "multiply",
+    "add",
+    "subtract",
+    "decimal",
+    "divide",
+    # Function keys
+    "f1",
+    "f2",
+    "f3",
+    "f4",
+    "f5",
+    "f6",
+    "f7",
+    "f8",
+    "f9",
+    "f10",
+    "f11",
+    "f12",
+    "f13",
+    "f14",
+    "f15",
+    "f16",
+    "f17",
+    "f18",
+    "f19",
+    "f20",
+    "f21",
+    "f22",
+    "f23",
+    "f24",
+    # Language-specific keys
+    "hanguel",
+    "hangul",
+    "hanja",
+    "kana",
+    "kanji",
+    "junja",
+    "convert",
+    "nonconvert",
+    "yen",
+    # Characters
+    "\t",
+    "\n",
+    "\r",
+    " ",
+    "!",
+    '"',
+    "#",
+    "$",
+    "%",
+    "&",
+    "'",
+    "(",
+    ")",
+    "*",
+    "+",
+    ",",
+    "-",
+    ".",
+    "/",
+    "0",
+    "1",
+    "2",
+    "3",
+    "4",
+    "5",
+    "6",
+    "7",
+    "8",
+    "9",
+    ":",
+    ";",
+    "<",
+    "=",
+    ">",
+    "?",
+    "@",
+    "[",
+    "\\",
+    "]",
+    "^",
+    "_",
+    "`",
+    "a",
+    "b",
+    "c",
+    "d",
+    "e",
+    "f",
+    "g",
+    "h",
+    "i",
+    "j",
+    "k",
+    "l",
+    "m",
+    "n",
+    "o",
+    "p",
+    "q",
+    "r",
+    "s",
+    "t",
+    "u",
+    "v",
+    "w",
+    "x",
+    "y",
+    "z",
+    "{",
+    "|",
+    "}",
+    "~",
+]

hud-python 0.1.0b1__tar.gz → 0.1.0b3__tar.gz

Potentially problematic release.

hud-python 0.1.0b1__tar.gz → 0.1.0b3__tar.gz