PyPI - hud-python - Versions diffs - 0.6.7__py3-none-any.whl → 0.6.8.dev0__py3-none-any.whl - Mend

hud-python 0.6.7 (py3-none-any.whl) → 0.6.8.dev0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

hud/agents/openai_compatible/agent.py CHANGED Viewed

@@ -17,11 +17,13 @@ from hud.types import MCPToolCall, MCPToolResult
 from hud.utils import gateway
 from .tools import (
+    BashTool,
+    EditTool,
     GlobTool,
     GrepTool,
-    ListTool,
     OpenAICompatibleMCPProxyTool,
     ReadTool,
+    WriteTool,
 )
 from .tools.base import format_chat_result
@@ -41,10 +43,12 @@ class OpenAIChatAgent(ToolAgent[ChatCompletionMessageParam, OpenAIChatConfig]):
     """OpenAI-compatible agent using the chat.completions protocol."""
     tool_catalog = (
+        BashTool,
         ReadTool,
-        GrepTool,
         GlobTool,
-        ListTool,
+        GrepTool,
+        EditTool,
+        WriteTool,
         OpenAICompatibleMCPProxyTool,
     )

hud/agents/openai_compatible/tools/__init__.py CHANGED Viewed

@@ -2,13 +2,15 @@
 from __future__ import annotations
-from .filesystem import GlobTool, GrepTool, ListTool, ReadTool
+from .filesystem import BashTool, EditTool, GlobTool, GrepTool, ReadTool, WriteTool
 from .mcp_proxy import OpenAICompatibleMCPProxyTool
 __all__ = [
+    "BashTool",
+    "EditTool",
     "GlobTool",
     "GrepTool",
-    "ListTool",
     "OpenAICompatibleMCPProxyTool",
     "ReadTool",
+    "WriteTool",
 ]

hud/agents/openai_compatible/tools/filesystem.py CHANGED Viewed

@@ -1,16 +1,20 @@
-"""OpenAI-compatible filesystem tools — backed by SSHClient."""
+"""OpenAI-compatible OpenCode-style workspace tools backed by SSHClient."""
 from __future__ import annotations
+import math
+import posixpath
 import shlex
 from typing import Any, ClassVar
 import mcp.types as mcp_types
 from hud.agents.tools import SSHTool
-from hud.agents.tools.base import AgentToolSpec, result_text
+from hud.agents.tools.base import AgentToolSpec, result_text, tool_err
 from hud.types import MCPToolResult
+DEFAULT_READ_LIMIT = 2000
 class _FilesystemTool(SSHTool):
     description: ClassVar[str]
@@ -34,16 +38,26 @@ class _FilesystemTool(SSHTool):
 class ReadTool(_FilesystemTool):
     name = "read"
-    description = "Reads a file from the local filesystem. Use offset and limit for pagination."
+    description = (
+        "Reads a file or directory from the workspace. Use offset and limit for pagination."
+    )
     parameters: ClassVar[dict[str, Any]] = {
         "type": "object",
         "properties": {
-            "filePath": {"type": "string", "description": "Absolute path to the file to read."},
+            "filePath": {
+                "type": "string",
+                "description": "The absolute path to the file or directory to read.",
+            },
             "offset": {
                 "type": "integer",
-                "description": "0-based line offset to start reading from.",
+                "description": "The line number to start reading from (1-indexed).",
+                "minimum": 0,
+            },
+            "limit": {
+                "type": "integer",
+                "description": "The maximum number of lines to read (defaults to 2000).",
+                "minimum": 1,
             },
-            "limit": {"type": "integer", "description": "Maximum number of lines to read."},
         },
         "required": ["filePath"],
     }
@@ -52,19 +66,205 @@ class ReadTool(_FilesystemTool):
         path = arguments.get("filePath")
         if not isinstance(path, str) or not path:
             raise ValueError("filePath is required")
+        offset = _read_offset(arguments.get("offset"))
+        limit = _positive_int(arguments.get("limit"), default=DEFAULT_READ_LIMIT, name="limit")
+        if not (await self.bash(f"test -d {shlex.quote(path)}")).isError:
+            return await self._read_directory(path, offset=offset, limit=limit)
         result = await self.file_read(path)
         if result.isError:
             return result
-        offset = arguments.get("offset")
-        limit = arguments.get("limit")
-        if isinstance(offset, int) and offset >= 0:
-            lines = result_text(result).splitlines(keepends=True)
-            end = offset + limit if isinstance(limit, int) and limit > 0 else len(lines)
-            sliced = lines[offset:end]
-            return MCPToolResult(
-                content=[mcp_types.TextContent(type="text", text="".join(sliced))],
+        text = result_text(result)
+        lines = text.splitlines()
+        start = offset - 1
+        if start > len(lines) and not (len(lines) == 0 and offset == 1):
+            return tool_err(f"Offset {offset} is out of range for this file ({len(lines)} lines)")
+        sliced = lines[start : start + limit]
+        last = offset + len(sliced) - 1
+        more = last < len(lines)
+        body = [
+            f"<path>{path}</path>",
+            "<type>file</type>",
+            "<content>",
+            *[f"{i + offset}: {line}" for i, line in enumerate(sliced)],
+        ]
+        if more:
+            body.append(
+                f"\n(Showing lines {offset}-{last} of {len(lines)}. "
+                f"Use offset={last + 1} to continue.)"
+            )
+        else:
+            body.append(f"\n(End of file - total {len(lines)} lines)")
+        body.append("</content>")
+        return MCPToolResult(content=[mcp_types.TextContent(type="text", text="\n".join(body))])
+    async def _read_directory(self, path: str, *, offset: int, limit: int) -> MCPToolResult:
+        result = await self.file_list(path)
+        if result.isError:
+            return result
+        entries = result_text(result).splitlines()
+        if entries == ["(empty)"]:
+            entries = []
+        start = offset - 1
+        sliced = entries[start : start + limit]
+        truncated = start + len(sliced) < len(entries)
+        body = [
+            f"<path>{path}</path>",
+            "<type>directory</type>",
+            "<entries>",
+            *sliced,
+        ]
+        if truncated:
+            body.append(
+                f"\n(Showing {len(sliced)} of {len(entries)} entries. "
+                f"Use offset={offset + len(sliced)} to continue.)"
             )
-        return result
+        else:
+            body.append(f"\n({len(entries)} entries)")
+        body.append("</entries>")
+        return MCPToolResult(content=[mcp_types.TextContent(type="text", text="\n".join(body))])
+class BashTool(_FilesystemTool):
+    name = "bash"
+    description = (
+        "Executes a shell command in the workspace. Prefer read, grep, glob, edit, "
+        "and write for filesystem operations."
+    )
+    parameters: ClassVar[dict[str, Any]] = {
+        "type": "object",
+        "properties": {
+            "command": {"type": "string", "description": "The command to execute."},
+            "timeout": {
+                "type": "integer",
+                "description": "Optional timeout in milliseconds.",
+                "minimum": 1,
+            },
+            "workdir": {
+                "type": "string",
+                "description": "The working directory to run the command in.",
+            },
+        },
+        "required": ["command"],
+    }
+    async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
+        command = arguments.get("command")
+        if not isinstance(command, str) or not command:
+            raise ValueError("command is required")
+        timeout = arguments.get("timeout")
+        if timeout is not None:
+            if not isinstance(timeout, int) or timeout < 1:
+                raise ValueError("timeout must be a positive integer")
+            seconds = max(1, math.ceil(timeout / 1000))
+            command = f"timeout {seconds}s bash -lc {shlex.quote(command)}"
+        workdir = arguments.get("workdir")
+        if isinstance(workdir, str) and workdir:
+            command = f"cd {shlex.quote(workdir)} && {command}"
+        return await self.bash(command)
+class EditTool(_FilesystemTool):
+    name = "edit"
+    description = (
+        "Replaces text within a file. Use oldString as exact literal context. "
+        "Set replaceAll to true to replace every occurrence."
+    )
+    parameters: ClassVar[dict[str, Any]] = {
+        "type": "object",
+        "properties": {
+            "filePath": {
+                "type": "string",
+                "description": "The absolute path to the file to modify.",
+            },
+            "oldString": {"type": "string", "description": "The text to replace."},
+            "newString": {
+                "type": "string",
+                "description": "The text to replace it with (must be different from oldString).",
+            },
+            "replaceAll": {
+                "type": "boolean",
+                "description": "Replace all occurrences of oldString (default false).",
+            },
+        },
+        "required": ["filePath", "oldString", "newString"],
+    }
+    async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
+        path = arguments.get("filePath")
+        if not isinstance(path, str) or not path:
+            raise ValueError("filePath is required")
+        old = arguments.get("oldString")
+        new = arguments.get("newString")
+        if not isinstance(old, str):
+            raise ValueError("oldString is required")
+        if not isinstance(new, str):
+            raise ValueError("newString is required")
+        if old == new:
+            return tool_err("No changes to apply: oldString and newString are identical.")
+        if old == "":
+            exists = not (await self.bash(f"test -e {shlex.quote(path)}")).isError
+            if exists:
+                return tool_err(
+                    "oldString cannot be empty when editing an existing file. "
+                    "Provide exact text to replace, or use write for full-file replacement."
+                )
+            mkdir = await self._ensure_parent(path)
+            if mkdir.isError:
+                return mkdir
+            return await self.file_write(path, new)
+        existing = await self.file_read(path)
+        if existing.isError:
+            return existing
+        text = result_text(existing)
+        count = text.count(old)
+        if count == 0:
+            return tool_err(f"oldString not found in {path}")
+        replace_all = arguments.get("replaceAll") is True
+        if count > 1 and not replace_all:
+            return tool_err(f"oldString matches {count} times in {path}; set replaceAll to true")
+        next_text = text.replace(old, new) if replace_all else text.replace(old, new, 1)
+        return await self.file_write(path, next_text)
+    async def _ensure_parent(self, path: str) -> MCPToolResult:
+        parent = posixpath.dirname(path)
+        if not parent or parent in {".", "/"}:
+            return MCPToolResult(content=[])
+        return await self.bash(f"mkdir -p {shlex.quote(parent)}")
+class WriteTool(_FilesystemTool):
+    name = "write"
+    description = "Creates or overwrites a file with the provided content."
+    parameters: ClassVar[dict[str, Any]] = {
+        "type": "object",
+        "properties": {
+            "content": {"type": "string", "description": "The content to write to the file."},
+            "filePath": {
+                "type": "string",
+                "description": "The absolute path to the file to write.",
+            },
+        },
+        "required": ["content", "filePath"],
+    }
+    async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
+        path = arguments.get("filePath")
+        if not isinstance(path, str) or not path:
+            raise ValueError("filePath is required")
+        content = arguments.get("content")
+        if not isinstance(content, str):
+            raise ValueError("content is required")
+        mkdir = await self._ensure_parent(path)
+        if mkdir.isError:
+            return mkdir
+        return await self.file_write(path, content)
+    async def _ensure_parent(self, path: str) -> MCPToolResult:
+        parent = posixpath.dirname(path)
+        if not parent or parent in {".", "/"}:
+            return MCPToolResult(content=[])
+        return await self.bash(f"mkdir -p {shlex.quote(parent)}")
 class GrepTool(_FilesystemTool):
@@ -115,24 +315,18 @@ class GlobTool(_FilesystemTool):
         return await self.bash(f"find {shlex.quote(str(path))} -name {shlex.quote(pattern)}")
-class ListTool(_FilesystemTool):
-    name = "list"
-    description = "Lists files and directories in a given path."
-    parameters: ClassVar[dict[str, Any]] = {
-        "type": "object",
-        "properties": {
-            "path": {"type": "string", "description": "Directory to list."},
-            "ignore": {
-                "type": "array",
-                "items": {"type": "string"},
-                "description": "Glob patterns to ignore.",
-            },
-        },
-    }
+def _positive_int(value: Any, *, default: int, name: str) -> int:
+    if value is None:
+        return default
+    if not isinstance(value, int) or value < 1:
+        raise ValueError(f"{name} must be a positive integer")
+    return value
-    async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
-        path = arguments.get("path") or "."
-        return await self.file_list(str(path))
+def _read_offset(value: Any) -> int:
+    if value is None or value == 0:
+        return 1
+    return _positive_int(value, default=1, name="offset")
-__all__ = ["GlobTool", "GrepTool", "ListTool", "ReadTool"]
+__all__ = ["BashTool", "EditTool", "GlobTool", "GrepTool", "ReadTool", "WriteTool"]

hud/agents/tests/test_provider_native_tools.py CHANGED Viewed

@@ -7,16 +7,19 @@ client and assert the command translation + result shape, fully offline.
 from __future__ import annotations
-from typing import TYPE_CHECKING, Any, cast
+import shlex
+from typing import Any, cast
 import pytest
 from hud.agents.claude.tools.coding import ClaudeBashTool, ClaudeTextEditorTool
 from hud.agents.gemini.tools.coding import GeminiEditTool, GeminiShellTool
 from hud.agents.openai.tools.coding import OpenAIShellTool
-if TYPE_CHECKING:
-    from hud.capabilities import SSHClient
+from hud.agents.openai_compatible.agent import OpenAIChatAgent
+from hud.agents.openai_compatible.tools import BashTool, EditTool, ReadTool, WriteTool
+from hud.agents.tools.base import result_text
+from hud.agents.types import OpenAIChatConfig
+from hud.capabilities import Capability, SSHClient
 class _Completed:
@@ -61,6 +64,21 @@ class _FakeSFTP:
     def open(self, path: str, mode: str) -> _FakeOpenFile:
         return _FakeOpenFile(self._store, path, mode)
+    async def listdir(self, path: str) -> list[str]:
+        prefix = path.rstrip("/")
+        if not prefix:
+            prefix = "/"
+        if prefix != "/":
+            prefix += "/"
+        names: set[str] = set()
+        for file_path in self._store:
+            if not file_path.startswith(prefix):
+                continue
+            rest = file_path[len(prefix) :]
+            if rest:
+                names.add(rest.split("/", 1)[0])
+        return sorted(names)
 class _Conn:
     def __init__(self, completed: _Completed, store: dict[str, bytes]) -> None:
@@ -70,13 +88,26 @@ class _Conn:
     async def run(self, command: str, check: bool = False) -> _Completed:
         self.commands.append(command)
+        parts = shlex.split(command)
+        if len(parts) == 3 and parts[:2] in (["test", "-d"], ["test", "-e"]):
+            path = parts[2]
+            exists = path in self._store or any(
+                file_path.startswith(path.rstrip("/") + "/") for file_path in self._store
+            )
+            if parts[1] == "-d":
+                exists = any(
+                    file_path.startswith(path.rstrip("/") + "/") for file_path in self._store
+                )
+            return _Completed(exit_status=0 if exists else 1)
+        if len(parts) >= 3 and parts[:2] == ["mkdir", "-p"]:
+            return _Completed(exit_status=0)
         return self._completed
     def start_sftp_client(self) -> _FakeSFTP:
         return _FakeSFTP(self._store)
-class _FakeSSH:
+class _FakeSSH(SSHClient):
     """Duck-typed ``SSHClient``: ``conn.run`` (bash) + ``conn.start_sftp_client`` (files)."""
     def __init__(
@@ -87,7 +118,10 @@ class _FakeSSH:
         files: dict[str, bytes] | None = None,
     ) -> None:
         self.files: dict[str, bytes] = files or {}
-        self.conn = _Conn(_Completed(stdout=stdout, exit_status=exit_status), self.files)
+        super().__init__(
+            Capability(name="shell", protocol="ssh/2", url="ssh://localhost:22"),
+            cast("Any", _Conn(_Completed(stdout=stdout, exit_status=exit_status), self.files)),
+        )
 def _ssh(**kwargs: Any) -> SSHClient:
@@ -98,6 +132,11 @@ def _commands(tool: Any) -> list[str]:
     return tool.client.conn.commands
+class _OpenAIChatAgentForTest(OpenAIChatAgent):
+    async def build_tools_for_test(self, ssh: SSHClient) -> tuple[dict[str, Any], list[Any]]:
+        return await self._build_tools({"ssh": ssh})
 # ─── OpenAI shell ─────────────────────────────────────────────────────
@@ -135,6 +174,96 @@ def test_openai_shell_to_params_is_shell_type() -> None:
     assert tool.to_params()["type"] == "shell"
+# ─── OpenAI-compatible OpenCode workspace tools ───────────────────────
+async def test_openai_compatible_catalog_matches_opencode_workspace_tools() -> None:
+    agent = _OpenAIChatAgentForTest(
+        OpenAIChatConfig(model="qwen3.6-plus", model_client=cast("Any", object()))
+    )
+    tools, params = await agent.build_tools_for_test(_ssh())
+    assert list(tools) == ["bash", "read", "glob", "grep", "edit", "write"]
+    assert [param["function"]["name"] for param in params] == [
+        "bash",
+        "read",
+        "glob",
+        "grep",
+        "edit",
+        "write",
+    ]
+async def test_openai_compatible_bash_uses_workdir_and_timeout() -> None:
+    tool = BashTool(spec=BashTool.default_spec("qwen"), client=_ssh())
+    await tool.execute({"command": "echo hi", "workdir": "/tmp/my dir", "timeout": 2500})
+    assert _commands(tool) == ["cd '/tmp/my dir' && timeout 3s bash -lc 'echo hi'"]
+async def test_openai_compatible_write_stores_file_via_workspace_sftp() -> None:
+    ssh = _FakeSSH()
+    tool = WriteTool(spec=WriteTool.default_spec("qwen"), client=cast("SSHClient", ssh))
+    result = await tool.execute({"filePath": "/REPORT.md", "content": "done"})
+    assert result.isError is False
+    assert ssh.files["/REPORT.md"] == b"done"
+async def test_openai_compatible_edit_rewrites_unique_match() -> None:
+    ssh = _FakeSSH(files={"/f.txt": b"hello old world"})
+    tool = EditTool(spec=EditTool.default_spec("qwen"), client=cast("SSHClient", ssh))
+    result = await tool.execute(
+        {"filePath": "/f.txt", "oldString": "old", "newString": "new"},
+    )
+    assert result.isError is False
+    assert ssh.files["/f.txt"] == b"hello new world"
+async def test_openai_compatible_edit_rejects_ambiguous_match() -> None:
+    ssh = _FakeSSH(files={"/f.txt": b"a a a"})
+    tool = EditTool(spec=EditTool.default_spec("qwen"), client=cast("SSHClient", ssh))
+    result = await tool.execute(
+        {"filePath": "/f.txt", "oldString": "a", "newString": "b"},
+    )
+    assert result.isError is True
+    assert ssh.files["/f.txt"] == b"a a a"
+async def test_openai_compatible_read_lists_directories() -> None:
+    tool = ReadTool(
+        spec=ReadTool.default_spec("qwen"),
+        client=_ssh(files={"/work/a.txt": b"a", "/work/nested/b.txt": b"b"}),
+    )
+    result = await tool.execute({"filePath": "/work"})
+    text = result_text(result)
+    assert "<type>directory</type>" in text
+    assert "a.txt" in text
+    assert "nested" in text
+async def test_openai_compatible_read_accepts_zero_offset_for_first_page() -> None:
+    tool = ReadTool(
+        spec=ReadTool.default_spec("qwen"),
+        client=_ssh(files={"/f.txt": b"alpha\nbeta\n"}),
+    )
+    result = await tool.execute({"filePath": "/f.txt", "offset": 0, "limit": 1})
+    text = result_text(result)
+    assert "1: alpha" in text
+    assert "2: beta" not in text
 # ─── Gemini shell ─────────────────────────────────────────────────────

hud/eval/tests/test_rollout.py CHANGED Viewed

@@ -13,14 +13,18 @@ the atom and return a :class:`Job`.
 from __future__ import annotations
 import asyncio
+import json
 import textwrap
 from contextlib import asynccontextmanager
+from types import SimpleNamespace
 from typing import TYPE_CHECKING, Any
 import mcp.types as mcp_types
 import pytest
 from hud.agents.base import Agent
+from hud.agents.openai_compatible import OpenAIChatAgent
+from hud.agents.types import OpenAIChatConfig
 from hud.environment import Environment
 from hud.eval import Job, LocalRuntime, Task, Taskset
 from hud.eval.run import Run, rollout
@@ -63,6 +67,44 @@ class _FnAgent(Agent):
         run.trace.content = self._fn(run.prompt)
+class _SequencedCompletions:
+    def __init__(self, responses: list[Any]) -> None:
+        self._responses = responses
+        self.requests: list[dict[str, Any]] = []
+    async def create(self, **kwargs: Any) -> Any:
+        self.requests.append(kwargs)
+        return self._responses.pop(0)
+class _FakeOpenAI:
+    def __init__(self, responses: list[Any]) -> None:
+        self.chat = SimpleNamespace(completions=_SequencedCompletions(responses))
+def _chat_response(content: str, tool_calls: list[Any] | None = None) -> Any:
+    message = SimpleNamespace(
+        content=content,
+        tool_calls=tool_calls or [],
+        refusal=None,
+        model_dump=lambda exclude_none=True: {"role": "assistant", "content": content},
+    )
+    choice = SimpleNamespace(message=message, finish_reason="stop", logprobs=None)
+    return SimpleNamespace(
+        choices=[choice],
+        model="fake-openai-compatible",
+        usage=SimpleNamespace(prompt_tokens=1, completion_tokens=1, prompt_tokens_details=None),
+    )
+def _tool_call(name: str, arguments: str) -> Any:
+    return SimpleNamespace(
+        type="function",
+        id=f"call_{name}",
+        function=SimpleNamespace(name=name, arguments=arguments),
+    )
 def _add_task(a: int, b: int) -> Task:
     """A pure data row; the env it names is defined by the spawned file."""
     return Task(env="sums", id="add", args={"a": a, "b": b})
@@ -86,6 +128,54 @@ async def test_rollout_returns_graded_run_with_trace_id(env_file: Path) -> None:
     assert run.runtime.startswith("tcp://127.0.0.1:")
+async def test_openai_compatible_write_reaches_workspace_grader(tmp_path: Path) -> None:
+    workspace = tmp_path / "workspace"
+    report = workspace / "REPORT.md"
+    env = Environment("opencode_report")
+    env.workspace(workspace, guest_path=str(workspace))
+    @env.initialize
+    async def seed() -> None:
+        workspace.mkdir(parents=True, exist_ok=True)
+        report.unlink(missing_ok=True)
+    @env.template()
+    async def write_report():
+        yield "Write PASS to REPORT.md."
+        yield 1.0 if report.exists() and report.read_text().strip() == "PASS" else 0.0
+    model_client = _FakeOpenAI(
+        [
+            _chat_response(
+                "",
+                [_tool_call("write", json.dumps({"filePath": str(report), "content": "PASS"}))],
+            ),
+            _chat_response("done"),
+        ]
+    )
+    agent = OpenAIChatAgent(
+        OpenAIChatConfig(model="qwen3.6-plus", model_client=model_client, max_steps=4)
+    )
+    run = await rollout(
+        Task(env="opencode_report", id="write_report"),
+        agent,
+        runtime=lambda _task: _local(env),
+    )
+    assert run.reward == 1.0
+    assert report.read_text() == "PASS"
+    tools = model_client.chat.completions.requests[0]["extra_body"]["tools"]
+    assert [tool["function"]["name"] for tool in tools] == [
+        "bash",
+        "read",
+        "glob",
+        "grep",
+        "edit",
+        "write",
+    ]
 async def test_mid_run_failure_keeps_the_real_run_and_its_evidence(env_file: Path) -> None:
     def boom(prompt: str) -> str:
         raise RuntimeError("agent exploded")

hud/version.py CHANGED Viewed

@@ -4,4 +4,4 @@ Version information for the HUD SDK.
 from __future__ import annotations
-__version__ = "0.6.7"
+__version__ = "0.6.8.dev0"

{hud_python-0.6.7.dist-info → hud_python-0.6.8.dev0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.6.7
+Version: 0.6.8.dev0
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -87,7 +87,7 @@ Description-Content-Type: text/markdown
 HUD is a platform for building RL environments for AI agents, across coding, browser, computer-use, and robotics. Define an environment, write tasks, and run them as evals and training across any model, at any scale.
-To learn more, see the [documentation](https://docs.hud.ai) and [API reference](https://docs.hud.ai/reference/environment).
+To learn more, see the [documentation](https://docs.hud.ai) and [environment reference](https://docs.hud.ai/v6/core/environment).
 [![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
 [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
@@ -120,7 +120,7 @@ Then scaffold your first environment:
 hud init my-env
 ```
-![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
+![Agent running on SheetBench](docs/src/images/trace_sheet.gif)
 ## The protocol
@@ -159,14 +159,14 @@ hud eval my-taskset --remote
 For local iteration, the same protocol works against a container on your laptop:
 ```bash
-hud build .
-docker run -d --name run1 my-env
-docker exec run1 hud task start fix_bug
-docker exec run1 hud task grade fix_bug --answer "…"
+docker build -f Dockerfile.hud -t my-env .
+docker run -d --name run1 -p 8765:8765 my-env
+hud task start fix_bug --url tcp://127.0.0.1:8765
+hud task grade fix_bug --url tcp://127.0.0.1:8765 --answer "..."
 docker rm -f run1
 ```
-→ [Package & deploy](https://docs.hud.ai/run/deploy)
+→ [Run & deploy](https://docs.hud.ai/v6/core/runtime)
 ## Environments & templates
@@ -193,7 +193,7 @@ hud eval tasks.py claude --group 3
 Each graded evaluation is a **trace** (the SDK's live handle is a `Run`). With `HUD_API_KEY` set, every rollout is recorded on [hud.ai](https://hud.ai). Tasks that need a shell, browser, GUI, or robot declare **capabilities** (below); everything else — variants, grading, batching — stays identical.
-→ [Quickstart](https://docs.hud.ai/quickstart) · [Tasks & tasksets](https://docs.hud.ai/reference/tasks)
+→ [Quickstart](https://docs.hud.ai/v6/start/quickstart) · [Tasks & tasksets](https://docs.hud.ai/v6/core/tasks)
 ## Capabilities & harnesses
@@ -211,39 +211,42 @@ A **capability** is a connection the environment exposes; a **harness** attaches
 **Bring your own:** a harness attaches to a capability and defines a tool spec — wrap `browser-use` on `cdp`, a VLA policy on `robot`, or your own agent on `ssh` / `mcp`. No protocol work required.
-→ [Capabilities](https://docs.hud.ai/reference/capabilities) · [Models](https://docs.hud.ai/run/models) · [Robots](https://docs.hud.ai/reference/robots)
+→ [Capabilities](https://docs.hud.ai/v6/core/capabilities) · [Models](https://docs.hud.ai/v6/core/agents) · [Robots](https://docs.hud.ai/v6/advanced/robots)
 ## Deploy on the platform
 From the [platform UI](https://hud.ai) you can run batches, compare models on the same taskset, and inspect every trace.
-→ [Deploy](https://docs.hud.ai/run/deploy) · [Leaderboards](https://hud.ai/leaderboards)
+→ [Run & deploy](https://docs.hud.ai/v6/core/runtime)
 ## Train on rewards
-Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and turn the rewards into GRPO advantages with `group_relative()`:
+Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and pass the graded runs to `TrainingClient.step()`:
 ```python
+from hud import TrainingClient
 from hud.agents import create_agent
-from hud.eval import Taskset, group_relative
+from hud.eval import Job
-agent = create_agent("claude-sonnet-4-5")
-job = await Taskset(count_letter(word=w) for w in words).run(agent, group=16)
-for runs in job.results.values():
-    advantages = group_relative([r.reward for r in runs], normalize_std=True)
-    ...  # feed (run.trace_id, adv) into your optimizer
+agent = create_agent("arith-rl", completion_kwargs={"extra_body": {"return_token_ids": True}})
+trainer = TrainingClient("arith-rl")
+taskset, runtime = ...  # your Taskset and where rollouts run
+session = await Job.start("arith-rl", group=8)
+start = len(session.runs)
+await taskset.run(agent, runtime=runtime, group=8, job=session)
+await trainer.step(session.runs[start:], learning_rate=1e-5, group_size=8)
 ```
 HUD is the environment-and-reward source for your own GRPO/PPO loop — the same environment trains any model, text or multimodal, unchanged.
-→ [Training](https://docs.hud.ai/run/training) · [Designing tasks for signal](https://docs.hud.ai/run/signal)
+→ [Training](https://docs.hud.ai/v6/core/training) · [Designing tasks for signal](https://docs.hud.ai/v6/core/advice)
 ## Links
 - [Documentation](https://docs.hud.ai)
-- [Quickstart](https://docs.hud.ai/quickstart)
-- [CLI reference](https://docs.hud.ai/reference/cli)
-- [Leaderboards](https://hud.ai/leaderboards)
+- [Quickstart](https://docs.hud.ai/v6/start/quickstart)
+- [CLI reference](https://docs.hud.ai/v6/core/cli)
 - [Environment templates](https://hud.ai/environments)
 - [Supported models](https://hud.ai/models)
 - [Discord](https://discord.gg/wkjtmHYYjm)
@@ -268,8 +271,8 @@ Key areas: [Agents](hud/agents/) · [Environments](hud/environment/) · [Capabil
 ```bibtex
 @software{hud2025agentevalplatform,
-  author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
-  title  = {HUD: An Evaluation and RL Envrionments Platform for Agents},
+  author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep Chawla and Nguyen Nhat Minh},
+  title  = {HUD: An Evaluation and RL Environments Platform for Agents},
   date   = {2025-04},
   url    = {https://github.com/hud-evals/hud-python},
   langid = {en}

{hud_python-0.6.7.dist-info → hud_python-0.6.8.dev0.dist-info}/RECORD RENAMED Viewed

@@ -5,7 +5,7 @@ hud/conftest.py,sha256=HKbHvmFXLPX6KFSJgPFUAM22auclNNdFmHGwilNzg98,1012
 hud/server.py,sha256=NtSHIjBFr9lYvryfXrCa-VhwqnwkRy7n5fp_OuNhNOI,1235
 hud/settings.py,sha256=eyvMIOOlFk6kIAP8UsHEeoqf_UiOVhb1jhRCM2qv7b8,6393
 hud/types.py,sha256=kFVbQ-CcVhYpdX5jjgacRIppFS0q_nMXahijV_Hhl58,15022
-hud/version.py,sha256=65WfpY5H3Rz9DNOq0DBQjuLZDIP-JPjHy6Y3-Nfc_dc,104
+hud/version.py,sha256=RD_T-I7Yj0KBuadVj2UQF2XmPhTeHn3Lo45gIQTb5e4,109
 hud/agents/__init__.py,sha256=UL1PXucnY1Ln9o_Xf0Y-mvfbNh6NUdMyPJp-_d9Wq7Q,5082
 hud/agents/base.py,sha256=WgEOWUmMioXTxYe6cOvbqnbM4n989Z9kFEZIN6xJ3pU,659
 hud/agents/tool_agent.py,sha256=a0xsh2d8IwvmiPGMs9LCzghi61FHt4vMK_9sW8eNFbA,12557
@@ -54,10 +54,10 @@ hud/agents/openai/tools/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
 hud/agents/openai/tools/tests/test_computer.py,sha256=qEK7h2eD4j6Wg6VjU_YD8kCRpXOXwHDXBv1bz0mh5bo,3488
 hud/agents/openai/tools/tests/test_strict_schema.py,sha256=8dGkCSO7_-TvryEfStKZ7nKEuO3WGLfzsjPUbfdHMhQ,2344
 hud/agents/openai_compatible/__init__.py,sha256=zQZSQHB97g3rtPx4Y8aG_0K1i17MLwGRaTyQLd31Jqk,98
-hud/agents/openai_compatible/agent.py,sha256=YjtQkrlgekhyGRhUoxwkJZqJNaHKKCgUKxU7gnRc2hY,9880
-hud/agents/openai_compatible/tools/__init__.py,sha256=H5zBQbEfT2z1fMs3yRdVVYa5oZ2ejhYnxWLJTH3gx08,307
+hud/agents/openai_compatible/agent.py,sha256=7Zw6wa1ce7kt_xF4R_OfuoDbMPl09TktpjgFc16-_Lo,9946
+hud/agents/openai_compatible/tools/__init__.py,sha256=kOPtrgiqTcnQabZpo1aNfYfnaqip6M3z2OeffJNz-Ak,361
 hud/agents/openai_compatible/tools/base.py,sha256=Jl6Bm9ZgEOqgdOnM7Xm66VN3RpfjeZF9w55of_ZGCMI,5760
-hud/agents/openai_compatible/tools/filesystem.py,sha256=QXJW0-7lYZXbcftxUC7LxXChx-17clsjoRfHJg2DFBA,4905
+hud/agents/openai_compatible/tools/filesystem.py,sha256=hHSVW25OT_zxdJO6fE2kOPnnABOd06kHPWC08epoCNg,12523
 hud/agents/openai_compatible/tools/mcp_proxy.py,sha256=pfJdCvFxTaXkj6qrGK04jxibjeIhm6O-5STHPcB_qL4,844
 hud/agents/robot/__init__.py,sha256=UXyQYaoLMrxFr1QYU2D6UUz6BwK9gsp4-abe5jAOqUU,1620
 hud/agents/robot/_types.py,sha256=byWZMYRwLuzvu2U-ZXMx3TcyRTPcsjGF5HkItbgfcQ4,222
@@ -75,7 +75,7 @@ hud/agents/tests/test_claude_sdk_agent.py,sha256=lSY8wnLQgfJBNzF9BU-PcO4IrKaWtva
 hud/agents/tests/test_gemini_agent.py,sha256=7OdFFVSOkJE8Gb3blptWnEXuFWHuFCNlFAoMXTyV0Ec,4835
 hud/agents/tests/test_openai_agent.py,sha256=-69hoi_Bv9JdGngEnaJ74mSH-JCupg66ny7hODXQF00,4180
 hud/agents/tests/test_openai_compatible_agent.py,sha256=6JxFxkRdPT1O574VYvcsMXiUwhcvBFJQLBx46Utt4QI,2874
-hud/agents/tests/test_provider_native_tools.py,sha256=WjXV2dVNBG1ite6-aigzortgQIar9GMlZrMAE1_guVs,8381
+hud/agents/tests/test_provider_native_tools.py,sha256=dZ4dOT3sUkMh_7p-pGDnTIL7UDdwngNJ8jarlqU0Plk,12989
 hud/agents/tests/test_tool_agent.py,sha256=w8cuBAMcGBbIwiMnjH-tg4ztqhlewQOnXK3h1XLkj5o,5373
 hud/agents/tests/test_trace.py,sha256=rUNbV-y4gI0dH0xluT9COY_epJD69XHAzaC1HO4mX10,4517
 hud/agents/tools/__init__.py,sha256=-fnzzq8qwEXWD8s-T8RUGamuYndXTESeFNNMQxsXH5A,858
@@ -179,7 +179,7 @@ hud/eval/tests/test_docker_provider.py,sha256=1W1xyOzjHti6jfV2eiVnNd5CxKEMAKq8NB
 hud/eval/tests/test_file_tracking_observer.py,sha256=DteazLLWK0LKgtUn_6v4_wMI-1jhENMx7Y87-pdg-I8,4197
 hud/eval/tests/test_hosted.py,sha256=S0gGqAUaizlCGC30XwvaWb-TJhFgLUPlwsMO0WgjVWM,16284
 hud/eval/tests/test_job.py,sha256=UyaqbOY-0pnd2RNIp3glS_L_JJFT0-7GlSkgRhgaU1A,1867
-hud/eval/tests/test_rollout.py,sha256=tsMx9gFRRc0Afzs68wOf-G0_zGYkN4zYZ4KUNU0c8bk,11232
+hud/eval/tests/test_rollout.py,sha256=YUVqzDbIg9Y5LNnDwaNJ40hOL1BVAFgpHRHCyGlcfQw,14027
 hud/eval/tests/test_sync.py,sha256=1gFC65ZiZojeSn9q1v-RMK2Ps130mlh-aXE7G8sn54k,5234
 hud/eval/tests/test_task.py,sha256=n0E3B3TBYV6aM2_KFVGPHuD9nBGlpwq4ZvBu9wpjqtU,9754
 hud/graders/__init__.py,sha256=eccF8MXHQBvmynULljOCEMn82YSK0HSScD1TlS8UoT4,1570
@@ -226,8 +226,8 @@ hud/utils/tests/test_platform.py,sha256=mwhyFkUBvgmHRc43vQ_JgAAW2N9fIaxkQhVo-GB4
 hud/utils/tests/test_requests.py,sha256=ENK6P5xLTuSgWDcCau4zCj_5zPV_EooGwU4P8YYl5Gw,9109
 hud/utils/tests/test_serialization.py,sha256=GY4NiFUJtwLSYQWA0n1zme-Ul4DnBLByHCOOkxn2kLM,819
 hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hud_python-0.6.7.dist-info/METADATA,sha256=o9vcEt4elYRJ6KUPO8whEG-i0dqYl8-lj_L6wr7gKaA,12344
-hud_python-0.6.7.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
-hud_python-0.6.7.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
-hud_python-0.6.7.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
-hud_python-0.6.7.dist-info/RECORD,,
+hud_python-0.6.8.dev0.dist-info/METADATA,sha256=k0BA7OmInHsM-CB-pm5GNc0yYVwF7EbUr0etmiU-xXg,12427
+hud_python-0.6.8.dev0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+hud_python-0.6.8.dev0.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
+hud_python-0.6.8.dev0.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
+hud_python-0.6.8.dev0.dist-info/RECORD,,

{hud_python-0.6.7.dist-info → hud_python-0.6.8.dev0.dist-info}/WHEEL RENAMED Viewed

File without changes

{hud_python-0.6.7.dist-info → hud_python-0.6.8.dev0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{hud_python-0.6.7.dist-info → hud_python-0.6.8.dev0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hud-python 0.6.7__py3-none-any.whl → 0.6.8.dev0__py3-none-any.whl

hud-python 0.6.7py3-none-any.whl → 0.6.8.dev0py3-none-any.whl