hud-python 0.6.6__py3-none-any.whl → 0.6.8.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,11 +17,13 @@ from hud.types import MCPToolCall, MCPToolResult
17
17
  from hud.utils import gateway
18
18
 
19
19
  from .tools import (
20
+ BashTool,
21
+ EditTool,
20
22
  GlobTool,
21
23
  GrepTool,
22
- ListTool,
23
24
  OpenAICompatibleMCPProxyTool,
24
25
  ReadTool,
26
+ WriteTool,
25
27
  )
26
28
  from .tools.base import format_chat_result
27
29
 
@@ -41,10 +43,12 @@ class OpenAIChatAgent(ToolAgent[ChatCompletionMessageParam, OpenAIChatConfig]):
41
43
  """OpenAI-compatible agent using the chat.completions protocol."""
42
44
 
43
45
  tool_catalog = (
46
+ BashTool,
44
47
  ReadTool,
45
- GrepTool,
46
48
  GlobTool,
47
- ListTool,
49
+ GrepTool,
50
+ EditTool,
51
+ WriteTool,
48
52
  OpenAICompatibleMCPProxyTool,
49
53
  )
50
54
 
@@ -2,13 +2,15 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from .filesystem import GlobTool, GrepTool, ListTool, ReadTool
5
+ from .filesystem import BashTool, EditTool, GlobTool, GrepTool, ReadTool, WriteTool
6
6
  from .mcp_proxy import OpenAICompatibleMCPProxyTool
7
7
 
8
8
  __all__ = [
9
+ "BashTool",
10
+ "EditTool",
9
11
  "GlobTool",
10
12
  "GrepTool",
11
- "ListTool",
12
13
  "OpenAICompatibleMCPProxyTool",
13
14
  "ReadTool",
15
+ "WriteTool",
14
16
  ]
@@ -1,16 +1,20 @@
1
- """OpenAI-compatible filesystem tools backed by SSHClient."""
1
+ """OpenAI-compatible OpenCode-style workspace tools backed by SSHClient."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import math
6
+ import posixpath
5
7
  import shlex
6
8
  from typing import Any, ClassVar
7
9
 
8
10
  import mcp.types as mcp_types
9
11
 
10
12
  from hud.agents.tools import SSHTool
11
- from hud.agents.tools.base import AgentToolSpec, result_text
13
+ from hud.agents.tools.base import AgentToolSpec, result_text, tool_err
12
14
  from hud.types import MCPToolResult
13
15
 
16
+ DEFAULT_READ_LIMIT = 2000
17
+
14
18
 
15
19
  class _FilesystemTool(SSHTool):
16
20
  description: ClassVar[str]
@@ -34,16 +38,26 @@ class _FilesystemTool(SSHTool):
34
38
 
35
39
  class ReadTool(_FilesystemTool):
36
40
  name = "read"
37
- description = "Reads a file from the local filesystem. Use offset and limit for pagination."
41
+ description = (
42
+ "Reads a file or directory from the workspace. Use offset and limit for pagination."
43
+ )
38
44
  parameters: ClassVar[dict[str, Any]] = {
39
45
  "type": "object",
40
46
  "properties": {
41
- "filePath": {"type": "string", "description": "Absolute path to the file to read."},
47
+ "filePath": {
48
+ "type": "string",
49
+ "description": "The absolute path to the file or directory to read.",
50
+ },
42
51
  "offset": {
43
52
  "type": "integer",
44
- "description": "0-based line offset to start reading from.",
53
+ "description": "The line number to start reading from (1-indexed).",
54
+ "minimum": 0,
55
+ },
56
+ "limit": {
57
+ "type": "integer",
58
+ "description": "The maximum number of lines to read (defaults to 2000).",
59
+ "minimum": 1,
45
60
  },
46
- "limit": {"type": "integer", "description": "Maximum number of lines to read."},
47
61
  },
48
62
  "required": ["filePath"],
49
63
  }
@@ -52,19 +66,205 @@ class ReadTool(_FilesystemTool):
52
66
  path = arguments.get("filePath")
53
67
  if not isinstance(path, str) or not path:
54
68
  raise ValueError("filePath is required")
69
+ offset = _read_offset(arguments.get("offset"))
70
+ limit = _positive_int(arguments.get("limit"), default=DEFAULT_READ_LIMIT, name="limit")
71
+ if not (await self.bash(f"test -d {shlex.quote(path)}")).isError:
72
+ return await self._read_directory(path, offset=offset, limit=limit)
55
73
  result = await self.file_read(path)
56
74
  if result.isError:
57
75
  return result
58
- offset = arguments.get("offset")
59
- limit = arguments.get("limit")
60
- if isinstance(offset, int) and offset >= 0:
61
- lines = result_text(result).splitlines(keepends=True)
62
- end = offset + limit if isinstance(limit, int) and limit > 0 else len(lines)
63
- sliced = lines[offset:end]
64
- return MCPToolResult(
65
- content=[mcp_types.TextContent(type="text", text="".join(sliced))],
76
+ text = result_text(result)
77
+ lines = text.splitlines()
78
+ start = offset - 1
79
+ if start > len(lines) and not (len(lines) == 0 and offset == 1):
80
+ return tool_err(f"Offset {offset} is out of range for this file ({len(lines)} lines)")
81
+ sliced = lines[start : start + limit]
82
+ last = offset + len(sliced) - 1
83
+ more = last < len(lines)
84
+ body = [
85
+ f"<path>{path}</path>",
86
+ "<type>file</type>",
87
+ "<content>",
88
+ *[f"{i + offset}: {line}" for i, line in enumerate(sliced)],
89
+ ]
90
+ if more:
91
+ body.append(
92
+ f"\n(Showing lines {offset}-{last} of {len(lines)}. "
93
+ f"Use offset={last + 1} to continue.)"
94
+ )
95
+ else:
96
+ body.append(f"\n(End of file - total {len(lines)} lines)")
97
+ body.append("</content>")
98
+ return MCPToolResult(content=[mcp_types.TextContent(type="text", text="\n".join(body))])
99
+
100
+ async def _read_directory(self, path: str, *, offset: int, limit: int) -> MCPToolResult:
101
+ result = await self.file_list(path)
102
+ if result.isError:
103
+ return result
104
+ entries = result_text(result).splitlines()
105
+ if entries == ["(empty)"]:
106
+ entries = []
107
+ start = offset - 1
108
+ sliced = entries[start : start + limit]
109
+ truncated = start + len(sliced) < len(entries)
110
+ body = [
111
+ f"<path>{path}</path>",
112
+ "<type>directory</type>",
113
+ "<entries>",
114
+ *sliced,
115
+ ]
116
+ if truncated:
117
+ body.append(
118
+ f"\n(Showing {len(sliced)} of {len(entries)} entries. "
119
+ f"Use offset={offset + len(sliced)} to continue.)"
66
120
  )
67
- return result
121
+ else:
122
+ body.append(f"\n({len(entries)} entries)")
123
+ body.append("</entries>")
124
+ return MCPToolResult(content=[mcp_types.TextContent(type="text", text="\n".join(body))])
125
+
126
+
127
class BashTool(_FilesystemTool):
    """Run a shell command in the workspace through the SSH-backed ``bash`` helper.

    Accepts a required ``command`` plus an optional ``timeout`` (milliseconds)
    and an optional ``workdir`` to change into before the command runs.
    """

    name = "bash"
    description = (
        "Executes a shell command in the workspace. Prefer read, grep, glob, edit, "
        "and write for filesystem operations."
    )
    parameters: ClassVar[dict[str, Any]] = {
        "type": "object",
        "properties": {
            "command": {"type": "string", "description": "The command to execute."},
            "timeout": {
                "type": "integer",
                "description": "Optional timeout in milliseconds.",
                "minimum": 1,
            },
            "workdir": {
                "type": "string",
                "description": "The working directory to run the command in.",
            },
        },
        "required": ["command"],
    }

    async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
        """Assemble the remote command line and run it with ``self.bash``.

        Args:
            arguments: Tool-call arguments matching ``parameters``.

        Returns:
            Whatever ``self.bash`` returns for the assembled command.

        Raises:
            ValueError: If ``command`` is missing or empty, or ``timeout`` is
                not a positive integer.
        """
        command = arguments.get("command")
        if not isinstance(command, str) or not command:
            raise ValueError("command is required")

        is_single_command = False
        timeout = arguments.get("timeout")
        if timeout is not None:
            # bool is a subclass of int, so a JSON ``true`` must be rejected explicitly.
            if isinstance(timeout, bool) or not isinstance(timeout, int) or timeout < 1:
                raise ValueError("timeout must be a positive integer")
            # ``timeout`` takes whole seconds here; round up so short limits are not zero.
            seconds = max(1, math.ceil(timeout / 1000))
            command = f"timeout {seconds}s bash -lc {shlex.quote(command)}"
            is_single_command = True

        workdir = arguments.get("workdir")
        if isinstance(workdir, str) and workdir:
            if not is_single_command:
                # Group the raw command so a failed ``cd`` skips ALL of it.
                # Without the braces, ``cd X && a; b`` would still run ``b``
                # (and ``cd X && a || b`` would run ``b``) in the wrong directory.
                # NOTE(review): assumes the remote shell is POSIX-compatible.
                command = f"{{\n{command}\n}}"
            command = f"cd {shlex.quote(workdir)} && {command}"
        return await self.bash(command)
164
+
165
+
166
class EditTool(_FilesystemTool):
    """Literal search-and-replace on a single workspace file.

    An empty ``oldString`` is only accepted for a path that does not exist yet,
    in which case the file is created with ``newString`` as its content.
    """

    name = "edit"
    description = (
        "Replaces text within a file. Use oldString as exact literal context. "
        "Set replaceAll to true to replace every occurrence."
    )
    parameters: ClassVar[dict[str, Any]] = {
        "type": "object",
        "properties": {
            "filePath": {
                "type": "string",
                "description": "The absolute path to the file to modify.",
            },
            "oldString": {"type": "string", "description": "The text to replace."},
            "newString": {
                "type": "string",
                "description": "The text to replace it with (must be different from oldString).",
            },
            "replaceAll": {
                "type": "boolean",
                "description": "Replace all occurrences of oldString (default false).",
            },
        },
        "required": ["filePath", "oldString", "newString"],
    }

    async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
        """Validate the arguments, then rewrite (or create) the target file.

        Raises:
            ValueError: If ``filePath`` is missing/empty or ``oldString`` /
                ``newString`` is not a string.
        """
        target = arguments.get("filePath")
        if not (isinstance(target, str) and target):
            raise ValueError("filePath is required")
        needle = arguments.get("oldString")
        replacement = arguments.get("newString")
        if not isinstance(needle, str):
            raise ValueError("oldString is required")
        if not isinstance(replacement, str):
            raise ValueError("newString is required")
        if needle == replacement:
            return tool_err("No changes to apply: oldString and newString are identical.")
        if not needle:
            return await self._create_if_absent(target, replacement)

        current = await self.file_read(target)
        if current.isError:
            return current
        original = result_text(current)
        hits = original.count(needle)
        if hits == 0:
            return tool_err(f"oldString not found in {target}")
        everywhere = arguments.get("replaceAll") is True
        if hits > 1 and not everywhere:
            return tool_err(f"oldString matches {hits} times in {target}; set replaceAll to true")
        # A count of -1 tells str.replace to substitute every occurrence.
        updated = original.replace(needle, replacement, -1 if everywhere else 1)
        return await self.file_write(target, updated)

    async def _create_if_absent(self, path: str, content: str) -> MCPToolResult:
        """Handle the empty-``oldString`` case: create ``path`` unless it already exists."""
        probe = await self.bash(f"test -e {shlex.quote(path)}")
        if not probe.isError:
            return tool_err(
                "oldString cannot be empty when editing an existing file. "
                "Provide exact text to replace, or use write for full-file replacement."
            )
        made = await self._ensure_parent(path)
        if made.isError:
            return made
        return await self.file_write(path, content)

    async def _ensure_parent(self, path: str) -> MCPToolResult:
        """Create the parent directory of ``path`` unless it is empty, ``.`` or ``/``."""
        directory = posixpath.dirname(path)
        if directory in {"", ".", "/"}:
            return MCPToolResult(content=[])
        return await self.bash(f"mkdir -p {shlex.quote(directory)}")
234
+
235
+
236
class WriteTool(_FilesystemTool):
    """Create a workspace file, or replace its content wholesale."""

    name = "write"
    description = "Creates or overwrites a file with the provided content."
    parameters: ClassVar[dict[str, Any]] = {
        "type": "object",
        "properties": {
            "content": {"type": "string", "description": "The content to write to the file."},
            "filePath": {
                "type": "string",
                "description": "The absolute path to the file to write.",
            },
        },
        "required": ["content", "filePath"],
    }

    async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
        """Ensure the parent directory exists, then write ``content`` to ``filePath``.

        Raises:
            ValueError: If ``filePath`` is missing/empty or ``content`` is not a string.
        """
        target = arguments.get("filePath")
        if not (isinstance(target, str) and target):
            raise ValueError("filePath is required")
        body = arguments.get("content")
        if not isinstance(body, str):
            raise ValueError("content is required")
        made = await self._ensure_parent(target)
        # A failed mkdir is returned as-is; the write is not attempted.
        return made if made.isError else await self.file_write(target, body)

    async def _ensure_parent(self, path: str) -> MCPToolResult:
        """Create the parent directory of ``path`` unless it is empty, ``.`` or ``/``."""
        directory = posixpath.dirname(path)
        if directory in {"", ".", "/"}:
            return MCPToolResult(content=[])
        return await self.bash(f"mkdir -p {shlex.quote(directory)}")
68
268
 
69
269
 
70
270
  class GrepTool(_FilesystemTool):
@@ -115,24 +315,18 @@ class GlobTool(_FilesystemTool):
115
315
  return await self.bash(f"find {shlex.quote(str(path))} -name {shlex.quote(pattern)}")
116
316
 
117
317
 
118
- class ListTool(_FilesystemTool):
119
- name = "list"
120
- description = "Lists files and directories in a given path."
121
- parameters: ClassVar[dict[str, Any]] = {
122
- "type": "object",
123
- "properties": {
124
- "path": {"type": "string", "description": "Directory to list."},
125
- "ignore": {
126
- "type": "array",
127
- "items": {"type": "string"},
128
- "description": "Glob patterns to ignore.",
129
- },
130
- },
131
- }
318
+ def _positive_int(value: Any, *, default: int, name: str) -> int:
319
+ if value is None:
320
+ return default
321
+ if not isinstance(value, int) or value < 1:
322
+ raise ValueError(f"{name} must be a positive integer")
323
+ return value
132
324
 
133
- async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
134
- path = arguments.get("path") or "."
135
- return await self.file_list(str(path))
325
+
326
+ def _read_offset(value: Any) -> int:
327
+ if value is None or value == 0:
328
+ return 1
329
+ return _positive_int(value, default=1, name="offset")
136
330
 
137
331
 
138
- __all__ = ["GlobTool", "GrepTool", "ListTool", "ReadTool"]
332
+ __all__ = ["BashTool", "EditTool", "GlobTool", "GrepTool", "ReadTool", "WriteTool"]
@@ -7,16 +7,19 @@ client and assert the command translation + result shape, fully offline.
7
7
 
8
8
  from __future__ import annotations
9
9
 
10
- from typing import TYPE_CHECKING, Any, cast
10
+ import shlex
11
+ from typing import Any, cast
11
12
 
12
13
  import pytest
13
14
 
14
15
  from hud.agents.claude.tools.coding import ClaudeBashTool, ClaudeTextEditorTool
15
16
  from hud.agents.gemini.tools.coding import GeminiEditTool, GeminiShellTool
16
17
  from hud.agents.openai.tools.coding import OpenAIShellTool
17
-
18
- if TYPE_CHECKING:
19
- from hud.capabilities import SSHClient
18
+ from hud.agents.openai_compatible.agent import OpenAIChatAgent
19
+ from hud.agents.openai_compatible.tools import BashTool, EditTool, ReadTool, WriteTool
20
+ from hud.agents.tools.base import result_text
21
+ from hud.agents.types import OpenAIChatConfig
22
+ from hud.capabilities import Capability, SSHClient
20
23
 
21
24
 
22
25
  class _Completed:
@@ -61,6 +64,21 @@ class _FakeSFTP:
61
64
  def open(self, path: str, mode: str) -> _FakeOpenFile:
62
65
  return _FakeOpenFile(self._store, path, mode)
63
66
 
67
async def listdir(self, path: str) -> list[str]:
    """Return the sorted immediate child names of ``path`` in the fake store.

    The store only holds file paths, so directories are inferred from the
    first path segment that follows the ``path`` prefix.
    """
    # Stripping trailing slashes and re-adding one gives "/" for the root
    # and "<dir>/" for everything else.
    root = path.rstrip("/") + "/"
    children = {
        key[len(root) :].split("/", 1)[0]
        for key in self._store
        if key.startswith(root) and len(key) > len(root)
    }
    return sorted(children)
81
+
64
82
 
65
83
  class _Conn:
66
84
  def __init__(self, completed: _Completed, store: dict[str, bytes]) -> None:
@@ -70,13 +88,26 @@ class _Conn:
70
88
 
71
89
async def run(self, command: str, check: bool = False) -> _Completed:
    """Record ``command`` and emulate the few shell probes the tools rely on.

    ``test -d``/``test -e`` are answered from the in-memory store and
    ``mkdir -p`` always succeeds. Any other command returns the canned result.
    """
    self.commands.append(command)
    argv = shlex.split(command)
    head = argv[:2]
    if len(argv) == 3 and head[0] == "test" and head[1] in ("-d", "-e"):
        target = argv[2]
        dir_prefix = target.rstrip("/") + "/"
        # A path is a directory when some stored file lives underneath it.
        is_dir = any(key.startswith(dir_prefix) for key in self._store)
        found = is_dir if head[1] == "-d" else (target in self._store or is_dir)
        return _Completed(exit_status=0 if found else 1)
    if len(argv) >= 3 and head == ["mkdir", "-p"]:
        return _Completed(exit_status=0)
    return self._completed
74
105
 
75
106
  def start_sftp_client(self) -> _FakeSFTP:
76
107
  return _FakeSFTP(self._store)
77
108
 
78
109
 
79
- class _FakeSSH:
110
+ class _FakeSSH(SSHClient):
80
111
  """Duck-typed ``SSHClient``: ``conn.run`` (bash) + ``conn.start_sftp_client`` (files)."""
81
112
 
82
113
  def __init__(
@@ -87,7 +118,10 @@ class _FakeSSH:
87
118
  files: dict[str, bytes] | None = None,
88
119
  ) -> None:
89
120
  self.files: dict[str, bytes] = files or {}
90
- self.conn = _Conn(_Completed(stdout=stdout, exit_status=exit_status), self.files)
121
+ super().__init__(
122
+ Capability(name="shell", protocol="ssh/2", url="ssh://localhost:22"),
123
+ cast("Any", _Conn(_Completed(stdout=stdout, exit_status=exit_status), self.files)),
124
+ )
91
125
 
92
126
 
93
127
  def _ssh(**kwargs: Any) -> SSHClient:
@@ -98,6 +132,11 @@ def _commands(tool: Any) -> list[str]:
98
132
  return tool.client.conn.commands
99
133
 
100
134
 
135
+ class _OpenAIChatAgentForTest(OpenAIChatAgent):
136
+ async def build_tools_for_test(self, ssh: SSHClient) -> tuple[dict[str, Any], list[Any]]:
137
+ return await self._build_tools({"ssh": ssh})
138
+
139
+
101
140
  # ─── OpenAI shell ─────────────────────────────────────────────────────
102
141
 
103
142
 
@@ -135,6 +174,96 @@ def test_openai_shell_to_params_is_shell_type() -> None:
135
174
  assert tool.to_params()["type"] == "shell"
136
175
 
137
176
 
177
+ # ─── OpenAI-compatible OpenCode workspace tools ───────────────────────
178
+
179
+
180
+ async def test_openai_compatible_catalog_matches_opencode_workspace_tools() -> None:
181
+ agent = _OpenAIChatAgentForTest(
182
+ OpenAIChatConfig(model="qwen3.6-plus", model_client=cast("Any", object()))
183
+ )
184
+
185
+ tools, params = await agent.build_tools_for_test(_ssh())
186
+
187
+ assert list(tools) == ["bash", "read", "glob", "grep", "edit", "write"]
188
+ assert [param["function"]["name"] for param in params] == [
189
+ "bash",
190
+ "read",
191
+ "glob",
192
+ "grep",
193
+ "edit",
194
+ "write",
195
+ ]
196
+
197
+
198
+ async def test_openai_compatible_bash_uses_workdir_and_timeout() -> None:
199
+ tool = BashTool(spec=BashTool.default_spec("qwen"), client=_ssh())
200
+
201
+ await tool.execute({"command": "echo hi", "workdir": "/tmp/my dir", "timeout": 2500})
202
+
203
+ assert _commands(tool) == ["cd '/tmp/my dir' && timeout 3s bash -lc 'echo hi'"]
204
+
205
+
206
+ async def test_openai_compatible_write_stores_file_via_workspace_sftp() -> None:
207
+ ssh = _FakeSSH()
208
+ tool = WriteTool(spec=WriteTool.default_spec("qwen"), client=cast("SSHClient", ssh))
209
+
210
+ result = await tool.execute({"filePath": "/REPORT.md", "content": "done"})
211
+
212
+ assert result.isError is False
213
+ assert ssh.files["/REPORT.md"] == b"done"
214
+
215
+
216
+ async def test_openai_compatible_edit_rewrites_unique_match() -> None:
217
+ ssh = _FakeSSH(files={"/f.txt": b"hello old world"})
218
+ tool = EditTool(spec=EditTool.default_spec("qwen"), client=cast("SSHClient", ssh))
219
+
220
+ result = await tool.execute(
221
+ {"filePath": "/f.txt", "oldString": "old", "newString": "new"},
222
+ )
223
+
224
+ assert result.isError is False
225
+ assert ssh.files["/f.txt"] == b"hello new world"
226
+
227
+
228
+ async def test_openai_compatible_edit_rejects_ambiguous_match() -> None:
229
+ ssh = _FakeSSH(files={"/f.txt": b"a a a"})
230
+ tool = EditTool(spec=EditTool.default_spec("qwen"), client=cast("SSHClient", ssh))
231
+
232
+ result = await tool.execute(
233
+ {"filePath": "/f.txt", "oldString": "a", "newString": "b"},
234
+ )
235
+
236
+ assert result.isError is True
237
+ assert ssh.files["/f.txt"] == b"a a a"
238
+
239
+
240
+ async def test_openai_compatible_read_lists_directories() -> None:
241
+ tool = ReadTool(
242
+ spec=ReadTool.default_spec("qwen"),
243
+ client=_ssh(files={"/work/a.txt": b"a", "/work/nested/b.txt": b"b"}),
244
+ )
245
+
246
+ result = await tool.execute({"filePath": "/work"})
247
+
248
+ text = result_text(result)
249
+ assert "<type>directory</type>" in text
250
+ assert "a.txt" in text
251
+ assert "nested" in text
252
+
253
+
254
+ async def test_openai_compatible_read_accepts_zero_offset_for_first_page() -> None:
255
+ tool = ReadTool(
256
+ spec=ReadTool.default_spec("qwen"),
257
+ client=_ssh(files={"/f.txt": b"alpha\nbeta\n"}),
258
+ )
259
+
260
+ result = await tool.execute({"filePath": "/f.txt", "offset": 0, "limit": 1})
261
+
262
+ text = result_text(result)
263
+ assert "1: alpha" in text
264
+ assert "2: beta" not in text
265
+
266
+
138
267
  # ─── Gemini shell ─────────────────────────────────────────────────────
139
268
 
140
269
 
hud/cli/deploy.py CHANGED
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import asyncio
6
+ import json
6
7
  import logging
7
8
  import os
8
9
  import time
@@ -12,6 +13,7 @@ from typing import Any
12
13
 
13
14
  import httpx
14
15
  import typer
16
+ from pydantic import ValidationError
15
17
 
16
18
  from hud.cli.utils.build_display import display_build_summary
17
19
  from hud.cli.utils.build_logs import poll_build_status, stream_build_logs
@@ -19,6 +21,7 @@ from hud.cli.utils.config import parse_env_file, parse_key_value
19
21
  from hud.cli.utils.context import create_build_context_tarball, format_size
20
22
  from hud.cli.utils.registry import get_registry_environment
21
23
  from hud.cli.utils.source import EnvironmentSource, normalize_environment_name
24
+ from hud.eval.runtime import RuntimeConfig
22
25
  from hud.utils.exceptions import HudRequestError
23
26
  from hud.utils.hud_console import HUDConsole
24
27
  from hud.utils.platform import PlatformClient
@@ -32,6 +35,7 @@ class _DeployPlan:
32
35
  name: str
33
36
  registry_id: str | None
34
37
  runtime: str | None
38
+ runtime_config: dict[str, Any] | None
35
39
  env_vars: dict[str, str]
36
40
  build_args: dict[str, str]
37
41
  build_secrets: dict[str, str]
@@ -75,6 +79,26 @@ def _normalize_runtime(runtime: str | None, console: HUDConsole) -> str | None:
75
79
  raise typer.Exit(1)
76
80
 
77
81
 
82
def _load_runtime_config(path: str | None, console: HUDConsole) -> dict[str, Any] | None:
    """Load and validate a JSON ``RuntimeConfig`` file for ``--runtime-config``.

    Args:
        path: Path to the JSON file, or ``None`` when the option was not given.
        console: Console used to report problems before exiting.

    Returns:
        The config's request payload, or ``None`` when no path was given or the
        payload is empty.

    Raises:
        typer.Exit: If the file cannot be read, is not valid JSON, or does not
            validate as a ``RuntimeConfig``.
    """
    if path is None:
        return None
    config_path = Path(path).expanduser()

    try:
        text = config_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        console.error(f"Runtime config file not found: {config_path}")
        raise typer.Exit(1) from None
    except (OSError, UnicodeDecodeError) as exc:
        # Directory paths, permission problems and non-UTF-8 files must also end
        # in typer.Exit: that is the only failure signal the deploy callers handle.
        console.error(f"Could not read runtime config file {config_path}: {exc}")
        raise typer.Exit(1) from exc

    try:
        raw = json.loads(text)
    except json.JSONDecodeError as exc:
        console.error(f"Invalid runtime config JSON in {config_path}: {exc.msg}")
        raise typer.Exit(1) from exc

    try:
        config = RuntimeConfig.model_validate(raw)
    except ValidationError as exc:
        console.error(f"Invalid runtime config in {config_path}: {exc}")
        raise typer.Exit(1) from exc

    payload = config.request_payload()
    return payload or None
100
+
101
+
78
102
  def _load_env_vars(path: Path, console: HUDConsole, *, warn_missing: bool) -> dict[str, str]:
79
103
  if not path.exists():
80
104
  if warn_missing:
@@ -322,6 +346,7 @@ def _prepare_deploy_plan(
322
346
  build_args: list[str] | None,
323
347
  build_secrets: list[str] | None,
324
348
  runtime: str | None,
349
+ runtime_config: str | None,
325
350
  verbose: bool,
326
351
  platform: PlatformClient,
327
352
  console: HUDConsole,
@@ -357,11 +382,13 @@ def _prepare_deploy_plan(
357
382
  build_args_dict = _parse_key_value_flags(build_args, option="--build-arg", console=console)
358
383
  if build_args_dict and verbose:
359
384
  console.info(f"Build arguments: {', '.join(build_args_dict.keys())}")
385
+ normalized_runtime = _normalize_runtime(runtime, console)
360
386
 
361
387
  return _DeployPlan(
362
388
  name=resolved_name,
363
389
  registry_id=registry_id,
364
- runtime=_normalize_runtime(runtime, console),
390
+ runtime=normalized_runtime,
391
+ runtime_config=_load_runtime_config(runtime_config, console),
365
392
  env_vars=env_vars,
366
393
  build_args=build_args_dict,
367
394
  build_secrets=_collect_build_secrets(build_secrets, env_dir=env_dir, console=console),
@@ -379,6 +406,7 @@ def deploy_environment(
379
406
  build_args: list[str] | None = None,
380
407
  build_secrets: list[str] | None = None,
381
408
  runtime: str | None = None,
409
+ runtime_config: str | None = None,
382
410
  ) -> None:
383
411
  """Deploy one HUD environment to the platform."""
384
412
  hud_console = HUDConsole()
@@ -411,6 +439,7 @@ def deploy_environment(
411
439
  build_args=build_args,
412
440
  build_secrets=build_secrets,
413
441
  runtime=runtime,
442
+ runtime_config=runtime_config,
414
443
  verbose=verbose,
415
444
  platform=platform,
416
445
  console=hud_console,
@@ -485,6 +514,8 @@ async def _trigger_build(
485
514
  payload["registry_id"] = plan.registry_id
486
515
  if plan.runtime:
487
516
  payload["runtime_provider"] = plan.runtime
517
+ if plan.runtime_config:
518
+ payload["runtime_config"] = plan.runtime_config
488
519
  if plan.env_vars:
489
520
  payload["environment_variables"] = plan.env_vars
490
521
  if plan.build_args:
@@ -644,6 +675,7 @@ def deploy_all(
644
675
  build_args: list[str] | None = None,
645
676
  build_secrets: list[str] | None = None,
646
677
  runtime: str | None = None,
678
+ runtime_config: str | None = None,
647
679
  ) -> None:
648
680
  """Deploy each HUD environment under a parent directory."""
649
681
  hud_console = HUDConsole()
@@ -683,6 +715,7 @@ def deploy_all(
683
715
  build_args=build_args,
684
716
  build_secrets=build_secrets,
685
717
  runtime=runtime,
718
+ runtime_config=runtime_config,
686
719
  )
687
720
  succeeded.append(env_dir.name)
688
721
  except (typer.Exit, SystemExit):
@@ -762,6 +795,11 @@ def deploy_command(
762
795
  "--runtime",
763
796
  help="Persist Modal as the hosted runtime for this registry",
764
797
  ),
798
+ runtime_config: str | None = typer.Option(
799
+ None,
800
+ "--runtime-config",
801
+ help="Path to a JSON RuntimeConfig for hosted runs",
802
+ ),
765
803
  ) -> None:
766
804
  """Deploy HUD environment to the platform.
767
805
 
@@ -781,6 +819,7 @@ def deploy_command(
781
819
  build_args=build_args,
782
820
  build_secrets=secrets,
783
821
  runtime=runtime,
822
+ runtime_config=runtime_config,
784
823
  )
785
824
  return
786
825
 
@@ -795,4 +834,5 @@ def deploy_command(
795
834
  build_args=build_args,
796
835
  build_secrets=secrets,
797
836
  runtime=runtime,
837
+ runtime_config=runtime_config,
798
838
  )
@@ -179,6 +179,47 @@ class TestCollectEnvironmentVariables:
179
179
  assert "INVALID_FORMAT" not in result
180
180
 
181
181
 
182
+ class TestRuntimeConfigFile:
183
+ def test_load_runtime_config_uses_sdk_shape(self, tmp_path: Path) -> None:
184
+ from hud.cli.deploy import _load_runtime_config
185
+ from hud.utils.hud_console import HUDConsole
186
+
187
+ config_path = tmp_path / "runtime.json"
188
+ config_path.write_text(
189
+ json.dumps(
190
+ {
191
+ "resources": {"gpu": {"type": "A10G", "count": 2}},
192
+ "limits": {"startup_timeout_s": 300},
193
+ }
194
+ ),
195
+ encoding="utf-8",
196
+ )
197
+
198
+ assert _load_runtime_config(str(config_path), HUDConsole()) == {
199
+ "resources": {"gpu": {"type": "A10G", "count": 2}},
200
+ "limits": {"startup_timeout_s": 300},
201
+ }
202
+
203
+ def test_load_runtime_config_preserves_null_override(self, tmp_path: Path) -> None:
204
+ from hud.cli.deploy import _load_runtime_config
205
+ from hud.utils.hud_console import HUDConsole
206
+
207
+ config_path = tmp_path / "runtime.json"
208
+ config_path.write_text(json.dumps({"resources": None}), encoding="utf-8")
209
+
210
+ assert _load_runtime_config(str(config_path), HUDConsole()) == {"resources": None}
211
+
212
+ def test_load_runtime_config_rejects_unknown_fields(self, tmp_path: Path) -> None:
213
+ from hud.cli.deploy import _load_runtime_config
214
+ from hud.utils.hud_console import HUDConsole
215
+
216
+ config_path = tmp_path / "runtime.json"
217
+ config_path.write_text(json.dumps({"provider_config": {}}), encoding="utf-8")
218
+
219
+ with pytest.raises(typer.Exit):
220
+ _load_runtime_config(str(config_path), HUDConsole())
221
+
222
+
182
223
  class TestDeployEnvironment:
183
224
  """Tests for deploy_environment function."""
184
225
 
@@ -262,6 +303,7 @@ class TestDeployAsync:
262
303
  name="test-env",
263
304
  registry_id=None,
264
305
  runtime=None,
306
+ runtime_config=None,
265
307
  env_vars={},
266
308
  build_args={},
267
309
  build_secrets={},
@@ -292,6 +334,7 @@ class TestDeployAsync:
292
334
  name="test-env",
293
335
  registry_id=None,
294
336
  runtime=None,
337
+ runtime_config=None,
295
338
  env_vars={},
296
339
  build_args={},
297
340
  build_secrets={},
@@ -331,6 +374,7 @@ class TestDeployAsync:
331
374
  name="test-env",
332
375
  registry_id=None,
333
376
  runtime="modal",
377
+ runtime_config=None,
334
378
  env_vars={},
335
379
  build_args={},
336
380
  build_secrets={},
@@ -343,6 +387,48 @@ class TestDeployAsync:
343
387
  assert platform.payload is not None
344
388
  assert platform.payload["runtime_provider"] == "modal"
345
389
 
390
+ @pytest.mark.asyncio
391
+ async def test_trigger_build_sends_runtime_config(self) -> None:
392
+ from hud.cli.deploy import _DeployPlan, _trigger_build
393
+ from hud.utils.hud_console import HUDConsole
394
+ from hud.utils.platform import PlatformClient
395
+
396
+ class FakePlatform(PlatformClient):
397
+ payload: dict[str, object] | None = None
398
+
399
+ async def apost(
400
+ self,
401
+ path: str,
402
+ *,
403
+ json: object | None = None,
404
+ ) -> dict[str, object]:
405
+ assert path == "/builds/trigger"
406
+ assert isinstance(json, dict)
407
+ object.__setattr__(self, "payload", json)
408
+ return {"id": "build-1", "registry_id": "registry-1"}
409
+
410
+ runtime_config = {"resources": {"gpu": {"type": "A10G", "count": 1}}}
411
+ platform = FakePlatform("https://api.example", "key")
412
+ result = await _trigger_build(
413
+ platform,
414
+ build_id="build-1",
415
+ plan=_DeployPlan(
416
+ name="test-env",
417
+ registry_id=None,
418
+ runtime="modal",
419
+ runtime_config=runtime_config,
420
+ env_vars={},
421
+ build_args={},
422
+ build_secrets={},
423
+ ),
424
+ no_cache=False,
425
+ console=HUDConsole(),
426
+ )
427
+
428
+ assert result == {"id": "build-1", "registry_id": "registry-1"}
429
+ assert platform.payload is not None
430
+ assert platform.payload["runtime_config"] == runtime_config
431
+
346
432
 
347
433
  class TestSaveDeployLink:
348
434
  """Tests for _save_deploy_link function."""
hud/eval/runtime.py CHANGED
@@ -108,6 +108,9 @@ class RuntimeConfig(BaseModel):
108
108
  self.model_dump() | override.model_dump(exclude_unset=True)
109
109
  )
110
110
 
111
def request_payload(self) -> dict[str, Any]:
    """Serialize this config for a platform request body.

    The dump uses JSON mode and omits only fields that were never set.
    """
    dump_options: dict[str, Any] = {"mode": "json", "exclude_unset": True}
    return self.model_dump(**dump_options)
113
+
111
114
 
112
115
  class Provider(Protocol):
113
116
  """Server placement: called with the task row being placed, acquire one
@@ -925,7 +928,7 @@ class HostedRuntime:
925
928
  if group_id is not None:
926
929
  payload["group_id"] = group_id
927
930
  if task.runtime_config is not None:
928
- runtime_config = task.runtime_config.model_dump(mode="json", exclude_none=True)
931
+ runtime_config = task.runtime_config.request_payload()
929
932
  if runtime_config:
930
933
  payload["runtime_config"] = runtime_config
931
934
  await platform.apost("/rollouts/submit", json=payload)
hud/eval/sync.py CHANGED
@@ -163,7 +163,7 @@ def task_upload_payload(task: Task) -> dict[str, Any]:
163
163
  if task.columns:
164
164
  payload["columns"] = task.columns
165
165
  if task.runtime_config is not None:
166
- payload["runtime_config"] = task.runtime_config.model_dump(exclude_none=True)
166
+ payload["runtime_config"] = task.runtime_config.request_payload()
167
167
  return payload
168
168
 
169
169
 
@@ -176,7 +176,7 @@ def _task_signature(task: Task) -> str:
176
176
  if task.columns:
177
177
  sig_data["columns"] = task.columns
178
178
  if task.runtime_config is not None:
179
- sig_data["runtime_config"] = task.runtime_config.model_dump(exclude_none=True)
179
+ sig_data["runtime_config"] = task.runtime_config.request_payload()
180
180
  return f"{task.id}|" + json.dumps(
181
181
  sig_data,
182
182
  sort_keys=True,
@@ -164,6 +164,25 @@ async def test_run_submits_and_polls_to_terminal(monkeypatch: pytest.MonkeyPatch
164
164
  assert payload["agent"]["config"]["model"] == "test-model"
165
165
 
166
166
 
167
+ @pytest.mark.asyncio
168
+ async def test_run_preserves_runtime_config_null_override(
169
+ monkeypatch: pytest.MonkeyPatch,
170
+ ) -> None:
171
+ platform = _FakePlatform([{"status": "completed", "reward": 0.5}])
172
+ monkeypatch.setattr(
173
+ "hud.eval.runtime.PlatformClient.from_settings", classmethod(lambda cls: platform)
174
+ )
175
+
176
+ await HostedRuntime(poll_interval=0.0).run(
177
+ Task(env="sums", id="add", runtime_config=RuntimeConfig(resources=None)),
178
+ _agent(),
179
+ job_id=uuid.uuid4().hex,
180
+ trace_id=uuid.uuid4().hex,
181
+ )
182
+
183
+ assert platform.posts[0][1]["runtime_config"] == {"resources": None}
184
+
185
+
167
186
  @pytest.mark.asyncio
168
187
  async def test_run_timeout_requests_platform_cancel(monkeypatch: pytest.MonkeyPatch) -> None:
169
188
  platform = _FakePlatform([{"status": "running"}])
@@ -13,14 +13,18 @@ the atom and return a :class:`Job`.
13
13
  from __future__ import annotations
14
14
 
15
15
  import asyncio
16
+ import json
16
17
  import textwrap
17
18
  from contextlib import asynccontextmanager
19
+ from types import SimpleNamespace
18
20
  from typing import TYPE_CHECKING, Any
19
21
 
20
22
  import mcp.types as mcp_types
21
23
  import pytest
22
24
 
23
25
  from hud.agents.base import Agent
26
+ from hud.agents.openai_compatible import OpenAIChatAgent
27
+ from hud.agents.types import OpenAIChatConfig
24
28
  from hud.environment import Environment
25
29
  from hud.eval import Job, LocalRuntime, Task, Taskset
26
30
  from hud.eval.run import Run, rollout
@@ -63,6 +67,44 @@ class _FnAgent(Agent):
63
67
  run.trace.content = self._fn(run.prompt)
64
68
 
65
69
 
70
+ class _SequencedCompletions:
71
+ def __init__(self, responses: list[Any]) -> None:
72
+ self._responses = responses
73
+ self.requests: list[dict[str, Any]] = []
74
+
75
+ async def create(self, **kwargs: Any) -> Any:
76
+ self.requests.append(kwargs)
77
+ return self._responses.pop(0)
78
+
79
+
80
+ class _FakeOpenAI:
81
+ def __init__(self, responses: list[Any]) -> None:
82
+ self.chat = SimpleNamespace(completions=_SequencedCompletions(responses))
83
+
84
+
85
+ def _chat_response(content: str, tool_calls: list[Any] | None = None) -> Any:
86
+ message = SimpleNamespace(
87
+ content=content,
88
+ tool_calls=tool_calls or [],
89
+ refusal=None,
90
+ model_dump=lambda exclude_none=True: {"role": "assistant", "content": content},
91
+ )
92
+ choice = SimpleNamespace(message=message, finish_reason="stop", logprobs=None)
93
+ return SimpleNamespace(
94
+ choices=[choice],
95
+ model="fake-openai-compatible",
96
+ usage=SimpleNamespace(prompt_tokens=1, completion_tokens=1, prompt_tokens_details=None),
97
+ )
98
+
99
+
100
+ def _tool_call(name: str, arguments: str) -> Any:
101
+ return SimpleNamespace(
102
+ type="function",
103
+ id=f"call_{name}",
104
+ function=SimpleNamespace(name=name, arguments=arguments),
105
+ )
106
+
107
+
66
108
  def _add_task(a: int, b: int) -> Task:
67
109
  """A pure data row; the env it names is defined by the spawned file."""
68
110
  return Task(env="sums", id="add", args={"a": a, "b": b})
@@ -86,6 +128,54 @@ async def test_rollout_returns_graded_run_with_trace_id(env_file: Path) -> None:
86
128
  assert run.runtime.startswith("tcp://127.0.0.1:")
87
129
 
88
130
 
131
+ async def test_openai_compatible_write_reaches_workspace_grader(tmp_path: Path) -> None:
132
+ workspace = tmp_path / "workspace"
133
+ report = workspace / "REPORT.md"
134
+ env = Environment("opencode_report")
135
+ env.workspace(workspace, guest_path=str(workspace))
136
+
137
+ @env.initialize
138
+ async def seed() -> None:
139
+ workspace.mkdir(parents=True, exist_ok=True)
140
+ report.unlink(missing_ok=True)
141
+
142
+ @env.template()
143
+ async def write_report():
144
+ yield "Write PASS to REPORT.md."
145
+ yield 1.0 if report.exists() and report.read_text().strip() == "PASS" else 0.0
146
+
147
+ model_client = _FakeOpenAI(
148
+ [
149
+ _chat_response(
150
+ "",
151
+ [_tool_call("write", json.dumps({"filePath": str(report), "content": "PASS"}))],
152
+ ),
153
+ _chat_response("done"),
154
+ ]
155
+ )
156
+ agent = OpenAIChatAgent(
157
+ OpenAIChatConfig(model="qwen3.6-plus", model_client=model_client, max_steps=4)
158
+ )
159
+
160
+ run = await rollout(
161
+ Task(env="opencode_report", id="write_report"),
162
+ agent,
163
+ runtime=lambda _task: _local(env),
164
+ )
165
+
166
+ assert run.reward == 1.0
167
+ assert report.read_text() == "PASS"
168
+ tools = model_client.chat.completions.requests[0]["extra_body"]["tools"]
169
+ assert [tool["function"]["name"] for tool in tools] == [
170
+ "bash",
171
+ "read",
172
+ "glob",
173
+ "grep",
174
+ "edit",
175
+ "write",
176
+ ]
177
+
178
+
89
179
  async def test_mid_run_failure_keeps_the_real_run_and_its_evidence(env_file: Path) -> None:
90
180
  def boom(prompt: str) -> str:
91
181
  raise RuntimeError("agent exploded")
@@ -148,3 +148,15 @@ def test_task_upload_payload_includes_runtime_config() -> None:
148
148
  payload = task_upload_payload(task)
149
149
 
150
150
  assert payload["runtime_config"] == {"image": "img:tag"}
151
+
152
+
153
+ def test_task_upload_payload_preserves_runtime_config_null_override() -> None:
154
+ task = Task(
155
+ env="e",
156
+ id="solve",
157
+ runtime_config=RuntimeConfig(resources=None),
158
+ )
159
+
160
+ payload = task_upload_payload(task)
161
+
162
+ assert payload["runtime_config"] == {"resources": None}
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.6.6"
7
+ __version__ = "0.6.8.dev0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.6.6
3
+ Version: 0.6.8.dev0
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -87,7 +87,7 @@ Description-Content-Type: text/markdown
87
87
 
88
88
  HUD is a platform for building RL environments for AI agents, across coding, browser, computer-use, and robotics. Define an environment, write tasks, and run them as evals and training across any model, at any scale.
89
89
 
90
- To learn more, see the [documentation](https://docs.hud.ai) and [API reference](https://docs.hud.ai/reference/environment).
90
+ To learn more, see the [documentation](https://docs.hud.ai) and [environment reference](https://docs.hud.ai/v6/core/environment).
91
91
 
92
92
  [![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
93
93
  [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
@@ -120,7 +120,7 @@ Then scaffold your first environment:
120
120
  hud init my-env
121
121
  ```
122
122
 
123
- ![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
123
+ ![Agent running on SheetBench](docs/src/images/trace_sheet.gif)
124
124
 
125
125
  ## The protocol
126
126
 
@@ -159,14 +159,14 @@ hud eval my-taskset --remote
159
159
  For local iteration, the same protocol works against a container on your laptop:
160
160
 
161
161
  ```bash
162
- hud build .
163
- docker run -d --name run1 my-env
164
- docker exec run1 hud task start fix_bug
165
- docker exec run1 hud task grade fix_bug --answer ""
162
+ docker build -f Dockerfile.hud -t my-env .
163
+ docker run -d --name run1 -p 8765:8765 my-env
164
+ hud task start fix_bug --url tcp://127.0.0.1:8765
165
+ hud task grade fix_bug --url tcp://127.0.0.1:8765 --answer "..."
166
166
  docker rm -f run1
167
167
  ```
168
168
 
169
- → [Package & deploy](https://docs.hud.ai/run/deploy)
169
+ → [Run & deploy](https://docs.hud.ai/v6/core/runtime)
170
170
 
171
171
  ## Environments & templates
172
172
 
@@ -193,7 +193,7 @@ hud eval tasks.py claude --group 3
193
193
 
194
194
  Each graded evaluation is a **trace** (the SDK's live handle is a `Run`). With `HUD_API_KEY` set, every rollout is recorded on [hud.ai](https://hud.ai). Tasks that need a shell, browser, GUI, or robot declare **capabilities** (below); everything else — variants, grading, batching — stays identical.
195
195
 
196
- → [Quickstart](https://docs.hud.ai/quickstart) · [Tasks & tasksets](https://docs.hud.ai/reference/tasks)
196
+ → [Quickstart](https://docs.hud.ai/v6/start/quickstart) · [Tasks & tasksets](https://docs.hud.ai/v6/core/tasks)
197
197
 
198
198
  ## Capabilities & harnesses
199
199
 
@@ -211,39 +211,42 @@ A **capability** is a connection the environment exposes; a **harness** attaches
211
211
 
212
212
  **Bring your own:** a harness attaches to a capability and defines a tool spec — wrap `browser-use` on `cdp`, a VLA policy on `robot`, or your own agent on `ssh` / `mcp`. No protocol work required.
213
213
 
214
- → [Capabilities](https://docs.hud.ai/reference/capabilities) · [Models](https://docs.hud.ai/run/models) · [Robots](https://docs.hud.ai/reference/robots)
214
+ → [Capabilities](https://docs.hud.ai/v6/core/capabilities) · [Models](https://docs.hud.ai/v6/core/agents) · [Robots](https://docs.hud.ai/v6/advanced/robots)
215
215
 
216
216
  ## Deploy on the platform
217
217
 
218
218
  From the [platform UI](https://hud.ai) you can run batches, compare models on the same taskset, and inspect every trace.
219
219
 
220
- → [Deploy](https://docs.hud.ai/run/deploy) · [Leaderboards](https://hud.ai/leaderboards)
220
+ → [Run & deploy](https://docs.hud.ai/v6/core/runtime)
221
221
 
222
222
  ## Train on rewards
223
223
 
224
- Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and turn the rewards into GRPO advantages with `group_relative()`:
224
+ Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and pass the graded runs to `TrainingClient.step()`:
225
225
 
226
226
  ```python
227
+ from hud import TrainingClient
227
228
  from hud.agents import create_agent
228
- from hud.eval import Taskset, group_relative
229
+ from hud.eval import Job
229
230
 
230
- agent = create_agent("claude-sonnet-4-5")
231
- job = await Taskset(count_letter(word=w) for w in words).run(agent, group=16)
232
- for runs in job.results.values():
233
- advantages = group_relative([r.reward for r in runs], normalize_std=True)
234
- ... # feed (run.trace_id, adv) into your optimizer
231
+ agent = create_agent("arith-rl", completion_kwargs={"extra_body": {"return_token_ids": True}})
232
+ trainer = TrainingClient("arith-rl")
233
+ taskset, runtime = ... # your Taskset and where rollouts run
234
+
235
+ session = await Job.start("arith-rl", group=8)
236
+ start = len(session.runs)
237
+ await taskset.run(agent, runtime=runtime, group=8, job=session)
238
+ await trainer.step(session.runs[start:], learning_rate=1e-5, group_size=8)
235
239
  ```
236
240
 
237
241
  HUD is the environment-and-reward source for your own GRPO/PPO loop — the same environment trains any model, text or multimodal, unchanged.
238
242
 
239
- → [Training](https://docs.hud.ai/run/training) · [Designing tasks for signal](https://docs.hud.ai/run/signal)
243
+ → [Training](https://docs.hud.ai/v6/core/training) · [Designing tasks for signal](https://docs.hud.ai/v6/core/advice)
240
244
 
241
245
  ## Links
242
246
 
243
247
  - [Documentation](https://docs.hud.ai)
244
- - [Quickstart](https://docs.hud.ai/quickstart)
245
- - [CLI reference](https://docs.hud.ai/reference/cli)
246
- - [Leaderboards](https://hud.ai/leaderboards)
248
+ - [Quickstart](https://docs.hud.ai/v6/start/quickstart)
249
+ - [CLI reference](https://docs.hud.ai/v6/core/cli)
247
250
  - [Environment templates](https://hud.ai/environments)
248
251
  - [Supported models](https://hud.ai/models)
249
252
  - [Discord](https://discord.gg/wkjtmHYYjm)
@@ -268,8 +271,8 @@ Key areas: [Agents](hud/agents/) · [Environments](hud/environment/) · [Capabil
268
271
 
269
272
  ```bibtex
270
273
  @software{hud2025agentevalplatform,
271
- author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
272
- title = {HUD: An Evaluation and RL Envrionments Platform for Agents},
274
+ author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep Chawla and Nguyen Nhat Minh},
275
+ title = {HUD: An Evaluation and RL Environments Platform for Agents},
273
276
  date = {2025-04},
274
277
  url = {https://github.com/hud-evals/hud-python},
275
278
  langid = {en}
@@ -5,7 +5,7 @@ hud/conftest.py,sha256=HKbHvmFXLPX6KFSJgPFUAM22auclNNdFmHGwilNzg98,1012
5
5
  hud/server.py,sha256=NtSHIjBFr9lYvryfXrCa-VhwqnwkRy7n5fp_OuNhNOI,1235
6
6
  hud/settings.py,sha256=eyvMIOOlFk6kIAP8UsHEeoqf_UiOVhb1jhRCM2qv7b8,6393
7
7
  hud/types.py,sha256=kFVbQ-CcVhYpdX5jjgacRIppFS0q_nMXahijV_Hhl58,15022
8
- hud/version.py,sha256=ckVAuEx_EefCJWiVftVuNFTXEg3EOXe9V_teExAAm34,104
8
+ hud/version.py,sha256=RD_T-I7Yj0KBuadVj2UQF2XmPhTeHn3Lo45gIQTb5e4,109
9
9
  hud/agents/__init__.py,sha256=UL1PXucnY1Ln9o_Xf0Y-mvfbNh6NUdMyPJp-_d9Wq7Q,5082
10
10
  hud/agents/base.py,sha256=WgEOWUmMioXTxYe6cOvbqnbM4n989Z9kFEZIN6xJ3pU,659
11
11
  hud/agents/tool_agent.py,sha256=a0xsh2d8IwvmiPGMs9LCzghi61FHt4vMK_9sW8eNFbA,12557
@@ -54,10 +54,10 @@ hud/agents/openai/tools/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
54
54
  hud/agents/openai/tools/tests/test_computer.py,sha256=qEK7h2eD4j6Wg6VjU_YD8kCRpXOXwHDXBv1bz0mh5bo,3488
55
55
  hud/agents/openai/tools/tests/test_strict_schema.py,sha256=8dGkCSO7_-TvryEfStKZ7nKEuO3WGLfzsjPUbfdHMhQ,2344
56
56
  hud/agents/openai_compatible/__init__.py,sha256=zQZSQHB97g3rtPx4Y8aG_0K1i17MLwGRaTyQLd31Jqk,98
57
- hud/agents/openai_compatible/agent.py,sha256=YjtQkrlgekhyGRhUoxwkJZqJNaHKKCgUKxU7gnRc2hY,9880
58
- hud/agents/openai_compatible/tools/__init__.py,sha256=H5zBQbEfT2z1fMs3yRdVVYa5oZ2ejhYnxWLJTH3gx08,307
57
+ hud/agents/openai_compatible/agent.py,sha256=7Zw6wa1ce7kt_xF4R_OfuoDbMPl09TktpjgFc16-_Lo,9946
58
+ hud/agents/openai_compatible/tools/__init__.py,sha256=kOPtrgiqTcnQabZpo1aNfYfnaqip6M3z2OeffJNz-Ak,361
59
59
  hud/agents/openai_compatible/tools/base.py,sha256=Jl6Bm9ZgEOqgdOnM7Xm66VN3RpfjeZF9w55of_ZGCMI,5760
60
- hud/agents/openai_compatible/tools/filesystem.py,sha256=QXJW0-7lYZXbcftxUC7LxXChx-17clsjoRfHJg2DFBA,4905
60
+ hud/agents/openai_compatible/tools/filesystem.py,sha256=hHSVW25OT_zxdJO6fE2kOPnnABOd06kHPWC08epoCNg,12523
61
61
  hud/agents/openai_compatible/tools/mcp_proxy.py,sha256=pfJdCvFxTaXkj6qrGK04jxibjeIhm6O-5STHPcB_qL4,844
62
62
  hud/agents/robot/__init__.py,sha256=UXyQYaoLMrxFr1QYU2D6UUz6BwK9gsp4-abe5jAOqUU,1620
63
63
  hud/agents/robot/_types.py,sha256=byWZMYRwLuzvu2U-ZXMx3TcyRTPcsjGF5HkItbgfcQ4,222
@@ -75,7 +75,7 @@ hud/agents/tests/test_claude_sdk_agent.py,sha256=lSY8wnLQgfJBNzF9BU-PcO4IrKaWtva
75
75
  hud/agents/tests/test_gemini_agent.py,sha256=7OdFFVSOkJE8Gb3blptWnEXuFWHuFCNlFAoMXTyV0Ec,4835
76
76
  hud/agents/tests/test_openai_agent.py,sha256=-69hoi_Bv9JdGngEnaJ74mSH-JCupg66ny7hODXQF00,4180
77
77
  hud/agents/tests/test_openai_compatible_agent.py,sha256=6JxFxkRdPT1O574VYvcsMXiUwhcvBFJQLBx46Utt4QI,2874
78
- hud/agents/tests/test_provider_native_tools.py,sha256=WjXV2dVNBG1ite6-aigzortgQIar9GMlZrMAE1_guVs,8381
78
+ hud/agents/tests/test_provider_native_tools.py,sha256=dZ4dOT3sUkMh_7p-pGDnTIL7UDdwngNJ8jarlqU0Plk,12989
79
79
  hud/agents/tests/test_tool_agent.py,sha256=w8cuBAMcGBbIwiMnjH-tg4ztqhlewQOnXK3h1XLkj5o,5373
80
80
  hud/agents/tests/test_trace.py,sha256=rUNbV-y4gI0dH0xluT9COY_epJD69XHAzaC1HO4mX10,4517
81
81
  hud/agents/tools/__init__.py,sha256=-fnzzq8qwEXWD8s-T8RUGamuYndXTESeFNNMQxsXH5A,858
@@ -96,7 +96,7 @@ hud/cli/__init__.py,sha256=i_R0izloGfZfDa7gXmxxfqfSOMmZR-qUp4wMtD9ulZY,5936
96
96
  hud/cli/__main__.py,sha256=fDH7XITyuDITwSDIVwRso06aouADO0CzTHKqp5TOwJE,143
97
97
  hud/cli/cancel.py,sha256=MHRdZ6IoFy3iac9NtTT_c91IB_iQbYISsCb7ph7u298,3785
98
98
  hud/cli/client.py,sha256=cC23TUTo7w4S63jtk8SWkFWzG9h8Z2AKwYEqaefBWqg,2946
99
- hud/cli/deploy.py,sha256=QytQKmGY60YKNnQnhv0gXjMl9BVdwsvMRq0yZypVgpY,26082
99
+ hud/cli/deploy.py,sha256=Uh-Sfp-fCezV5U7yNleOUBVEfzoVhIvxMVEb8b9le1U,27659
100
100
  hud/cli/eval.py,sha256=-pNxIdQWqzFraRhtpbk9Z0kXYVn3Ia6qBidHJ0KbLqw,35596
101
101
  hud/cli/init.py,sha256=FmvQ2hPhkKDkyr8krxuGPYDFBzeeLlMqMnkxfNo9nQ4,8258
102
102
  hud/cli/jobs.py,sha256=FtLRys2UwccovvQju37sXNTllOQnSJ4rWhpkeTbkjv4,4884
@@ -112,7 +112,7 @@ hud/cli/tests/__init__.py,sha256=ZrGVkmH7DHXGqOvjOSNGZeMYaFIRB2K8c6hwr8FPJ-8,68
112
112
  hud/cli/tests/test_cli_init.py,sha256=e4pHfw4jLt2TY5n_fSTfA7XRcMsu1hKxOK7PNfVRO7U,3002
113
113
  hud/cli/tests/test_cli_main.py,sha256=0wMho9p9NcGjp0jLiUtCQh_FYdbMaCJtSY3sBbSgPwA,697
114
114
  hud/cli/tests/test_cli_more_wrappers.py,sha256=EEFrqTMg3yknQdmb8hWcHJAbinGAOGyqx3CGdiVX1b8,115
115
- hud/cli/tests/test_deploy.py,sha256=atjacSnfZGf2DVG82Hf1vWswweqDKl6OBxb_Gmddma0,13985
115
+ hud/cli/tests/test_deploy.py,sha256=vgEs66oYfsrbfDfrZgf1CuIPQ7f_JCo6RrPoPAKVd7I,17247
116
116
  hud/cli/tests/test_eval_bedrock.py,sha256=UBGakgIV4kzXUj0Jtbr3t05xBss66YPqruKsovUYyoo,1900
117
117
  hud/cli/tests/test_eval_config.py,sha256=12ZCqFW-4NCVqEU-GheD8M3AIgm9UZAE-dDnD3UXlWM,10386
118
118
  hud/cli/tests/test_init.py,sha256=GsUKhZ9p5Gmg_0J0D9j35M4Ks569_M2bWPkcIAEVEtg,3821
@@ -169,18 +169,18 @@ hud/eval/chat.py,sha256=Gu6aDv1UkDAZ8qs41TXLUrFlcX4cav4HHSa7Yyovifw,5853
169
169
  hud/eval/file_tracking.py,sha256=MqLNl_lYfp74KCr4ZbwW9yGHmIyJ6GeOgAN9NMhELgs,4278
170
170
  hud/eval/job.py,sha256=QMck-M_0HWtEd56hvfdYaNnStLlIOuNOGBStdcLr2vI,5606
171
171
  hud/eval/run.py,sha256=wsG7xvvr-gJ778xWVXDMOA6BNlnHH9OiFZTzy6AyORo,18039
172
- hud/eval/runtime.py,sha256=mt6z9ZgRK0mRBM7T2DlIuyIhIrr1eyxD8Lqboj7_rvQ,42895
173
- hud/eval/sync.py,sha256=i7kxOoLyX5t6Jf9AoT9IRiDVhAfvY2bPIuYyDdqtc-Y,6469
172
+ hud/eval/runtime.py,sha256=fcpGNH3UhKAyTwm_qBNv6dJCMv2eGAtVfmK8MZJuI3w,42984
173
+ hud/eval/sync.py,sha256=dptdsSZvtC0O5Qyk4VQir1PuYVFDxw2DMZ-Q9wCdah0,6445
174
174
  hud/eval/task.py,sha256=9xQ_x4AjY26M-sVRH6HhDZpKeofBw1hd8LZ0csBy53k,4617
175
175
  hud/eval/taskset.py,sha256=9KVYxoeMnMHa2syR4fRGh9r1IuKoDVyMe7WEUFvaGxQ,12086
176
176
  hud/eval/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
177
177
  hud/eval/tests/test_chat.py,sha256=yQt_dM1UJu-Cw8JS8di102h06ee7QI9CqNA27U66_Qg,4310
178
178
  hud/eval/tests/test_docker_provider.py,sha256=1W1xyOzjHti6jfV2eiVnNd5CxKEMAKq8NBgBy-m5ftA,23741
179
179
  hud/eval/tests/test_file_tracking_observer.py,sha256=DteazLLWK0LKgtUn_6v4_wMI-1jhENMx7Y87-pdg-I8,4197
180
- hud/eval/tests/test_hosted.py,sha256=Kv5XoxWQ_8DLgMysu5XaZU8uIuVr6hXuYhmYopduGT4,15664
180
+ hud/eval/tests/test_hosted.py,sha256=S0gGqAUaizlCGC30XwvaWb-TJhFgLUPlwsMO0WgjVWM,16284
181
181
  hud/eval/tests/test_job.py,sha256=UyaqbOY-0pnd2RNIp3glS_L_JJFT0-7GlSkgRhgaU1A,1867
182
- hud/eval/tests/test_rollout.py,sha256=tsMx9gFRRc0Afzs68wOf-G0_zGYkN4zYZ4KUNU0c8bk,11232
183
- hud/eval/tests/test_sync.py,sha256=rRLJrEn_0OEhkAxgShpfmjiVBeADDwqgb_yOSZWxj80,4937
182
+ hud/eval/tests/test_rollout.py,sha256=YUVqzDbIg9Y5LNnDwaNJ40hOL1BVAFgpHRHCyGlcfQw,14027
183
+ hud/eval/tests/test_sync.py,sha256=1gFC65ZiZojeSn9q1v-RMK2Ps130mlh-aXE7G8sn54k,5234
184
184
  hud/eval/tests/test_task.py,sha256=n0E3B3TBYV6aM2_KFVGPHuD9nBGlpwq4ZvBu9wpjqtU,9754
185
185
  hud/graders/__init__.py,sha256=eccF8MXHQBvmynULljOCEMn82YSK0HSScD1TlS8UoT4,1570
186
186
  hud/graders/base.py,sha256=fDGrdWfMyu4cvoNkWwDPtN5oRchLFqhqcZPO50jJtA8,1428
@@ -226,8 +226,8 @@ hud/utils/tests/test_platform.py,sha256=mwhyFkUBvgmHRc43vQ_JgAAW2N9fIaxkQhVo-GB4
226
226
  hud/utils/tests/test_requests.py,sha256=ENK6P5xLTuSgWDcCau4zCj_5zPV_EooGwU4P8YYl5Gw,9109
227
227
  hud/utils/tests/test_serialization.py,sha256=GY4NiFUJtwLSYQWA0n1zme-Ul4DnBLByHCOOkxn2kLM,819
228
228
  hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
229
- hud_python-0.6.6.dist-info/METADATA,sha256=fw-skaVneGXagTR9XbMdir9TPlB9hW0Hd4rg-XdHEis,12344
230
- hud_python-0.6.6.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
231
- hud_python-0.6.6.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
232
- hud_python-0.6.6.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
233
- hud_python-0.6.6.dist-info/RECORD,,
229
+ hud_python-0.6.8.dev0.dist-info/METADATA,sha256=k0BA7OmInHsM-CB-pm5GNc0yYVwF7EbUr0etmiU-xXg,12427
230
+ hud_python-0.6.8.dev0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
231
+ hud_python-0.6.8.dev0.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
232
+ hud_python-0.6.8.dev0.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
233
+ hud_python-0.6.8.dev0.dist-info/RECORD,,