hud-python 0.6.7__py3-none-any.whl → 0.6.8.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,11 +17,13 @@ from hud.types import MCPToolCall, MCPToolResult
17
17
  from hud.utils import gateway
18
18
 
19
19
  from .tools import (
20
+ BashTool,
21
+ EditTool,
20
22
  GlobTool,
21
23
  GrepTool,
22
- ListTool,
23
24
  OpenAICompatibleMCPProxyTool,
24
25
  ReadTool,
26
+ WriteTool,
25
27
  )
26
28
  from .tools.base import format_chat_result
27
29
 
@@ -41,10 +43,12 @@ class OpenAIChatAgent(ToolAgent[ChatCompletionMessageParam, OpenAIChatConfig]):
41
43
  """OpenAI-compatible agent using the chat.completions protocol."""
42
44
 
43
45
  tool_catalog = (
46
+ BashTool,
44
47
  ReadTool,
45
- GrepTool,
46
48
  GlobTool,
47
- ListTool,
49
+ GrepTool,
50
+ EditTool,
51
+ WriteTool,
48
52
  OpenAICompatibleMCPProxyTool,
49
53
  )
50
54
 
@@ -2,13 +2,15 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from .filesystem import GlobTool, GrepTool, ListTool, ReadTool
5
+ from .filesystem import BashTool, EditTool, GlobTool, GrepTool, ReadTool, WriteTool
6
6
  from .mcp_proxy import OpenAICompatibleMCPProxyTool
7
7
 
8
8
  __all__ = [
9
+ "BashTool",
10
+ "EditTool",
9
11
  "GlobTool",
10
12
  "GrepTool",
11
- "ListTool",
12
13
  "OpenAICompatibleMCPProxyTool",
13
14
  "ReadTool",
15
+ "WriteTool",
14
16
  ]
@@ -1,16 +1,20 @@
1
- """OpenAI-compatible filesystem tools backed by SSHClient."""
1
+ """OpenAI-compatible OpenCode-style workspace tools backed by SSHClient."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import math
6
+ import posixpath
5
7
  import shlex
6
8
  from typing import Any, ClassVar
7
9
 
8
10
  import mcp.types as mcp_types
9
11
 
10
12
  from hud.agents.tools import SSHTool
11
- from hud.agents.tools.base import AgentToolSpec, result_text
13
+ from hud.agents.tools.base import AgentToolSpec, result_text, tool_err
12
14
  from hud.types import MCPToolResult
13
15
 
16
+ DEFAULT_READ_LIMIT = 2000
17
+
14
18
 
15
19
  class _FilesystemTool(SSHTool):
16
20
  description: ClassVar[str]
@@ -34,16 +38,26 @@ class _FilesystemTool(SSHTool):
34
38
 
35
39
  class ReadTool(_FilesystemTool):
36
40
  name = "read"
37
- description = "Reads a file from the local filesystem. Use offset and limit for pagination."
41
+ description = (
42
+ "Reads a file or directory from the workspace. Use offset and limit for pagination."
43
+ )
38
44
  parameters: ClassVar[dict[str, Any]] = {
39
45
  "type": "object",
40
46
  "properties": {
41
- "filePath": {"type": "string", "description": "Absolute path to the file to read."},
47
+ "filePath": {
48
+ "type": "string",
49
+ "description": "The absolute path to the file or directory to read.",
50
+ },
42
51
  "offset": {
43
52
  "type": "integer",
44
- "description": "0-based line offset to start reading from.",
53
+ "description": "The line number to start reading from (1-indexed).",
54
+ "minimum": 0,
55
+ },
56
+ "limit": {
57
+ "type": "integer",
58
+ "description": "The maximum number of lines to read (defaults to 2000).",
59
+ "minimum": 1,
45
60
  },
46
- "limit": {"type": "integer", "description": "Maximum number of lines to read."},
47
61
  },
48
62
  "required": ["filePath"],
49
63
  }
@@ -52,19 +66,205 @@ class ReadTool(_FilesystemTool):
52
66
  path = arguments.get("filePath")
53
67
  if not isinstance(path, str) or not path:
54
68
  raise ValueError("filePath is required")
69
+ offset = _read_offset(arguments.get("offset"))
70
+ limit = _positive_int(arguments.get("limit"), default=DEFAULT_READ_LIMIT, name="limit")
71
+ if not (await self.bash(f"test -d {shlex.quote(path)}")).isError:
72
+ return await self._read_directory(path, offset=offset, limit=limit)
55
73
  result = await self.file_read(path)
56
74
  if result.isError:
57
75
  return result
58
- offset = arguments.get("offset")
59
- limit = arguments.get("limit")
60
- if isinstance(offset, int) and offset >= 0:
61
- lines = result_text(result).splitlines(keepends=True)
62
- end = offset + limit if isinstance(limit, int) and limit > 0 else len(lines)
63
- sliced = lines[offset:end]
64
- return MCPToolResult(
65
- content=[mcp_types.TextContent(type="text", text="".join(sliced))],
76
+ text = result_text(result)
77
+ lines = text.splitlines()
78
+ start = offset - 1
79
+ if start > len(lines) and not (len(lines) == 0 and offset == 1):
80
+ return tool_err(f"Offset {offset} is out of range for this file ({len(lines)} lines)")
81
+ sliced = lines[start : start + limit]
82
+ last = offset + len(sliced) - 1
83
+ more = last < len(lines)
84
+ body = [
85
+ f"<path>{path}</path>",
86
+ "<type>file</type>",
87
+ "<content>",
88
+ *[f"{i + offset}: {line}" for i, line in enumerate(sliced)],
89
+ ]
90
+ if more:
91
+ body.append(
92
+ f"\n(Showing lines {offset}-{last} of {len(lines)}. "
93
+ f"Use offset={last + 1} to continue.)"
94
+ )
95
+ else:
96
+ body.append(f"\n(End of file - total {len(lines)} lines)")
97
+ body.append("</content>")
98
+ return MCPToolResult(content=[mcp_types.TextContent(type="text", text="\n".join(body))])
99
+
100
async def _read_directory(self, path: str, *, offset: int, limit: int) -> MCPToolResult:
    """Render one page of a directory listing in the tagged read format.

    Args:
        path: Directory to list; echoed back inside the ``<path>`` tag.
        offset: 1-indexed position of the first entry to include.
        limit: Maximum number of entries to include on this page.

    Returns:
        The ``file_list`` error result unchanged when listing fails, a tool
        error when ``offset`` points past the last entry of a non-empty
        directory, otherwise a text result holding the requested page
        followed by a summary line.
    """
    listing = await self.file_list(path)
    if listing.isError:
        return listing
    entries = result_text(listing).splitlines()
    # A lone "(empty)" line is treated as "no entries" rather than as a file
    # name (presumably the placeholder file_list prints for an empty directory).
    if entries == ["(empty)"]:
        entries = []
    start = offset - 1
    # Paging past the end previously produced an empty page labelled
    # "(N entries)", which reads as if the directory had been shown in full.
    if entries and start >= len(entries):
        return tool_err(
            f"Offset {offset} is out of range for this directory ({len(entries)} entries)"
        )
    sliced = entries[start : start + limit]
    truncated = start + len(sliced) < len(entries)
    body = [
        f"<path>{path}</path>",
        "<type>directory</type>",
        "<entries>",
        *sliced,
    ]
    if truncated:
        # Tell the caller exactly which offset resumes where this page stopped.
        body.append(
            f"\n(Showing {len(sliced)} of {len(entries)} entries. "
            f"Use offset={offset + len(sliced)} to continue.)"
        )
    else:
        body.append(f"\n({len(entries)} entries)")
    body.append("</entries>")
    return MCPToolResult(content=[mcp_types.TextContent(type="text", text="\n".join(body))])
125
+
126
+
127
class BashTool(_FilesystemTool):
    """Workspace tool that runs one shell command, optionally time-boxed and in a given directory."""

    name = "bash"
    description = (
        "Executes a shell command in the workspace. Prefer read, grep, glob, edit, "
        "and write for filesystem operations."
    )
    parameters: ClassVar[dict[str, Any]] = {
        "type": "object",
        "properties": {
            "command": {"type": "string", "description": "The command to execute."},
            "timeout": {
                "type": "integer",
                "description": "Optional timeout in milliseconds.",
                "minimum": 1,
            },
            "workdir": {
                "type": "string",
                "description": "The working directory to run the command in.",
            },
        },
        "required": ["command"],
    }

    async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
        """Build the final shell line from ``arguments`` and run it via ``self.bash``.

        Args:
            arguments: Tool-call arguments. ``command`` is required; ``timeout``
                (milliseconds) and ``workdir`` are optional.

        Returns:
            Whatever ``self.bash`` returns for the assembled command.

        Raises:
            ValueError: If ``command`` is missing or empty, or ``timeout`` is
                not a positive integer.
        """
        command = arguments.get("command")
        if not isinstance(command, str) or not command:
            raise ValueError("command is required")
        timeout = arguments.get("timeout")
        if timeout is not None:
            # bool is a subclass of int, so a JSON `true` would otherwise be
            # accepted here and interpreted as a 1 ms timeout.
            if isinstance(timeout, bool) or not isinstance(timeout, int) or timeout < 1:
                raise ValueError("timeout must be a positive integer")
            # The command is handed to `timeout` with a whole-second duration,
            # so round the millisecond budget up; integer arithmetic keeps the
            # ceiling exact for arbitrarily large values.
            seconds = (timeout + 999) // 1000
            command = f"timeout {seconds}s bash -lc {shlex.quote(command)}"
        workdir = arguments.get("workdir")
        if isinstance(workdir, str) and workdir:
            # `&&` ensures the command never runs if the directory change fails.
            command = f"cd {shlex.quote(workdir)} && {command}"
        return await self.bash(command)
164
+
165
+
166
class EditTool(_FilesystemTool):
    """Workspace tool performing a literal find-and-replace inside one file."""

    name = "edit"
    description = (
        "Replaces text within a file. Use oldString as exact literal context. "
        "Set replaceAll to true to replace every occurrence."
    )
    parameters: ClassVar[dict[str, Any]] = {
        "type": "object",
        "properties": {
            "filePath": {
                "type": "string",
                "description": "The absolute path to the file to modify.",
            },
            "oldString": {"type": "string", "description": "The text to replace."},
            "newString": {
                "type": "string",
                "description": "The text to replace it with (must be different from oldString).",
            },
            "replaceAll": {
                "type": "boolean",
                "description": "Replace all occurrences of oldString (default false).",
            },
        },
        "required": ["filePath", "oldString", "newString"],
    }

    async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
        """Apply the requested replacement and write the file back.

        An empty ``oldString`` creates the file with ``newString`` as its
        content, but only when nothing exists at ``filePath`` yet. Otherwise
        ``oldString`` must occur in the file, and must occur exactly once
        unless ``replaceAll`` is literally ``True``.

        Raises:
            ValueError: If ``filePath`` is missing or empty, or ``oldString`` /
                ``newString`` is not a string.
        """
        target = arguments.get("filePath")
        if not (isinstance(target, str) and target):
            raise ValueError("filePath is required")
        needle = arguments.get("oldString")
        replacement = arguments.get("newString")
        if not isinstance(needle, str):
            raise ValueError("oldString is required")
        if not isinstance(replacement, str):
            raise ValueError("newString is required")
        if needle == replacement:
            return tool_err("No changes to apply: oldString and newString are identical.")

        if not needle:
            # Creation path: `test -e` succeeding means something already
            # lives at the target, so refuse instead of overwriting it.
            probe = await self.bash(f"test -e {shlex.quote(target)}")
            if not probe.isError:
                return tool_err(
                    "oldString cannot be empty when editing an existing file. "
                    "Provide exact text to replace, or use write for full-file replacement."
                )
            made_parent = await self._ensure_parent(target)
            if made_parent.isError:
                return made_parent
            return await self.file_write(target, replacement)

        current = await self.file_read(target)
        if current.isError:
            return current
        text = result_text(current)
        hits = text.count(needle)
        if hits == 0:
            return tool_err(f"oldString not found in {target}")
        everywhere = arguments.get("replaceAll") is True
        if hits > 1 and not everywhere:
            return tool_err(f"oldString matches {hits} times in {target}; set replaceAll to true")
        # A count of -1 replaces every occurrence; 1 replaces only the first.
        updated = text.replace(needle, replacement, -1 if everywhere else 1)
        return await self.file_write(target, updated)

    async def _ensure_parent(self, path: str) -> MCPToolResult:
        """Create the parent directory of ``path`` with ``mkdir -p`` when it has one worth creating."""
        parent = posixpath.dirname(path)
        # "", "." and "/" need no mkdir; report success without running anything.
        if parent in {"", ".", "/"}:
            return MCPToolResult(content=[])
        return await self.bash(f"mkdir -p {shlex.quote(parent)}")
234
+
235
+
236
class WriteTool(_FilesystemTool):
    """Workspace tool that creates a file or replaces its entire content."""

    name = "write"
    description = "Creates or overwrites a file with the provided content."
    parameters: ClassVar[dict[str, Any]] = {
        "type": "object",
        "properties": {
            "content": {"type": "string", "description": "The content to write to the file."},
            "filePath": {
                "type": "string",
                "description": "The absolute path to the file to write.",
            },
        },
        "required": ["content", "filePath"],
    }

    async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
        """Ensure the parent directory exists, then write ``content`` to ``filePath``.

        Returns:
            The failed ``mkdir`` result if the parent could not be created,
            otherwise the result of ``self.file_write``.

        Raises:
            ValueError: If ``filePath`` is missing or empty, or ``content`` is
                not a string.
        """
        target = arguments.get("filePath")
        if not (isinstance(target, str) and target):
            raise ValueError("filePath is required")
        payload = arguments.get("content")
        if not isinstance(payload, str):
            raise ValueError("content is required")
        made_parent = await self._ensure_parent(target)
        if made_parent.isError:
            return made_parent
        return await self.file_write(target, payload)

    async def _ensure_parent(self, path: str) -> MCPToolResult:
        """Create the parent directory of ``path`` with ``mkdir -p`` when it has one worth creating."""
        parent = posixpath.dirname(path)
        # "", "." and "/" need no mkdir; report success without running anything.
        if parent in {"", ".", "/"}:
            return MCPToolResult(content=[])
        return await self.bash(f"mkdir -p {shlex.quote(parent)}")
68
268
 
69
269
 
70
270
  class GrepTool(_FilesystemTool):
@@ -115,24 +315,18 @@ class GlobTool(_FilesystemTool):
115
315
  return await self.bash(f"find {shlex.quote(str(path))} -name {shlex.quote(pattern)}")
116
316
 
117
317
 
118
- class ListTool(_FilesystemTool):
119
- name = "list"
120
- description = "Lists files and directories in a given path."
121
- parameters: ClassVar[dict[str, Any]] = {
122
- "type": "object",
123
- "properties": {
124
- "path": {"type": "string", "description": "Directory to list."},
125
- "ignore": {
126
- "type": "array",
127
- "items": {"type": "string"},
128
- "description": "Glob patterns to ignore.",
129
- },
130
- },
131
- }
318
def _positive_int(value: Any, *, default: int, name: str) -> int:
    """Validate an optional positive-integer tool argument.

    Args:
        value: Raw argument value; ``None`` means the argument was omitted.
        default: Value returned when ``value`` is ``None``.
        name: Argument name used in the error message.

    Returns:
        ``default`` when ``value`` is ``None``, otherwise ``value`` itself.

    Raises:
        ValueError: If ``value`` is not an integer greater than or equal to 1.
    """
    if value is None:
        return default
    # bool is a subclass of int, so without the explicit check a JSON `true`
    # would pass validation and be returned as if it were the integer 1.
    if isinstance(value, bool) or not isinstance(value, int) or value < 1:
        raise ValueError(f"{name} must be a positive integer")
    return value
132
324
 
133
- async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
134
- path = arguments.get("path") or "."
135
- return await self.file_list(str(path))
325
+
326
def _read_offset(value: Any) -> int:
    """Normalise the ``offset`` argument of a read to a 1-indexed line number.

    A missing offset and an integer ``0`` both mean "start at the first line";
    every other value is validated by ``_positive_int``.

    Raises:
        ValueError: If ``value`` is neither ``None``, ``0`` nor a positive integer.
    """
    if value is None:
        return 1
    # Only a genuine integer zero is an alias for line 1. A bare `value == 0`
    # would also match False and 0.0 and silently coerce them to the first
    # line instead of sending them through validation.
    if isinstance(value, int) and not isinstance(value, bool) and value == 0:
        return 1
    return _positive_int(value, default=1, name="offset")
136
330
 
137
331
 
138
- __all__ = ["GlobTool", "GrepTool", "ListTool", "ReadTool"]
332
+ __all__ = ["BashTool", "EditTool", "GlobTool", "GrepTool", "ReadTool", "WriteTool"]
@@ -7,16 +7,19 @@ client and assert the command translation + result shape, fully offline.
7
7
 
8
8
  from __future__ import annotations
9
9
 
10
- from typing import TYPE_CHECKING, Any, cast
10
+ import shlex
11
+ from typing import Any, cast
11
12
 
12
13
  import pytest
13
14
 
14
15
  from hud.agents.claude.tools.coding import ClaudeBashTool, ClaudeTextEditorTool
15
16
  from hud.agents.gemini.tools.coding import GeminiEditTool, GeminiShellTool
16
17
  from hud.agents.openai.tools.coding import OpenAIShellTool
17
-
18
- if TYPE_CHECKING:
19
- from hud.capabilities import SSHClient
18
+ from hud.agents.openai_compatible.agent import OpenAIChatAgent
19
+ from hud.agents.openai_compatible.tools import BashTool, EditTool, ReadTool, WriteTool
20
+ from hud.agents.tools.base import result_text
21
+ from hud.agents.types import OpenAIChatConfig
22
+ from hud.capabilities import Capability, SSHClient
20
23
 
21
24
 
22
25
  class _Completed:
@@ -61,6 +64,21 @@ class _FakeSFTP:
61
64
  def open(self, path: str, mode: str) -> _FakeOpenFile:
62
65
  return _FakeOpenFile(self._store, path, mode)
63
66
 
67
async def listdir(self, path: str) -> list[str]:
    """Return the sorted names of the immediate children of ``path``.

    The store is a flat mapping of full file paths, so children are derived
    from the first path segment after the directory prefix.
    """
    # Stripping then re-adding the slash gives "/" for the root (and for an
    # empty path) and "<dir>/" for everything else.
    root = path.rstrip("/") + "/"
    children = {
        stored[len(root) :].split("/", 1)[0]
        for stored in self._store
        if stored.startswith(root) and len(stored) > len(root)
    }
    return sorted(children)
81
+
64
82
 
65
83
  class _Conn:
66
84
  def __init__(self, completed: _Completed, store: dict[str, bytes]) -> None:
@@ -70,13 +88,26 @@ class _Conn:
70
88
 
71
89
async def run(self, command: str, check: bool = False) -> _Completed:
    """Record ``command`` and answer it from the in-memory store where possible.

    ``test -d <path>`` and ``test -e <path>`` are answered from the stored
    file paths, ``mkdir -p ...`` always succeeds, and any other command gets
    the canned completion supplied at construction.
    """
    self.commands.append(command)
    argv = shlex.split(command)
    if len(argv) == 3 and argv[0] == "test" and argv[1] in ("-d", "-e"):
        target = argv[2]
        dir_prefix = target.rstrip("/") + "/"
        # A path counts as a directory when some stored file lives beneath it.
        has_children = any(stored.startswith(dir_prefix) for stored in self._store)
        if argv[1] == "-d":
            found = has_children
        else:
            found = target in self._store or has_children
        return _Completed(exit_status=0 if found else 1)
    if len(argv) >= 3 and argv[:2] == ["mkdir", "-p"]:
        return _Completed(exit_status=0)
    return self._completed
74
105
 
75
106
  def start_sftp_client(self) -> _FakeSFTP:
76
107
  return _FakeSFTP(self._store)
77
108
 
78
109
 
79
- class _FakeSSH:
110
+ class _FakeSSH(SSHClient):
80
111
  """Duck-typed ``SSHClient``: ``conn.run`` (bash) + ``conn.start_sftp_client`` (files)."""
81
112
 
82
113
  def __init__(
@@ -87,7 +118,10 @@ class _FakeSSH:
87
118
  files: dict[str, bytes] | None = None,
88
119
  ) -> None:
89
120
  self.files: dict[str, bytes] = files or {}
90
- self.conn = _Conn(_Completed(stdout=stdout, exit_status=exit_status), self.files)
121
+ super().__init__(
122
+ Capability(name="shell", protocol="ssh/2", url="ssh://localhost:22"),
123
+ cast("Any", _Conn(_Completed(stdout=stdout, exit_status=exit_status), self.files)),
124
+ )
91
125
 
92
126
 
93
127
  def _ssh(**kwargs: Any) -> SSHClient:
@@ -98,6 +132,11 @@ def _commands(tool: Any) -> list[str]:
98
132
  return tool.client.conn.commands
99
133
 
100
134
 
135
class _OpenAIChatAgentForTest(OpenAIChatAgent):
    """Test-only subclass that gives tests a public entry point to ``_build_tools``."""

    async def build_tools_for_test(self, ssh: SSHClient) -> tuple[dict[str, Any], list[Any]]:
        """Return what ``_build_tools`` produces for a mapping holding only ``ssh``."""
        return await self._build_tools({"ssh": ssh})
138
+
139
+
101
140
  # ─── OpenAI shell ─────────────────────────────────────────────────────
102
141
 
103
142
 
@@ -135,6 +174,96 @@ def test_openai_shell_to_params_is_shell_type() -> None:
135
174
  assert tool.to_params()["type"] == "shell"
136
175
 
137
176
 
177
+ # ─── OpenAI-compatible OpenCode workspace tools ───────────────────────
178
+
179
+
180
async def test_openai_compatible_catalog_matches_opencode_workspace_tools() -> None:
    """The built tools and their params are bash, read, glob, grep, edit, write, in that order."""
    # The model client is never called here, so a bare object stands in for it.
    agent = _OpenAIChatAgentForTest(
        OpenAIChatConfig(model="qwen3.6-plus", model_client=cast("Any", object()))
    )

    tools, params = await agent.build_tools_for_test(_ssh())

    assert list(tools) == ["bash", "read", "glob", "grep", "edit", "write"]
    assert [param["function"]["name"] for param in params] == [
        "bash",
        "read",
        "glob",
        "grep",
        "edit",
        "write",
    ]
196
+
197
+
198
async def test_openai_compatible_bash_uses_workdir_and_timeout() -> None:
    """A 2500 ms timeout becomes ``timeout 3s`` and the workdir is shell-quoted into a ``cd``."""
    tool = BashTool(spec=BashTool.default_spec("qwen"), client=_ssh())

    await tool.execute({"command": "echo hi", "workdir": "/tmp/my dir", "timeout": 2500})

    assert _commands(tool) == ["cd '/tmp/my dir' && timeout 3s bash -lc 'echo hi'"]
204
+
205
+
206
async def test_openai_compatible_write_stores_file_via_workspace_sftp() -> None:
    """Writing ``/REPORT.md`` succeeds and the bytes land in the fake client's file store."""
    ssh = _FakeSSH()
    tool = WriteTool(spec=WriteTool.default_spec("qwen"), client=cast("SSHClient", ssh))

    result = await tool.execute({"filePath": "/REPORT.md", "content": "done"})

    assert result.isError is False
    assert ssh.files["/REPORT.md"] == b"done"
214
+
215
+
216
async def test_openai_compatible_edit_rewrites_unique_match() -> None:
    """An ``oldString`` that occurs exactly once is replaced and the file rewritten."""
    ssh = _FakeSSH(files={"/f.txt": b"hello old world"})
    tool = EditTool(spec=EditTool.default_spec("qwen"), client=cast("SSHClient", ssh))

    result = await tool.execute(
        {"filePath": "/f.txt", "oldString": "old", "newString": "new"},
    )

    assert result.isError is False
    assert ssh.files["/f.txt"] == b"hello new world"
226
+
227
+
228
async def test_openai_compatible_edit_rejects_ambiguous_match() -> None:
    """Without ``replaceAll``, an ``oldString`` matching three times errors and leaves the file intact."""
    ssh = _FakeSSH(files={"/f.txt": b"a a a"})
    tool = EditTool(spec=EditTool.default_spec("qwen"), client=cast("SSHClient", ssh))

    result = await tool.execute(
        {"filePath": "/f.txt", "oldString": "a", "newString": "b"},
    )

    assert result.isError is True
    assert ssh.files["/f.txt"] == b"a a a"
238
+
239
+
240
async def test_openai_compatible_read_lists_directories() -> None:
    """Reading a directory path yields a directory-typed result naming its direct children."""
    tool = ReadTool(
        spec=ReadTool.default_spec("qwen"),
        client=_ssh(files={"/work/a.txt": b"a", "/work/nested/b.txt": b"b"}),
    )

    result = await tool.execute({"filePath": "/work"})

    text = result_text(result)
    assert "<type>directory</type>" in text
    assert "a.txt" in text
    assert "nested" in text
252
+
253
+
254
async def test_openai_compatible_read_accepts_zero_offset_for_first_page() -> None:
    """``offset=0`` is accepted and, with ``limit=1``, returns only line 1 of the file."""
    tool = ReadTool(
        spec=ReadTool.default_spec("qwen"),
        client=_ssh(files={"/f.txt": b"alpha\nbeta\n"}),
    )

    result = await tool.execute({"filePath": "/f.txt", "offset": 0, "limit": 1})

    text = result_text(result)
    assert "1: alpha" in text
    assert "2: beta" not in text
265
+
266
+
138
267
  # ─── Gemini shell ─────────────────────────────────────────────────────
139
268
 
140
269
 
@@ -13,14 +13,18 @@ the atom and return a :class:`Job`.
13
13
  from __future__ import annotations
14
14
 
15
15
  import asyncio
16
+ import json
16
17
  import textwrap
17
18
  from contextlib import asynccontextmanager
19
+ from types import SimpleNamespace
18
20
  from typing import TYPE_CHECKING, Any
19
21
 
20
22
  import mcp.types as mcp_types
21
23
  import pytest
22
24
 
23
25
  from hud.agents.base import Agent
26
+ from hud.agents.openai_compatible import OpenAIChatAgent
27
+ from hud.agents.types import OpenAIChatConfig
24
28
  from hud.environment import Environment
25
29
  from hud.eval import Job, LocalRuntime, Task, Taskset
26
30
  from hud.eval.run import Run, rollout
@@ -63,6 +67,44 @@ class _FnAgent(Agent):
63
67
  run.trace.content = self._fn(run.prompt)
64
68
 
65
69
 
70
class _SequencedCompletions:
    """Fake ``chat.completions`` endpoint that replays canned responses in order.

    Every call to ``create`` is recorded in ``requests`` so tests can inspect
    what was sent.
    """

    def __init__(self, responses: list[Any]) -> None:
        from collections import deque

        # Copy into a private queue: popping from the caller's own list would
        # empty a fixture the test may still want to inspect or reuse.
        self._responses = deque(responses)
        self.requests: list[dict[str, Any]] = []

    async def create(self, **kwargs: Any) -> Any:
        """Record the request kwargs and return the next canned response.

        Raises:
            IndexError: If more calls are made than responses were supplied.
        """
        self.requests.append(kwargs)
        return self._responses.popleft()
78
+
79
+
80
class _FakeOpenAI:
    """Minimal client stand-in exposing only ``chat.completions``."""

    def __init__(self, responses: list[Any]) -> None:
        completions = _SequencedCompletions(responses)
        self.chat = SimpleNamespace(completions=completions)
83
+
84
+
85
def _chat_response(content: str, tool_calls: list[Any] | None = None) -> Any:
    """Build a namespace shaped like a single-choice chat completion response.

    The assistant message carries ``content`` and ``tool_calls`` (an empty
    list when none are given); its ``model_dump`` always returns only the role
    and content.
    """

    def model_dump(exclude_none=True):
        # The flag is accepted for call compatibility but does not change the output.
        return {"role": "assistant", "content": content}

    usage = SimpleNamespace(prompt_tokens=1, completion_tokens=1, prompt_tokens_details=None)
    message = SimpleNamespace(
        content=content,
        tool_calls=tool_calls or [],
        refusal=None,
        model_dump=model_dump,
    )
    only_choice = SimpleNamespace(message=message, finish_reason="stop", logprobs=None)
    return SimpleNamespace(choices=[only_choice], model="fake-openai-compatible", usage=usage)
98
+
99
+
100
def _tool_call(name: str, arguments: str) -> Any:
    """Build a namespace shaped like a function tool call with id ``call_<name>``."""
    function = SimpleNamespace(name=name, arguments=arguments)
    return SimpleNamespace(type="function", id=f"call_{name}", function=function)
106
+
107
+
66
108
  def _add_task(a: int, b: int) -> Task:
67
109
  """A pure data row; the env it names is defined by the spawned file."""
68
110
  return Task(env="sums", id="add", args={"a": a, "b": b})
@@ -86,6 +128,54 @@ async def test_rollout_returns_graded_run_with_trace_id(env_file: Path) -> None:
86
128
  assert run.runtime.startswith("tcp://127.0.0.1:")
87
129
 
88
130
 
131
async def test_openai_compatible_write_reaches_workspace_grader(tmp_path: Path) -> None:
    """A scripted ``write`` tool call creates the file that the environment's grader then reads.

    The fake model first requests ``write`` on ``REPORT.md`` and then answers
    "done"; the rollout must score 1.0 and the first request must advertise
    the six workspace tools in order.
    """
    workspace = tmp_path / "workspace"
    report = workspace / "REPORT.md"
    env = Environment("opencode_report")
    # Guest path equals the host path, so the absolute path the agent writes
    # to is the same file the grader checks below.
    env.workspace(workspace, guest_path=str(workspace))

    @env.initialize
    async def seed() -> None:
        # Start from an existing workspace with no report in it.
        workspace.mkdir(parents=True, exist_ok=True)
        report.unlink(missing_ok=True)

    @env.template()
    async def write_report():
        # Presumably the first yield is the prompt and the second the reward.
        yield "Write PASS to REPORT.md."
        yield 1.0 if report.exists() and report.read_text().strip() == "PASS" else 0.0

    model_client = _FakeOpenAI(
        [
            _chat_response(
                "",
                [_tool_call("write", json.dumps({"filePath": str(report), "content": "PASS"}))],
            ),
            _chat_response("done"),
        ]
    )
    agent = OpenAIChatAgent(
        OpenAIChatConfig(model="qwen3.6-plus", model_client=model_client, max_steps=4)
    )

    run = await rollout(
        Task(env="opencode_report", id="write_report"),
        agent,
        runtime=lambda _task: _local(env),
    )

    assert run.reward == 1.0
    assert report.read_text() == "PASS"
    # The tool list is read from the first request's extra_body.
    tools = model_client.chat.completions.requests[0]["extra_body"]["tools"]
    assert [tool["function"]["name"] for tool in tools] == [
        "bash",
        "read",
        "glob",
        "grep",
        "edit",
        "write",
    ]
177
+
178
+
89
179
  async def test_mid_run_failure_keeps_the_real_run_and_its_evidence(env_file: Path) -> None:
90
180
  def boom(prompt: str) -> str:
91
181
  raise RuntimeError("agent exploded")
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.6.7"
7
+ __version__ = "0.6.8.dev0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.6.7
3
+ Version: 0.6.8.dev0
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -87,7 +87,7 @@ Description-Content-Type: text/markdown
87
87
 
88
88
  HUD is a platform for building RL environments for AI agents, across coding, browser, computer-use, and robotics. Define an environment, write tasks, and run them as evals and training across any model, at any scale.
89
89
 
90
- To learn more, see the [documentation](https://docs.hud.ai) and [API reference](https://docs.hud.ai/reference/environment).
90
+ To learn more, see the [documentation](https://docs.hud.ai) and [environment reference](https://docs.hud.ai/v6/core/environment).
91
91
 
92
92
  [![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
93
93
  [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
@@ -120,7 +120,7 @@ Then scaffold your first environment:
120
120
  hud init my-env
121
121
  ```
122
122
 
123
- ![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
123
+ ![Agent running on SheetBench](docs/src/images/trace_sheet.gif)
124
124
 
125
125
  ## The protocol
126
126
 
@@ -159,14 +159,14 @@ hud eval my-taskset --remote
159
159
  For local iteration, the same protocol works against a container on your laptop:
160
160
 
161
161
  ```bash
162
- hud build .
163
- docker run -d --name run1 my-env
164
- docker exec run1 hud task start fix_bug
165
- docker exec run1 hud task grade fix_bug --answer ""
162
+ docker build -f Dockerfile.hud -t my-env .
163
+ docker run -d --name run1 -p 8765:8765 my-env
164
+ hud task start fix_bug --url tcp://127.0.0.1:8765
165
+ hud task grade fix_bug --url tcp://127.0.0.1:8765 --answer "..."
166
166
  docker rm -f run1
167
167
  ```
168
168
 
169
- → [Package & deploy](https://docs.hud.ai/run/deploy)
169
+ → [Run & deploy](https://docs.hud.ai/v6/core/runtime)
170
170
 
171
171
  ## Environments & templates
172
172
 
@@ -193,7 +193,7 @@ hud eval tasks.py claude --group 3
193
193
 
194
194
  Each graded evaluation is a **trace** (the SDK's live handle is a `Run`). With `HUD_API_KEY` set, every rollout is recorded on [hud.ai](https://hud.ai). Tasks that need a shell, browser, GUI, or robot declare **capabilities** (below); everything else — variants, grading, batching — stays identical.
195
195
 
196
- → [Quickstart](https://docs.hud.ai/quickstart) · [Tasks & tasksets](https://docs.hud.ai/reference/tasks)
196
+ → [Quickstart](https://docs.hud.ai/v6/start/quickstart) · [Tasks & tasksets](https://docs.hud.ai/v6/core/tasks)
197
197
 
198
198
  ## Capabilities & harnesses
199
199
 
@@ -211,39 +211,42 @@ A **capability** is a connection the environment exposes; a **harness** attaches
211
211
 
212
212
  **Bring your own:** a harness attaches to a capability and defines a tool spec — wrap `browser-use` on `cdp`, a VLA policy on `robot`, or your own agent on `ssh` / `mcp`. No protocol work required.
213
213
 
214
- → [Capabilities](https://docs.hud.ai/reference/capabilities) · [Models](https://docs.hud.ai/run/models) · [Robots](https://docs.hud.ai/reference/robots)
214
+ → [Capabilities](https://docs.hud.ai/v6/core/capabilities) · [Models](https://docs.hud.ai/v6/core/agents) · [Robots](https://docs.hud.ai/v6/advanced/robots)
215
215
 
216
216
  ## Deploy on the platform
217
217
 
218
218
  From the [platform UI](https://hud.ai) you can run batches, compare models on the same taskset, and inspect every trace.
219
219
 
220
- → [Deploy](https://docs.hud.ai/run/deploy) · [Leaderboards](https://hud.ai/leaderboards)
220
+ → [Run & deploy](https://docs.hud.ai/v6/core/runtime)
221
221
 
222
222
  ## Train on rewards
223
223
 
224
- Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and turn the rewards into GRPO advantages with `group_relative()`:
224
+ Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and pass the graded runs to `TrainingClient.step()`:
225
225
 
226
226
  ```python
227
+ from hud import TrainingClient
227
228
  from hud.agents import create_agent
228
- from hud.eval import Taskset, group_relative
229
+ from hud.eval import Job
229
230
 
230
- agent = create_agent("claude-sonnet-4-5")
231
- job = await Taskset(count_letter(word=w) for w in words).run(agent, group=16)
232
- for runs in job.results.values():
233
- advantages = group_relative([r.reward for r in runs], normalize_std=True)
234
- ... # feed (run.trace_id, adv) into your optimizer
231
+ agent = create_agent("arith-rl", completion_kwargs={"extra_body": {"return_token_ids": True}})
232
+ trainer = TrainingClient("arith-rl")
233
+ taskset, runtime = ... # your Taskset and where rollouts run
234
+
235
+ session = await Job.start("arith-rl", group=8)
236
+ start = len(session.runs)
237
+ await taskset.run(agent, runtime=runtime, group=8, job=session)
238
+ await trainer.step(session.runs[start:], learning_rate=1e-5, group_size=8)
235
239
  ```
236
240
 
237
241
  HUD is the environment-and-reward source for your own GRPO/PPO loop — the same environment trains any model, text or multimodal, unchanged.
238
242
 
239
- → [Training](https://docs.hud.ai/run/training) · [Designing tasks for signal](https://docs.hud.ai/run/signal)
243
+ → [Training](https://docs.hud.ai/v6/core/training) · [Designing tasks for signal](https://docs.hud.ai/v6/core/advice)
240
244
 
241
245
  ## Links
242
246
 
243
247
  - [Documentation](https://docs.hud.ai)
244
- - [Quickstart](https://docs.hud.ai/quickstart)
245
- - [CLI reference](https://docs.hud.ai/reference/cli)
246
- - [Leaderboards](https://hud.ai/leaderboards)
248
+ - [Quickstart](https://docs.hud.ai/v6/start/quickstart)
249
+ - [CLI reference](https://docs.hud.ai/v6/core/cli)
247
250
  - [Environment templates](https://hud.ai/environments)
248
251
  - [Supported models](https://hud.ai/models)
249
252
  - [Discord](https://discord.gg/wkjtmHYYjm)
@@ -268,8 +271,8 @@ Key areas: [Agents](hud/agents/) · [Environments](hud/environment/) · [Capabil
268
271
 
269
272
  ```bibtex
270
273
  @software{hud2025agentevalplatform,
271
- author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
272
- title = {HUD: An Evaluation and RL Envrionments Platform for Agents},
274
+ author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep Chawla and Nguyen Nhat Minh},
275
+ title = {HUD: An Evaluation and RL Environments Platform for Agents},
273
276
  date = {2025-04},
274
277
  url = {https://github.com/hud-evals/hud-python},
275
278
  langid = {en}
@@ -5,7 +5,7 @@ hud/conftest.py,sha256=HKbHvmFXLPX6KFSJgPFUAM22auclNNdFmHGwilNzg98,1012
5
5
  hud/server.py,sha256=NtSHIjBFr9lYvryfXrCa-VhwqnwkRy7n5fp_OuNhNOI,1235
6
6
  hud/settings.py,sha256=eyvMIOOlFk6kIAP8UsHEeoqf_UiOVhb1jhRCM2qv7b8,6393
7
7
  hud/types.py,sha256=kFVbQ-CcVhYpdX5jjgacRIppFS0q_nMXahijV_Hhl58,15022
8
- hud/version.py,sha256=65WfpY5H3Rz9DNOq0DBQjuLZDIP-JPjHy6Y3-Nfc_dc,104
8
+ hud/version.py,sha256=RD_T-I7Yj0KBuadVj2UQF2XmPhTeHn3Lo45gIQTb5e4,109
9
9
  hud/agents/__init__.py,sha256=UL1PXucnY1Ln9o_Xf0Y-mvfbNh6NUdMyPJp-_d9Wq7Q,5082
10
10
  hud/agents/base.py,sha256=WgEOWUmMioXTxYe6cOvbqnbM4n989Z9kFEZIN6xJ3pU,659
11
11
  hud/agents/tool_agent.py,sha256=a0xsh2d8IwvmiPGMs9LCzghi61FHt4vMK_9sW8eNFbA,12557
@@ -54,10 +54,10 @@ hud/agents/openai/tools/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
54
54
  hud/agents/openai/tools/tests/test_computer.py,sha256=qEK7h2eD4j6Wg6VjU_YD8kCRpXOXwHDXBv1bz0mh5bo,3488
55
55
  hud/agents/openai/tools/tests/test_strict_schema.py,sha256=8dGkCSO7_-TvryEfStKZ7nKEuO3WGLfzsjPUbfdHMhQ,2344
56
56
  hud/agents/openai_compatible/__init__.py,sha256=zQZSQHB97g3rtPx4Y8aG_0K1i17MLwGRaTyQLd31Jqk,98
57
- hud/agents/openai_compatible/agent.py,sha256=YjtQkrlgekhyGRhUoxwkJZqJNaHKKCgUKxU7gnRc2hY,9880
58
- hud/agents/openai_compatible/tools/__init__.py,sha256=H5zBQbEfT2z1fMs3yRdVVYa5oZ2ejhYnxWLJTH3gx08,307
57
+ hud/agents/openai_compatible/agent.py,sha256=7Zw6wa1ce7kt_xF4R_OfuoDbMPl09TktpjgFc16-_Lo,9946
58
+ hud/agents/openai_compatible/tools/__init__.py,sha256=kOPtrgiqTcnQabZpo1aNfYfnaqip6M3z2OeffJNz-Ak,361
59
59
  hud/agents/openai_compatible/tools/base.py,sha256=Jl6Bm9ZgEOqgdOnM7Xm66VN3RpfjeZF9w55of_ZGCMI,5760
60
- hud/agents/openai_compatible/tools/filesystem.py,sha256=QXJW0-7lYZXbcftxUC7LxXChx-17clsjoRfHJg2DFBA,4905
60
+ hud/agents/openai_compatible/tools/filesystem.py,sha256=hHSVW25OT_zxdJO6fE2kOPnnABOd06kHPWC08epoCNg,12523
61
61
  hud/agents/openai_compatible/tools/mcp_proxy.py,sha256=pfJdCvFxTaXkj6qrGK04jxibjeIhm6O-5STHPcB_qL4,844
62
62
  hud/agents/robot/__init__.py,sha256=UXyQYaoLMrxFr1QYU2D6UUz6BwK9gsp4-abe5jAOqUU,1620
63
63
  hud/agents/robot/_types.py,sha256=byWZMYRwLuzvu2U-ZXMx3TcyRTPcsjGF5HkItbgfcQ4,222
@@ -75,7 +75,7 @@ hud/agents/tests/test_claude_sdk_agent.py,sha256=lSY8wnLQgfJBNzF9BU-PcO4IrKaWtva
75
75
  hud/agents/tests/test_gemini_agent.py,sha256=7OdFFVSOkJE8Gb3blptWnEXuFWHuFCNlFAoMXTyV0Ec,4835
76
76
  hud/agents/tests/test_openai_agent.py,sha256=-69hoi_Bv9JdGngEnaJ74mSH-JCupg66ny7hODXQF00,4180
77
77
  hud/agents/tests/test_openai_compatible_agent.py,sha256=6JxFxkRdPT1O574VYvcsMXiUwhcvBFJQLBx46Utt4QI,2874
78
- hud/agents/tests/test_provider_native_tools.py,sha256=WjXV2dVNBG1ite6-aigzortgQIar9GMlZrMAE1_guVs,8381
78
+ hud/agents/tests/test_provider_native_tools.py,sha256=dZ4dOT3sUkMh_7p-pGDnTIL7UDdwngNJ8jarlqU0Plk,12989
79
79
  hud/agents/tests/test_tool_agent.py,sha256=w8cuBAMcGBbIwiMnjH-tg4ztqhlewQOnXK3h1XLkj5o,5373
80
80
  hud/agents/tests/test_trace.py,sha256=rUNbV-y4gI0dH0xluT9COY_epJD69XHAzaC1HO4mX10,4517
81
81
  hud/agents/tools/__init__.py,sha256=-fnzzq8qwEXWD8s-T8RUGamuYndXTESeFNNMQxsXH5A,858
@@ -179,7 +179,7 @@ hud/eval/tests/test_docker_provider.py,sha256=1W1xyOzjHti6jfV2eiVnNd5CxKEMAKq8NB
179
179
  hud/eval/tests/test_file_tracking_observer.py,sha256=DteazLLWK0LKgtUn_6v4_wMI-1jhENMx7Y87-pdg-I8,4197
180
180
  hud/eval/tests/test_hosted.py,sha256=S0gGqAUaizlCGC30XwvaWb-TJhFgLUPlwsMO0WgjVWM,16284
181
181
  hud/eval/tests/test_job.py,sha256=UyaqbOY-0pnd2RNIp3glS_L_JJFT0-7GlSkgRhgaU1A,1867
182
- hud/eval/tests/test_rollout.py,sha256=tsMx9gFRRc0Afzs68wOf-G0_zGYkN4zYZ4KUNU0c8bk,11232
182
+ hud/eval/tests/test_rollout.py,sha256=YUVqzDbIg9Y5LNnDwaNJ40hOL1BVAFgpHRHCyGlcfQw,14027
183
183
  hud/eval/tests/test_sync.py,sha256=1gFC65ZiZojeSn9q1v-RMK2Ps130mlh-aXE7G8sn54k,5234
184
184
  hud/eval/tests/test_task.py,sha256=n0E3B3TBYV6aM2_KFVGPHuD9nBGlpwq4ZvBu9wpjqtU,9754
185
185
  hud/graders/__init__.py,sha256=eccF8MXHQBvmynULljOCEMn82YSK0HSScD1TlS8UoT4,1570
@@ -226,8 +226,8 @@ hud/utils/tests/test_platform.py,sha256=mwhyFkUBvgmHRc43vQ_JgAAW2N9fIaxkQhVo-GB4
226
226
  hud/utils/tests/test_requests.py,sha256=ENK6P5xLTuSgWDcCau4zCj_5zPV_EooGwU4P8YYl5Gw,9109
227
227
  hud/utils/tests/test_serialization.py,sha256=GY4NiFUJtwLSYQWA0n1zme-Ul4DnBLByHCOOkxn2kLM,819
228
228
  hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
229
- hud_python-0.6.7.dist-info/METADATA,sha256=o9vcEt4elYRJ6KUPO8whEG-i0dqYl8-lj_L6wr7gKaA,12344
230
- hud_python-0.6.7.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
231
- hud_python-0.6.7.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
232
- hud_python-0.6.7.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
233
- hud_python-0.6.7.dist-info/RECORD,,
229
+ hud_python-0.6.8.dev0.dist-info/METADATA,sha256=k0BA7OmInHsM-CB-pm5GNc0yYVwF7EbUr0etmiU-xXg,12427
230
+ hud_python-0.6.8.dev0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
231
+ hud_python-0.6.8.dev0.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
232
+ hud_python-0.6.8.dev0.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
233
+ hud_python-0.6.8.dev0.dist-info/RECORD,,