hud-python 0.6.6__py3-none-any.whl → 0.6.8.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/agents/openai_compatible/agent.py +7 -3
- hud/agents/openai_compatible/tools/__init__.py +4 -2
- hud/agents/openai_compatible/tools/filesystem.py +227 -33
- hud/agents/tests/test_provider_native_tools.py +135 -6
- hud/cli/deploy.py +41 -1
- hud/cli/tests/test_deploy.py +86 -0
- hud/eval/runtime.py +4 -1
- hud/eval/sync.py +2 -2
- hud/eval/tests/test_hosted.py +19 -0
- hud/eval/tests/test_rollout.py +90 -0
- hud/eval/tests/test_sync.py +12 -0
- hud/version.py +1 -1
- {hud_python-0.6.6.dist-info → hud_python-0.6.8.dev0.dist-info}/METADATA +27 -24
- {hud_python-0.6.6.dist-info → hud_python-0.6.8.dev0.dist-info}/RECORD +17 -17
- {hud_python-0.6.6.dist-info → hud_python-0.6.8.dev0.dist-info}/WHEEL +0 -0
- {hud_python-0.6.6.dist-info → hud_python-0.6.8.dev0.dist-info}/entry_points.txt +0 -0
- {hud_python-0.6.6.dist-info → hud_python-0.6.8.dev0.dist-info}/licenses/LICENSE +0 -0
hud/agents/openai_compatible/agent.py
CHANGED
|
@@ -17,11 +17,13 @@ from hud.types import MCPToolCall, MCPToolResult
|
|
|
17
17
|
from hud.utils import gateway
|
|
18
18
|
|
|
19
19
|
from .tools import (
|
|
20
|
+
BashTool,
|
|
21
|
+
EditTool,
|
|
20
22
|
GlobTool,
|
|
21
23
|
GrepTool,
|
|
22
|
-
ListTool,
|
|
23
24
|
OpenAICompatibleMCPProxyTool,
|
|
24
25
|
ReadTool,
|
|
26
|
+
WriteTool,
|
|
25
27
|
)
|
|
26
28
|
from .tools.base import format_chat_result
|
|
27
29
|
|
|
@@ -41,10 +43,12 @@ class OpenAIChatAgent(ToolAgent[ChatCompletionMessageParam, OpenAIChatConfig]):
|
|
|
41
43
|
"""OpenAI-compatible agent using the chat.completions protocol."""
|
|
42
44
|
|
|
43
45
|
tool_catalog = (
|
|
46
|
+
BashTool,
|
|
44
47
|
ReadTool,
|
|
45
|
-
GrepTool,
|
|
46
48
|
GlobTool,
|
|
47
|
-
ListTool,
|
|
49
|
+
GrepTool,
|
|
50
|
+
EditTool,
|
|
51
|
+
WriteTool,
|
|
48
52
|
OpenAICompatibleMCPProxyTool,
|
|
49
53
|
)
|
|
50
54
|
|
hud/agents/openai_compatible/tools/__init__.py
CHANGED
|
@@ -2,13 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from .filesystem import GlobTool, GrepTool, ListTool, ReadTool
|
|
5
|
+
from .filesystem import BashTool, EditTool, GlobTool, GrepTool, ReadTool, WriteTool
|
|
6
6
|
from .mcp_proxy import OpenAICompatibleMCPProxyTool
|
|
7
7
|
|
|
8
8
|
__all__ = [
|
|
9
|
+
"BashTool",
|
|
10
|
+
"EditTool",
|
|
9
11
|
"GlobTool",
|
|
10
12
|
"GrepTool",
|
|
11
|
-
"ListTool",
|
|
12
13
|
"OpenAICompatibleMCPProxyTool",
|
|
13
14
|
"ReadTool",
|
|
15
|
+
"WriteTool",
|
|
14
16
|
]
|
hud/agents/openai_compatible/tools/filesystem.py
CHANGED
|
@@ -1,16 +1,20 @@
|
|
|
1
|
-
"""OpenAI-compatible
|
|
1
|
+
"""OpenAI-compatible OpenCode-style workspace tools backed by SSHClient."""
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import math
|
|
6
|
+
import posixpath
|
|
5
7
|
import shlex
|
|
6
8
|
from typing import Any, ClassVar
|
|
7
9
|
|
|
8
10
|
import mcp.types as mcp_types
|
|
9
11
|
|
|
10
12
|
from hud.agents.tools import SSHTool
|
|
11
|
-
from hud.agents.tools.base import AgentToolSpec, result_text
|
|
13
|
+
from hud.agents.tools.base import AgentToolSpec, result_text, tool_err
|
|
12
14
|
from hud.types import MCPToolResult
|
|
13
15
|
|
|
16
|
+
DEFAULT_READ_LIMIT = 2000
|
|
17
|
+
|
|
14
18
|
|
|
15
19
|
class _FilesystemTool(SSHTool):
|
|
16
20
|
description: ClassVar[str]
|
|
@@ -34,16 +38,26 @@ class _FilesystemTool(SSHTool):
|
|
|
34
38
|
|
|
35
39
|
class ReadTool(_FilesystemTool):
|
|
36
40
|
name = "read"
|
|
37
|
-
description =
|
|
41
|
+
description = (
|
|
42
|
+
"Reads a file or directory from the workspace. Use offset and limit for pagination."
|
|
43
|
+
)
|
|
38
44
|
parameters: ClassVar[dict[str, Any]] = {
|
|
39
45
|
"type": "object",
|
|
40
46
|
"properties": {
|
|
41
|
-
"filePath": {
|
|
47
|
+
"filePath": {
|
|
48
|
+
"type": "string",
|
|
49
|
+
"description": "The absolute path to the file or directory to read.",
|
|
50
|
+
},
|
|
42
51
|
"offset": {
|
|
43
52
|
"type": "integer",
|
|
44
|
-
"description": "
|
|
53
|
+
"description": "The line number to start reading from (1-indexed).",
|
|
54
|
+
"minimum": 0,
|
|
55
|
+
},
|
|
56
|
+
"limit": {
|
|
57
|
+
"type": "integer",
|
|
58
|
+
"description": "The maximum number of lines to read (defaults to 2000).",
|
|
59
|
+
"minimum": 1,
|
|
45
60
|
},
|
|
46
|
-
"limit": {"type": "integer", "description": "Maximum number of lines to read."},
|
|
47
61
|
},
|
|
48
62
|
"required": ["filePath"],
|
|
49
63
|
}
|
|
@@ -52,19 +66,205 @@ class ReadTool(_FilesystemTool):
|
|
|
52
66
|
path = arguments.get("filePath")
|
|
53
67
|
if not isinstance(path, str) or not path:
|
|
54
68
|
raise ValueError("filePath is required")
|
|
69
|
+
offset = _read_offset(arguments.get("offset"))
|
|
70
|
+
limit = _positive_int(arguments.get("limit"), default=DEFAULT_READ_LIMIT, name="limit")
|
|
71
|
+
if not (await self.bash(f"test -d {shlex.quote(path)}")).isError:
|
|
72
|
+
return await self._read_directory(path, offset=offset, limit=limit)
|
|
55
73
|
result = await self.file_read(path)
|
|
56
74
|
if result.isError:
|
|
57
75
|
return result
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
76
|
+
text = result_text(result)
|
|
77
|
+
lines = text.splitlines()
|
|
78
|
+
start = offset - 1
|
|
79
|
+
if start > len(lines) and not (len(lines) == 0 and offset == 1):
|
|
80
|
+
return tool_err(f"Offset {offset} is out of range for this file ({len(lines)} lines)")
|
|
81
|
+
sliced = lines[start : start + limit]
|
|
82
|
+
last = offset + len(sliced) - 1
|
|
83
|
+
more = last < len(lines)
|
|
84
|
+
body = [
|
|
85
|
+
f"<path>{path}</path>",
|
|
86
|
+
"<type>file</type>",
|
|
87
|
+
"<content>",
|
|
88
|
+
*[f"{i + offset}: {line}" for i, line in enumerate(sliced)],
|
|
89
|
+
]
|
|
90
|
+
if more:
|
|
91
|
+
body.append(
|
|
92
|
+
f"\n(Showing lines {offset}-{last} of {len(lines)}. "
|
|
93
|
+
f"Use offset={last + 1} to continue.)"
|
|
94
|
+
)
|
|
95
|
+
else:
|
|
96
|
+
body.append(f"\n(End of file - total {len(lines)} lines)")
|
|
97
|
+
body.append("</content>")
|
|
98
|
+
return MCPToolResult(content=[mcp_types.TextContent(type="text", text="\n".join(body))])
|
|
99
|
+
|
|
100
|
+
async def _read_directory(self, path: str, *, offset: int, limit: int) -> MCPToolResult:
|
|
101
|
+
result = await self.file_list(path)
|
|
102
|
+
if result.isError:
|
|
103
|
+
return result
|
|
104
|
+
entries = result_text(result).splitlines()
|
|
105
|
+
if entries == ["(empty)"]:
|
|
106
|
+
entries = []
|
|
107
|
+
start = offset - 1
|
|
108
|
+
sliced = entries[start : start + limit]
|
|
109
|
+
truncated = start + len(sliced) < len(entries)
|
|
110
|
+
body = [
|
|
111
|
+
f"<path>{path}</path>",
|
|
112
|
+
"<type>directory</type>",
|
|
113
|
+
"<entries>",
|
|
114
|
+
*sliced,
|
|
115
|
+
]
|
|
116
|
+
if truncated:
|
|
117
|
+
body.append(
|
|
118
|
+
f"\n(Showing {len(sliced)} of {len(entries)} entries. "
|
|
119
|
+
f"Use offset={offset + len(sliced)} to continue.)"
|
|
66
120
|
)
|
|
67
|
-
|
|
121
|
+
else:
|
|
122
|
+
body.append(f"\n({len(entries)} entries)")
|
|
123
|
+
body.append("</entries>")
|
|
124
|
+
return MCPToolResult(content=[mcp_types.TextContent(type="text", text="\n".join(body))])
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class BashTool(_FilesystemTool):
|
|
128
|
+
name = "bash"
|
|
129
|
+
description = (
|
|
130
|
+
"Executes a shell command in the workspace. Prefer read, grep, glob, edit, "
|
|
131
|
+
"and write for filesystem operations."
|
|
132
|
+
)
|
|
133
|
+
parameters: ClassVar[dict[str, Any]] = {
|
|
134
|
+
"type": "object",
|
|
135
|
+
"properties": {
|
|
136
|
+
"command": {"type": "string", "description": "The command to execute."},
|
|
137
|
+
"timeout": {
|
|
138
|
+
"type": "integer",
|
|
139
|
+
"description": "Optional timeout in milliseconds.",
|
|
140
|
+
"minimum": 1,
|
|
141
|
+
},
|
|
142
|
+
"workdir": {
|
|
143
|
+
"type": "string",
|
|
144
|
+
"description": "The working directory to run the command in.",
|
|
145
|
+
},
|
|
146
|
+
},
|
|
147
|
+
"required": ["command"],
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
|
|
151
|
+
command = arguments.get("command")
|
|
152
|
+
if not isinstance(command, str) or not command:
|
|
153
|
+
raise ValueError("command is required")
|
|
154
|
+
timeout = arguments.get("timeout")
|
|
155
|
+
if timeout is not None:
|
|
156
|
+
if not isinstance(timeout, int) or timeout < 1:
|
|
157
|
+
raise ValueError("timeout must be a positive integer")
|
|
158
|
+
seconds = max(1, math.ceil(timeout / 1000))
|
|
159
|
+
command = f"timeout {seconds}s bash -lc {shlex.quote(command)}"
|
|
160
|
+
workdir = arguments.get("workdir")
|
|
161
|
+
if isinstance(workdir, str) and workdir:
|
|
162
|
+
command = f"cd {shlex.quote(workdir)} && {command}"
|
|
163
|
+
return await self.bash(command)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class EditTool(_FilesystemTool):
|
|
167
|
+
name = "edit"
|
|
168
|
+
description = (
|
|
169
|
+
"Replaces text within a file. Use oldString as exact literal context. "
|
|
170
|
+
"Set replaceAll to true to replace every occurrence."
|
|
171
|
+
)
|
|
172
|
+
parameters: ClassVar[dict[str, Any]] = {
|
|
173
|
+
"type": "object",
|
|
174
|
+
"properties": {
|
|
175
|
+
"filePath": {
|
|
176
|
+
"type": "string",
|
|
177
|
+
"description": "The absolute path to the file to modify.",
|
|
178
|
+
},
|
|
179
|
+
"oldString": {"type": "string", "description": "The text to replace."},
|
|
180
|
+
"newString": {
|
|
181
|
+
"type": "string",
|
|
182
|
+
"description": "The text to replace it with (must be different from oldString).",
|
|
183
|
+
},
|
|
184
|
+
"replaceAll": {
|
|
185
|
+
"type": "boolean",
|
|
186
|
+
"description": "Replace all occurrences of oldString (default false).",
|
|
187
|
+
},
|
|
188
|
+
},
|
|
189
|
+
"required": ["filePath", "oldString", "newString"],
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
|
|
193
|
+
path = arguments.get("filePath")
|
|
194
|
+
if not isinstance(path, str) or not path:
|
|
195
|
+
raise ValueError("filePath is required")
|
|
196
|
+
old = arguments.get("oldString")
|
|
197
|
+
new = arguments.get("newString")
|
|
198
|
+
if not isinstance(old, str):
|
|
199
|
+
raise ValueError("oldString is required")
|
|
200
|
+
if not isinstance(new, str):
|
|
201
|
+
raise ValueError("newString is required")
|
|
202
|
+
if old == new:
|
|
203
|
+
return tool_err("No changes to apply: oldString and newString are identical.")
|
|
204
|
+
if old == "":
|
|
205
|
+
exists = not (await self.bash(f"test -e {shlex.quote(path)}")).isError
|
|
206
|
+
if exists:
|
|
207
|
+
return tool_err(
|
|
208
|
+
"oldString cannot be empty when editing an existing file. "
|
|
209
|
+
"Provide exact text to replace, or use write for full-file replacement."
|
|
210
|
+
)
|
|
211
|
+
mkdir = await self._ensure_parent(path)
|
|
212
|
+
if mkdir.isError:
|
|
213
|
+
return mkdir
|
|
214
|
+
return await self.file_write(path, new)
|
|
215
|
+
|
|
216
|
+
existing = await self.file_read(path)
|
|
217
|
+
if existing.isError:
|
|
218
|
+
return existing
|
|
219
|
+
text = result_text(existing)
|
|
220
|
+
count = text.count(old)
|
|
221
|
+
if count == 0:
|
|
222
|
+
return tool_err(f"oldString not found in {path}")
|
|
223
|
+
replace_all = arguments.get("replaceAll") is True
|
|
224
|
+
if count > 1 and not replace_all:
|
|
225
|
+
return tool_err(f"oldString matches {count} times in {path}; set replaceAll to true")
|
|
226
|
+
next_text = text.replace(old, new) if replace_all else text.replace(old, new, 1)
|
|
227
|
+
return await self.file_write(path, next_text)
|
|
228
|
+
|
|
229
|
+
async def _ensure_parent(self, path: str) -> MCPToolResult:
|
|
230
|
+
parent = posixpath.dirname(path)
|
|
231
|
+
if not parent or parent in {".", "/"}:
|
|
232
|
+
return MCPToolResult(content=[])
|
|
233
|
+
return await self.bash(f"mkdir -p {shlex.quote(parent)}")
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class WriteTool(_FilesystemTool):
|
|
237
|
+
name = "write"
|
|
238
|
+
description = "Creates or overwrites a file with the provided content."
|
|
239
|
+
parameters: ClassVar[dict[str, Any]] = {
|
|
240
|
+
"type": "object",
|
|
241
|
+
"properties": {
|
|
242
|
+
"content": {"type": "string", "description": "The content to write to the file."},
|
|
243
|
+
"filePath": {
|
|
244
|
+
"type": "string",
|
|
245
|
+
"description": "The absolute path to the file to write.",
|
|
246
|
+
},
|
|
247
|
+
},
|
|
248
|
+
"required": ["content", "filePath"],
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
|
|
252
|
+
path = arguments.get("filePath")
|
|
253
|
+
if not isinstance(path, str) or not path:
|
|
254
|
+
raise ValueError("filePath is required")
|
|
255
|
+
content = arguments.get("content")
|
|
256
|
+
if not isinstance(content, str):
|
|
257
|
+
raise ValueError("content is required")
|
|
258
|
+
mkdir = await self._ensure_parent(path)
|
|
259
|
+
if mkdir.isError:
|
|
260
|
+
return mkdir
|
|
261
|
+
return await self.file_write(path, content)
|
|
262
|
+
|
|
263
|
+
async def _ensure_parent(self, path: str) -> MCPToolResult:
|
|
264
|
+
parent = posixpath.dirname(path)
|
|
265
|
+
if not parent or parent in {".", "/"}:
|
|
266
|
+
return MCPToolResult(content=[])
|
|
267
|
+
return await self.bash(f"mkdir -p {shlex.quote(parent)}")
|
|
68
268
|
|
|
69
269
|
|
|
70
270
|
class GrepTool(_FilesystemTool):
|
|
@@ -115,24 +315,18 @@ class GlobTool(_FilesystemTool):
|
|
|
115
315
|
return await self.bash(f"find {shlex.quote(str(path))} -name {shlex.quote(pattern)}")
|
|
116
316
|
|
|
117
317
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
"
|
|
123
|
-
|
|
124
|
-
"path": {"type": "string", "description": "Directory to list."},
|
|
125
|
-
"ignore": {
|
|
126
|
-
"type": "array",
|
|
127
|
-
"items": {"type": "string"},
|
|
128
|
-
"description": "Glob patterns to ignore.",
|
|
129
|
-
},
|
|
130
|
-
},
|
|
131
|
-
}
|
|
318
|
+
def _positive_int(value: Any, *, default: int, name: str) -> int:
|
|
319
|
+
if value is None:
|
|
320
|
+
return default
|
|
321
|
+
if not isinstance(value, int) or value < 1:
|
|
322
|
+
raise ValueError(f"{name} must be a positive integer")
|
|
323
|
+
return value
|
|
132
324
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
325
|
+
|
|
326
|
+
def _read_offset(value: Any) -> int:
|
|
327
|
+
if value is None or value == 0:
|
|
328
|
+
return 1
|
|
329
|
+
return _positive_int(value, default=1, name="offset")
|
|
136
330
|
|
|
137
331
|
|
|
138
|
-
__all__ = ["GlobTool", "GrepTool", "ListTool", "ReadTool"]
|
|
332
|
+
__all__ = ["BashTool", "EditTool", "GlobTool", "GrepTool", "ReadTool", "WriteTool"]
|
hud/agents/tests/test_provider_native_tools.py
CHANGED
|
@@ -7,16 +7,19 @@ client and assert the command translation + result shape, fully offline.
|
|
|
7
7
|
|
|
8
8
|
from __future__ import annotations
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
import shlex
|
|
11
|
+
from typing import Any, cast
|
|
11
12
|
|
|
12
13
|
import pytest
|
|
13
14
|
|
|
14
15
|
from hud.agents.claude.tools.coding import ClaudeBashTool, ClaudeTextEditorTool
|
|
15
16
|
from hud.agents.gemini.tools.coding import GeminiEditTool, GeminiShellTool
|
|
16
17
|
from hud.agents.openai.tools.coding import OpenAIShellTool
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
from hud.agents.openai_compatible.agent import OpenAIChatAgent
|
|
19
|
+
from hud.agents.openai_compatible.tools import BashTool, EditTool, ReadTool, WriteTool
|
|
20
|
+
from hud.agents.tools.base import result_text
|
|
21
|
+
from hud.agents.types import OpenAIChatConfig
|
|
22
|
+
from hud.capabilities import Capability, SSHClient
|
|
20
23
|
|
|
21
24
|
|
|
22
25
|
class _Completed:
|
|
@@ -61,6 +64,21 @@ class _FakeSFTP:
|
|
|
61
64
|
def open(self, path: str, mode: str) -> _FakeOpenFile:
|
|
62
65
|
return _FakeOpenFile(self._store, path, mode)
|
|
63
66
|
|
|
67
|
+
async def listdir(self, path: str) -> list[str]:
|
|
68
|
+
prefix = path.rstrip("/")
|
|
69
|
+
if not prefix:
|
|
70
|
+
prefix = "/"
|
|
71
|
+
if prefix != "/":
|
|
72
|
+
prefix += "/"
|
|
73
|
+
names: set[str] = set()
|
|
74
|
+
for file_path in self._store:
|
|
75
|
+
if not file_path.startswith(prefix):
|
|
76
|
+
continue
|
|
77
|
+
rest = file_path[len(prefix) :]
|
|
78
|
+
if rest:
|
|
79
|
+
names.add(rest.split("/", 1)[0])
|
|
80
|
+
return sorted(names)
|
|
81
|
+
|
|
64
82
|
|
|
65
83
|
class _Conn:
|
|
66
84
|
def __init__(self, completed: _Completed, store: dict[str, bytes]) -> None:
|
|
@@ -70,13 +88,26 @@ class _Conn:
|
|
|
70
88
|
|
|
71
89
|
async def run(self, command: str, check: bool = False) -> _Completed:
|
|
72
90
|
self.commands.append(command)
|
|
91
|
+
parts = shlex.split(command)
|
|
92
|
+
if len(parts) == 3 and parts[:2] in (["test", "-d"], ["test", "-e"]):
|
|
93
|
+
path = parts[2]
|
|
94
|
+
exists = path in self._store or any(
|
|
95
|
+
file_path.startswith(path.rstrip("/") + "/") for file_path in self._store
|
|
96
|
+
)
|
|
97
|
+
if parts[1] == "-d":
|
|
98
|
+
exists = any(
|
|
99
|
+
file_path.startswith(path.rstrip("/") + "/") for file_path in self._store
|
|
100
|
+
)
|
|
101
|
+
return _Completed(exit_status=0 if exists else 1)
|
|
102
|
+
if len(parts) >= 3 and parts[:2] == ["mkdir", "-p"]:
|
|
103
|
+
return _Completed(exit_status=0)
|
|
73
104
|
return self._completed
|
|
74
105
|
|
|
75
106
|
def start_sftp_client(self) -> _FakeSFTP:
|
|
76
107
|
return _FakeSFTP(self._store)
|
|
77
108
|
|
|
78
109
|
|
|
79
|
-
class _FakeSSH:
|
|
110
|
+
class _FakeSSH(SSHClient):
|
|
80
111
|
"""Duck-typed ``SSHClient``: ``conn.run`` (bash) + ``conn.start_sftp_client`` (files)."""
|
|
81
112
|
|
|
82
113
|
def __init__(
|
|
@@ -87,7 +118,10 @@ class _FakeSSH:
|
|
|
87
118
|
files: dict[str, bytes] | None = None,
|
|
88
119
|
) -> None:
|
|
89
120
|
self.files: dict[str, bytes] = files or {}
|
|
90
|
-
|
|
121
|
+
super().__init__(
|
|
122
|
+
Capability(name="shell", protocol="ssh/2", url="ssh://localhost:22"),
|
|
123
|
+
cast("Any", _Conn(_Completed(stdout=stdout, exit_status=exit_status), self.files)),
|
|
124
|
+
)
|
|
91
125
|
|
|
92
126
|
|
|
93
127
|
def _ssh(**kwargs: Any) -> SSHClient:
|
|
@@ -98,6 +132,11 @@ def _commands(tool: Any) -> list[str]:
|
|
|
98
132
|
return tool.client.conn.commands
|
|
99
133
|
|
|
100
134
|
|
|
135
|
+
class _OpenAIChatAgentForTest(OpenAIChatAgent):
|
|
136
|
+
async def build_tools_for_test(self, ssh: SSHClient) -> tuple[dict[str, Any], list[Any]]:
|
|
137
|
+
return await self._build_tools({"ssh": ssh})
|
|
138
|
+
|
|
139
|
+
|
|
101
140
|
# ─── OpenAI shell ─────────────────────────────────────────────────────
|
|
102
141
|
|
|
103
142
|
|
|
@@ -135,6 +174,96 @@ def test_openai_shell_to_params_is_shell_type() -> None:
|
|
|
135
174
|
assert tool.to_params()["type"] == "shell"
|
|
136
175
|
|
|
137
176
|
|
|
177
|
+
# ─── OpenAI-compatible OpenCode workspace tools ───────────────────────
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
async def test_openai_compatible_catalog_matches_opencode_workspace_tools() -> None:
|
|
181
|
+
agent = _OpenAIChatAgentForTest(
|
|
182
|
+
OpenAIChatConfig(model="qwen3.6-plus", model_client=cast("Any", object()))
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
tools, params = await agent.build_tools_for_test(_ssh())
|
|
186
|
+
|
|
187
|
+
assert list(tools) == ["bash", "read", "glob", "grep", "edit", "write"]
|
|
188
|
+
assert [param["function"]["name"] for param in params] == [
|
|
189
|
+
"bash",
|
|
190
|
+
"read",
|
|
191
|
+
"glob",
|
|
192
|
+
"grep",
|
|
193
|
+
"edit",
|
|
194
|
+
"write",
|
|
195
|
+
]
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
async def test_openai_compatible_bash_uses_workdir_and_timeout() -> None:
|
|
199
|
+
tool = BashTool(spec=BashTool.default_spec("qwen"), client=_ssh())
|
|
200
|
+
|
|
201
|
+
await tool.execute({"command": "echo hi", "workdir": "/tmp/my dir", "timeout": 2500})
|
|
202
|
+
|
|
203
|
+
assert _commands(tool) == ["cd '/tmp/my dir' && timeout 3s bash -lc 'echo hi'"]
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
async def test_openai_compatible_write_stores_file_via_workspace_sftp() -> None:
|
|
207
|
+
ssh = _FakeSSH()
|
|
208
|
+
tool = WriteTool(spec=WriteTool.default_spec("qwen"), client=cast("SSHClient", ssh))
|
|
209
|
+
|
|
210
|
+
result = await tool.execute({"filePath": "/REPORT.md", "content": "done"})
|
|
211
|
+
|
|
212
|
+
assert result.isError is False
|
|
213
|
+
assert ssh.files["/REPORT.md"] == b"done"
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
async def test_openai_compatible_edit_rewrites_unique_match() -> None:
|
|
217
|
+
ssh = _FakeSSH(files={"/f.txt": b"hello old world"})
|
|
218
|
+
tool = EditTool(spec=EditTool.default_spec("qwen"), client=cast("SSHClient", ssh))
|
|
219
|
+
|
|
220
|
+
result = await tool.execute(
|
|
221
|
+
{"filePath": "/f.txt", "oldString": "old", "newString": "new"},
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
assert result.isError is False
|
|
225
|
+
assert ssh.files["/f.txt"] == b"hello new world"
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
async def test_openai_compatible_edit_rejects_ambiguous_match() -> None:
|
|
229
|
+
ssh = _FakeSSH(files={"/f.txt": b"a a a"})
|
|
230
|
+
tool = EditTool(spec=EditTool.default_spec("qwen"), client=cast("SSHClient", ssh))
|
|
231
|
+
|
|
232
|
+
result = await tool.execute(
|
|
233
|
+
{"filePath": "/f.txt", "oldString": "a", "newString": "b"},
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
assert result.isError is True
|
|
237
|
+
assert ssh.files["/f.txt"] == b"a a a"
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
async def test_openai_compatible_read_lists_directories() -> None:
|
|
241
|
+
tool = ReadTool(
|
|
242
|
+
spec=ReadTool.default_spec("qwen"),
|
|
243
|
+
client=_ssh(files={"/work/a.txt": b"a", "/work/nested/b.txt": b"b"}),
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
result = await tool.execute({"filePath": "/work"})
|
|
247
|
+
|
|
248
|
+
text = result_text(result)
|
|
249
|
+
assert "<type>directory</type>" in text
|
|
250
|
+
assert "a.txt" in text
|
|
251
|
+
assert "nested" in text
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
async def test_openai_compatible_read_accepts_zero_offset_for_first_page() -> None:
|
|
255
|
+
tool = ReadTool(
|
|
256
|
+
spec=ReadTool.default_spec("qwen"),
|
|
257
|
+
client=_ssh(files={"/f.txt": b"alpha\nbeta\n"}),
|
|
258
|
+
)
|
|
259
|
+
|
|
260
|
+
result = await tool.execute({"filePath": "/f.txt", "offset": 0, "limit": 1})
|
|
261
|
+
|
|
262
|
+
text = result_text(result)
|
|
263
|
+
assert "1: alpha" in text
|
|
264
|
+
assert "2: beta" not in text
|
|
265
|
+
|
|
266
|
+
|
|
138
267
|
# ─── Gemini shell ─────────────────────────────────────────────────────
|
|
139
268
|
|
|
140
269
|
|
hud/cli/deploy.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
|
+
import json
|
|
6
7
|
import logging
|
|
7
8
|
import os
|
|
8
9
|
import time
|
|
@@ -12,6 +13,7 @@ from typing import Any
|
|
|
12
13
|
|
|
13
14
|
import httpx
|
|
14
15
|
import typer
|
|
16
|
+
from pydantic import ValidationError
|
|
15
17
|
|
|
16
18
|
from hud.cli.utils.build_display import display_build_summary
|
|
17
19
|
from hud.cli.utils.build_logs import poll_build_status, stream_build_logs
|
|
@@ -19,6 +21,7 @@ from hud.cli.utils.config import parse_env_file, parse_key_value
|
|
|
19
21
|
from hud.cli.utils.context import create_build_context_tarball, format_size
|
|
20
22
|
from hud.cli.utils.registry import get_registry_environment
|
|
21
23
|
from hud.cli.utils.source import EnvironmentSource, normalize_environment_name
|
|
24
|
+
from hud.eval.runtime import RuntimeConfig
|
|
22
25
|
from hud.utils.exceptions import HudRequestError
|
|
23
26
|
from hud.utils.hud_console import HUDConsole
|
|
24
27
|
from hud.utils.platform import PlatformClient
|
|
@@ -32,6 +35,7 @@ class _DeployPlan:
|
|
|
32
35
|
name: str
|
|
33
36
|
registry_id: str | None
|
|
34
37
|
runtime: str | None
|
|
38
|
+
runtime_config: dict[str, Any] | None
|
|
35
39
|
env_vars: dict[str, str]
|
|
36
40
|
build_args: dict[str, str]
|
|
37
41
|
build_secrets: dict[str, str]
|
|
@@ -75,6 +79,26 @@ def _normalize_runtime(runtime: str | None, console: HUDConsole) -> str | None:
|
|
|
75
79
|
raise typer.Exit(1)
|
|
76
80
|
|
|
77
81
|
|
|
82
|
+
def _load_runtime_config(path: str | None, console: HUDConsole) -> dict[str, Any] | None:
|
|
83
|
+
if path is None:
|
|
84
|
+
return None
|
|
85
|
+
config_path = Path(path).expanduser()
|
|
86
|
+
try:
|
|
87
|
+
raw = json.loads(config_path.read_text(encoding="utf-8"))
|
|
88
|
+
config = RuntimeConfig.model_validate(raw)
|
|
89
|
+
except FileNotFoundError:
|
|
90
|
+
console.error(f"Runtime config file not found: {config_path}")
|
|
91
|
+
raise typer.Exit(1) from None
|
|
92
|
+
except json.JSONDecodeError as exc:
|
|
93
|
+
console.error(f"Invalid runtime config JSON in {config_path}: {exc.msg}")
|
|
94
|
+
raise typer.Exit(1) from exc
|
|
95
|
+
except ValidationError as exc:
|
|
96
|
+
console.error(f"Invalid runtime config in {config_path}: {exc}")
|
|
97
|
+
raise typer.Exit(1) from exc
|
|
98
|
+
payload = config.request_payload()
|
|
99
|
+
return payload or None
|
|
100
|
+
|
|
101
|
+
|
|
78
102
|
def _load_env_vars(path: Path, console: HUDConsole, *, warn_missing: bool) -> dict[str, str]:
|
|
79
103
|
if not path.exists():
|
|
80
104
|
if warn_missing:
|
|
@@ -322,6 +346,7 @@ def _prepare_deploy_plan(
|
|
|
322
346
|
build_args: list[str] | None,
|
|
323
347
|
build_secrets: list[str] | None,
|
|
324
348
|
runtime: str | None,
|
|
349
|
+
runtime_config: str | None,
|
|
325
350
|
verbose: bool,
|
|
326
351
|
platform: PlatformClient,
|
|
327
352
|
console: HUDConsole,
|
|
@@ -357,11 +382,13 @@ def _prepare_deploy_plan(
|
|
|
357
382
|
build_args_dict = _parse_key_value_flags(build_args, option="--build-arg", console=console)
|
|
358
383
|
if build_args_dict and verbose:
|
|
359
384
|
console.info(f"Build arguments: {', '.join(build_args_dict.keys())}")
|
|
385
|
+
normalized_runtime = _normalize_runtime(runtime, console)
|
|
360
386
|
|
|
361
387
|
return _DeployPlan(
|
|
362
388
|
name=resolved_name,
|
|
363
389
|
registry_id=registry_id,
|
|
364
|
-
runtime=_normalize_runtime(runtime, console),
|
|
390
|
+
runtime=normalized_runtime,
|
|
391
|
+
runtime_config=_load_runtime_config(runtime_config, console),
|
|
365
392
|
env_vars=env_vars,
|
|
366
393
|
build_args=build_args_dict,
|
|
367
394
|
build_secrets=_collect_build_secrets(build_secrets, env_dir=env_dir, console=console),
|
|
@@ -379,6 +406,7 @@ def deploy_environment(
|
|
|
379
406
|
build_args: list[str] | None = None,
|
|
380
407
|
build_secrets: list[str] | None = None,
|
|
381
408
|
runtime: str | None = None,
|
|
409
|
+
runtime_config: str | None = None,
|
|
382
410
|
) -> None:
|
|
383
411
|
"""Deploy one HUD environment to the platform."""
|
|
384
412
|
hud_console = HUDConsole()
|
|
@@ -411,6 +439,7 @@ def deploy_environment(
|
|
|
411
439
|
build_args=build_args,
|
|
412
440
|
build_secrets=build_secrets,
|
|
413
441
|
runtime=runtime,
|
|
442
|
+
runtime_config=runtime_config,
|
|
414
443
|
verbose=verbose,
|
|
415
444
|
platform=platform,
|
|
416
445
|
console=hud_console,
|
|
@@ -485,6 +514,8 @@ async def _trigger_build(
|
|
|
485
514
|
payload["registry_id"] = plan.registry_id
|
|
486
515
|
if plan.runtime:
|
|
487
516
|
payload["runtime_provider"] = plan.runtime
|
|
517
|
+
if plan.runtime_config:
|
|
518
|
+
payload["runtime_config"] = plan.runtime_config
|
|
488
519
|
if plan.env_vars:
|
|
489
520
|
payload["environment_variables"] = plan.env_vars
|
|
490
521
|
if plan.build_args:
|
|
@@ -644,6 +675,7 @@ def deploy_all(
|
|
|
644
675
|
build_args: list[str] | None = None,
|
|
645
676
|
build_secrets: list[str] | None = None,
|
|
646
677
|
runtime: str | None = None,
|
|
678
|
+
runtime_config: str | None = None,
|
|
647
679
|
) -> None:
|
|
648
680
|
"""Deploy each HUD environment under a parent directory."""
|
|
649
681
|
hud_console = HUDConsole()
|
|
@@ -683,6 +715,7 @@ def deploy_all(
|
|
|
683
715
|
build_args=build_args,
|
|
684
716
|
build_secrets=build_secrets,
|
|
685
717
|
runtime=runtime,
|
|
718
|
+
runtime_config=runtime_config,
|
|
686
719
|
)
|
|
687
720
|
succeeded.append(env_dir.name)
|
|
688
721
|
except (typer.Exit, SystemExit):
|
|
@@ -762,6 +795,11 @@ def deploy_command(
|
|
|
762
795
|
"--runtime",
|
|
763
796
|
help="Persist Modal as the hosted runtime for this registry",
|
|
764
797
|
),
|
|
798
|
+
runtime_config: str | None = typer.Option(
|
|
799
|
+
None,
|
|
800
|
+
"--runtime-config",
|
|
801
|
+
help="Path to a JSON RuntimeConfig for hosted runs",
|
|
802
|
+
),
|
|
765
803
|
) -> None:
|
|
766
804
|
"""Deploy HUD environment to the platform.
|
|
767
805
|
|
|
@@ -781,6 +819,7 @@ def deploy_command(
|
|
|
781
819
|
build_args=build_args,
|
|
782
820
|
build_secrets=secrets,
|
|
783
821
|
runtime=runtime,
|
|
822
|
+
runtime_config=runtime_config,
|
|
784
823
|
)
|
|
785
824
|
return
|
|
786
825
|
|
|
@@ -795,4 +834,5 @@ def deploy_command(
|
|
|
795
834
|
build_args=build_args,
|
|
796
835
|
build_secrets=secrets,
|
|
797
836
|
runtime=runtime,
|
|
837
|
+
runtime_config=runtime_config,
|
|
798
838
|
)
|
hud/cli/tests/test_deploy.py
CHANGED
|
@@ -179,6 +179,47 @@ class TestCollectEnvironmentVariables:
|
|
|
179
179
|
assert "INVALID_FORMAT" not in result
|
|
180
180
|
|
|
181
181
|
|
|
182
|
+
class TestRuntimeConfigFile:
|
|
183
|
+
def test_load_runtime_config_uses_sdk_shape(self, tmp_path: Path) -> None:
|
|
184
|
+
from hud.cli.deploy import _load_runtime_config
|
|
185
|
+
from hud.utils.hud_console import HUDConsole
|
|
186
|
+
|
|
187
|
+
config_path = tmp_path / "runtime.json"
|
|
188
|
+
config_path.write_text(
|
|
189
|
+
json.dumps(
|
|
190
|
+
{
|
|
191
|
+
"resources": {"gpu": {"type": "A10G", "count": 2}},
|
|
192
|
+
"limits": {"startup_timeout_s": 300},
|
|
193
|
+
}
|
|
194
|
+
),
|
|
195
|
+
encoding="utf-8",
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
assert _load_runtime_config(str(config_path), HUDConsole()) == {
|
|
199
|
+
"resources": {"gpu": {"type": "A10G", "count": 2}},
|
|
200
|
+
"limits": {"startup_timeout_s": 300},
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
def test_load_runtime_config_preserves_null_override(self, tmp_path: Path) -> None:
|
|
204
|
+
from hud.cli.deploy import _load_runtime_config
|
|
205
|
+
from hud.utils.hud_console import HUDConsole
|
|
206
|
+
|
|
207
|
+
config_path = tmp_path / "runtime.json"
|
|
208
|
+
config_path.write_text(json.dumps({"resources": None}), encoding="utf-8")
|
|
209
|
+
|
|
210
|
+
assert _load_runtime_config(str(config_path), HUDConsole()) == {"resources": None}
|
|
211
|
+
|
|
212
|
+
def test_load_runtime_config_rejects_unknown_fields(self, tmp_path: Path) -> None:
|
|
213
|
+
from hud.cli.deploy import _load_runtime_config
|
|
214
|
+
from hud.utils.hud_console import HUDConsole
|
|
215
|
+
|
|
216
|
+
config_path = tmp_path / "runtime.json"
|
|
217
|
+
config_path.write_text(json.dumps({"provider_config": {}}), encoding="utf-8")
|
|
218
|
+
|
|
219
|
+
with pytest.raises(typer.Exit):
|
|
220
|
+
_load_runtime_config(str(config_path), HUDConsole())
|
|
221
|
+
|
|
222
|
+
|
|
182
223
|
class TestDeployEnvironment:
|
|
183
224
|
"""Tests for deploy_environment function."""
|
|
184
225
|
|
|
@@ -262,6 +303,7 @@ class TestDeployAsync:
|
|
|
262
303
|
name="test-env",
|
|
263
304
|
registry_id=None,
|
|
264
305
|
runtime=None,
|
|
306
|
+
runtime_config=None,
|
|
265
307
|
env_vars={},
|
|
266
308
|
build_args={},
|
|
267
309
|
build_secrets={},
|
|
@@ -292,6 +334,7 @@ class TestDeployAsync:
|
|
|
292
334
|
name="test-env",
|
|
293
335
|
registry_id=None,
|
|
294
336
|
runtime=None,
|
|
337
|
+
runtime_config=None,
|
|
295
338
|
env_vars={},
|
|
296
339
|
build_args={},
|
|
297
340
|
build_secrets={},
|
|
@@ -331,6 +374,7 @@ class TestDeployAsync:
|
|
|
331
374
|
name="test-env",
|
|
332
375
|
registry_id=None,
|
|
333
376
|
runtime="modal",
|
|
377
|
+
runtime_config=None,
|
|
334
378
|
env_vars={},
|
|
335
379
|
build_args={},
|
|
336
380
|
build_secrets={},
|
|
@@ -343,6 +387,48 @@ class TestDeployAsync:
|
|
|
343
387
|
assert platform.payload is not None
|
|
344
388
|
assert platform.payload["runtime_provider"] == "modal"
|
|
345
389
|
|
|
390
|
+
@pytest.mark.asyncio
|
|
391
|
+
async def test_trigger_build_sends_runtime_config(self) -> None:
|
|
392
|
+
from hud.cli.deploy import _DeployPlan, _trigger_build
|
|
393
|
+
from hud.utils.hud_console import HUDConsole
|
|
394
|
+
from hud.utils.platform import PlatformClient
|
|
395
|
+
|
|
396
|
+
class FakePlatform(PlatformClient):
|
|
397
|
+
payload: dict[str, object] | None = None
|
|
398
|
+
|
|
399
|
+
async def apost(
|
|
400
|
+
self,
|
|
401
|
+
path: str,
|
|
402
|
+
*,
|
|
403
|
+
json: object | None = None,
|
|
404
|
+
) -> dict[str, object]:
|
|
405
|
+
assert path == "/builds/trigger"
|
|
406
|
+
assert isinstance(json, dict)
|
|
407
|
+
object.__setattr__(self, "payload", json)
|
|
408
|
+
return {"id": "build-1", "registry_id": "registry-1"}
|
|
409
|
+
|
|
410
|
+
runtime_config = {"resources": {"gpu": {"type": "A10G", "count": 1}}}
|
|
411
|
+
platform = FakePlatform("https://api.example", "key")
|
|
412
|
+
result = await _trigger_build(
|
|
413
|
+
platform,
|
|
414
|
+
build_id="build-1",
|
|
415
|
+
plan=_DeployPlan(
|
|
416
|
+
name="test-env",
|
|
417
|
+
registry_id=None,
|
|
418
|
+
runtime="modal",
|
|
419
|
+
runtime_config=runtime_config,
|
|
420
|
+
env_vars={},
|
|
421
|
+
build_args={},
|
|
422
|
+
build_secrets={},
|
|
423
|
+
),
|
|
424
|
+
no_cache=False,
|
|
425
|
+
console=HUDConsole(),
|
|
426
|
+
)
|
|
427
|
+
|
|
428
|
+
assert result == {"id": "build-1", "registry_id": "registry-1"}
|
|
429
|
+
assert platform.payload is not None
|
|
430
|
+
assert platform.payload["runtime_config"] == runtime_config
|
|
431
|
+
|
|
346
432
|
|
|
347
433
|
class TestSaveDeployLink:
|
|
348
434
|
"""Tests for _save_deploy_link function."""
|
hud/eval/runtime.py
CHANGED
|
@@ -108,6 +108,9 @@ class RuntimeConfig(BaseModel):
|
|
|
108
108
|
self.model_dump() | override.model_dump(exclude_unset=True)
|
|
109
109
|
)
|
|
110
110
|
|
|
111
|
+
def request_payload(self) -> dict[str, Any]:
|
|
112
|
+
return self.model_dump(mode="json", exclude_unset=True)
|
|
113
|
+
|
|
111
114
|
|
|
112
115
|
class Provider(Protocol):
|
|
113
116
|
"""Server placement: called with the task row being placed, acquire one
|
|
@@ -925,7 +928,7 @@ class HostedRuntime:
|
|
|
925
928
|
if group_id is not None:
|
|
926
929
|
payload["group_id"] = group_id
|
|
927
930
|
if task.runtime_config is not None:
|
|
928
|
-
runtime_config = task.runtime_config.
|
|
931
|
+
runtime_config = task.runtime_config.request_payload()
|
|
929
932
|
if runtime_config:
|
|
930
933
|
payload["runtime_config"] = runtime_config
|
|
931
934
|
await platform.apost("/rollouts/submit", json=payload)
|
hud/eval/sync.py
CHANGED
|
@@ -163,7 +163,7 @@ def task_upload_payload(task: Task) -> dict[str, Any]:
|
|
|
163
163
|
if task.columns:
|
|
164
164
|
payload["columns"] = task.columns
|
|
165
165
|
if task.runtime_config is not None:
|
|
166
|
-
payload["runtime_config"] = task.runtime_config.
|
|
166
|
+
payload["runtime_config"] = task.runtime_config.request_payload()
|
|
167
167
|
return payload
|
|
168
168
|
|
|
169
169
|
|
|
@@ -176,7 +176,7 @@ def _task_signature(task: Task) -> str:
|
|
|
176
176
|
if task.columns:
|
|
177
177
|
sig_data["columns"] = task.columns
|
|
178
178
|
if task.runtime_config is not None:
|
|
179
|
-
sig_data["runtime_config"] = task.runtime_config.
|
|
179
|
+
sig_data["runtime_config"] = task.runtime_config.request_payload()
|
|
180
180
|
return f"{task.id}|" + json.dumps(
|
|
181
181
|
sig_data,
|
|
182
182
|
sort_keys=True,
|
hud/eval/tests/test_hosted.py
CHANGED
|
@@ -164,6 +164,25 @@ async def test_run_submits_and_polls_to_terminal(monkeypatch: pytest.MonkeyPatch
|
|
|
164
164
|
assert payload["agent"]["config"]["model"] == "test-model"
|
|
165
165
|
|
|
166
166
|
|
|
167
|
+
@pytest.mark.asyncio
|
|
168
|
+
async def test_run_preserves_runtime_config_null_override(
|
|
169
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
170
|
+
) -> None:
|
|
171
|
+
platform = _FakePlatform([{"status": "completed", "reward": 0.5}])
|
|
172
|
+
monkeypatch.setattr(
|
|
173
|
+
"hud.eval.runtime.PlatformClient.from_settings", classmethod(lambda cls: platform)
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
await HostedRuntime(poll_interval=0.0).run(
|
|
177
|
+
Task(env="sums", id="add", runtime_config=RuntimeConfig(resources=None)),
|
|
178
|
+
_agent(),
|
|
179
|
+
job_id=uuid.uuid4().hex,
|
|
180
|
+
trace_id=uuid.uuid4().hex,
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
assert platform.posts[0][1]["runtime_config"] == {"resources": None}
|
|
184
|
+
|
|
185
|
+
|
|
167
186
|
@pytest.mark.asyncio
|
|
168
187
|
async def test_run_timeout_requests_platform_cancel(monkeypatch: pytest.MonkeyPatch) -> None:
|
|
169
188
|
platform = _FakePlatform([{"status": "running"}])
|
hud/eval/tests/test_rollout.py
CHANGED
|
@@ -13,14 +13,18 @@ the atom and return a :class:`Job`.
|
|
|
13
13
|
from __future__ import annotations
|
|
14
14
|
|
|
15
15
|
import asyncio
|
|
16
|
+
import json
|
|
16
17
|
import textwrap
|
|
17
18
|
from contextlib import asynccontextmanager
|
|
19
|
+
from types import SimpleNamespace
|
|
18
20
|
from typing import TYPE_CHECKING, Any
|
|
19
21
|
|
|
20
22
|
import mcp.types as mcp_types
|
|
21
23
|
import pytest
|
|
22
24
|
|
|
23
25
|
from hud.agents.base import Agent
|
|
26
|
+
from hud.agents.openai_compatible import OpenAIChatAgent
|
|
27
|
+
from hud.agents.types import OpenAIChatConfig
|
|
24
28
|
from hud.environment import Environment
|
|
25
29
|
from hud.eval import Job, LocalRuntime, Task, Taskset
|
|
26
30
|
from hud.eval.run import Run, rollout
|
|
@@ -63,6 +67,44 @@ class _FnAgent(Agent):
|
|
|
63
67
|
run.trace.content = self._fn(run.prompt)
|
|
64
68
|
|
|
65
69
|
|
|
70
|
+
class _SequencedCompletions:
|
|
71
|
+
def __init__(self, responses: list[Any]) -> None:
|
|
72
|
+
self._responses = responses
|
|
73
|
+
self.requests: list[dict[str, Any]] = []
|
|
74
|
+
|
|
75
|
+
async def create(self, **kwargs: Any) -> Any:
|
|
76
|
+
self.requests.append(kwargs)
|
|
77
|
+
return self._responses.pop(0)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class _FakeOpenAI:
|
|
81
|
+
def __init__(self, responses: list[Any]) -> None:
|
|
82
|
+
self.chat = SimpleNamespace(completions=_SequencedCompletions(responses))
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _chat_response(content: str, tool_calls: list[Any] | None = None) -> Any:
|
|
86
|
+
message = SimpleNamespace(
|
|
87
|
+
content=content,
|
|
88
|
+
tool_calls=tool_calls or [],
|
|
89
|
+
refusal=None,
|
|
90
|
+
model_dump=lambda exclude_none=True: {"role": "assistant", "content": content},
|
|
91
|
+
)
|
|
92
|
+
choice = SimpleNamespace(message=message, finish_reason="stop", logprobs=None)
|
|
93
|
+
return SimpleNamespace(
|
|
94
|
+
choices=[choice],
|
|
95
|
+
model="fake-openai-compatible",
|
|
96
|
+
usage=SimpleNamespace(prompt_tokens=1, completion_tokens=1, prompt_tokens_details=None),
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _tool_call(name: str, arguments: str) -> Any:
|
|
101
|
+
return SimpleNamespace(
|
|
102
|
+
type="function",
|
|
103
|
+
id=f"call_{name}",
|
|
104
|
+
function=SimpleNamespace(name=name, arguments=arguments),
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
|
|
66
108
|
def _add_task(a: int, b: int) -> Task:
|
|
67
109
|
"""A pure data row; the env it names is defined by the spawned file."""
|
|
68
110
|
return Task(env="sums", id="add", args={"a": a, "b": b})
|
|
@@ -86,6 +128,54 @@ async def test_rollout_returns_graded_run_with_trace_id(env_file: Path) -> None:
|
|
|
86
128
|
assert run.runtime.startswith("tcp://127.0.0.1:")
|
|
87
129
|
|
|
88
130
|
|
|
131
|
+
async def test_openai_compatible_write_reaches_workspace_grader(tmp_path: Path) -> None:
|
|
132
|
+
workspace = tmp_path / "workspace"
|
|
133
|
+
report = workspace / "REPORT.md"
|
|
134
|
+
env = Environment("opencode_report")
|
|
135
|
+
env.workspace(workspace, guest_path=str(workspace))
|
|
136
|
+
|
|
137
|
+
@env.initialize
|
|
138
|
+
async def seed() -> None:
|
|
139
|
+
workspace.mkdir(parents=True, exist_ok=True)
|
|
140
|
+
report.unlink(missing_ok=True)
|
|
141
|
+
|
|
142
|
+
@env.template()
|
|
143
|
+
async def write_report():
|
|
144
|
+
yield "Write PASS to REPORT.md."
|
|
145
|
+
yield 1.0 if report.exists() and report.read_text().strip() == "PASS" else 0.0
|
|
146
|
+
|
|
147
|
+
model_client = _FakeOpenAI(
|
|
148
|
+
[
|
|
149
|
+
_chat_response(
|
|
150
|
+
"",
|
|
151
|
+
[_tool_call("write", json.dumps({"filePath": str(report), "content": "PASS"}))],
|
|
152
|
+
),
|
|
153
|
+
_chat_response("done"),
|
|
154
|
+
]
|
|
155
|
+
)
|
|
156
|
+
agent = OpenAIChatAgent(
|
|
157
|
+
OpenAIChatConfig(model="qwen3.6-plus", model_client=model_client, max_steps=4)
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
run = await rollout(
|
|
161
|
+
Task(env="opencode_report", id="write_report"),
|
|
162
|
+
agent,
|
|
163
|
+
runtime=lambda _task: _local(env),
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
assert run.reward == 1.0
|
|
167
|
+
assert report.read_text() == "PASS"
|
|
168
|
+
tools = model_client.chat.completions.requests[0]["extra_body"]["tools"]
|
|
169
|
+
assert [tool["function"]["name"] for tool in tools] == [
|
|
170
|
+
"bash",
|
|
171
|
+
"read",
|
|
172
|
+
"glob",
|
|
173
|
+
"grep",
|
|
174
|
+
"edit",
|
|
175
|
+
"write",
|
|
176
|
+
]
|
|
177
|
+
|
|
178
|
+
|
|
89
179
|
async def test_mid_run_failure_keeps_the_real_run_and_its_evidence(env_file: Path) -> None:
|
|
90
180
|
def boom(prompt: str) -> str:
|
|
91
181
|
raise RuntimeError("agent exploded")
|
hud/eval/tests/test_sync.py
CHANGED
|
@@ -148,3 +148,15 @@ def test_task_upload_payload_includes_runtime_config() -> None:
|
|
|
148
148
|
payload = task_upload_payload(task)
|
|
149
149
|
|
|
150
150
|
assert payload["runtime_config"] == {"image": "img:tag"}
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def test_task_upload_payload_preserves_runtime_config_null_override() -> None:
|
|
154
|
+
task = Task(
|
|
155
|
+
env="e",
|
|
156
|
+
id="solve",
|
|
157
|
+
runtime_config=RuntimeConfig(resources=None),
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
payload = task_upload_payload(task)
|
|
161
|
+
|
|
162
|
+
assert payload["runtime_config"] == {"resources": None}
|
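hud/version.py
CHANGED
(+1 -1; hunk not shown in this capture)
{hud_python-0.6.6.dist-info → hud_python-0.6.8.dev0.dist-info}/METADATA
CHANGED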
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.6.6
|
|
3
|
+
Version: 0.6.8.dev0
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -87,7 +87,7 @@ Description-Content-Type: text/markdown
|
|
|
87
87
|
|
|
88
88
|
HUD is a platform for building RL environments for AI agents, across coding, browser, computer-use, and robotics. Define an environment, write tasks, and run them as evals and training across any model, at any scale.
|
|
89
89
|
|
|
90
|
-
To learn more, see the [documentation](https://docs.hud.ai) and [
|
|
90
|
+
To learn more, see the [documentation](https://docs.hud.ai) and [environment reference](https://docs.hud.ai/v6/core/environment).
|
|
91
91
|
|
|
92
92
|
[](https://pypi.org/project/hud-python/)
|
|
93
93
|
[](LICENSE)
|
|
@@ -120,7 +120,7 @@ Then scaffold your first environment:
|
|
|
120
120
|
hud init my-env
|
|
121
121
|
```
|
|
122
122
|
|
|
123
|
-

|
|
124
124
|
|
|
125
125
|
## The protocol
|
|
126
126
|
|
|
@@ -159,14 +159,14 @@ hud eval my-taskset --remote
|
|
|
159
159
|
For local iteration, the same protocol works against a container on your laptop:
|
|
160
160
|
|
|
161
161
|
```bash
|
|
162
|
-
|
|
163
|
-
docker run -d --name run1 my-env
|
|
164
|
-
|
|
165
|
-
|
|
162
|
+
docker build -f Dockerfile.hud -t my-env .
|
|
163
|
+
docker run -d --name run1 -p 8765:8765 my-env
|
|
164
|
+
hud task start fix_bug --url tcp://127.0.0.1:8765
|
|
165
|
+
hud task grade fix_bug --url tcp://127.0.0.1:8765 --answer "..."
|
|
166
166
|
docker rm -f run1
|
|
167
167
|
```
|
|
168
168
|
|
|
169
|
-
→ [
|
|
169
|
+
→ [Run & deploy](https://docs.hud.ai/v6/core/runtime)
|
|
170
170
|
|
|
171
171
|
## Environments & templates
|
|
172
172
|
|
|
@@ -193,7 +193,7 @@ hud eval tasks.py claude --group 3
|
|
|
193
193
|
|
|
194
194
|
Each graded evaluation is a **trace** (the SDK's live handle is a `Run`). With `HUD_API_KEY` set, every rollout is recorded on [hud.ai](https://hud.ai). Tasks that need a shell, browser, GUI, or robot declare **capabilities** (below); everything else — variants, grading, batching — stays identical.
|
|
195
195
|
|
|
196
|
-
→ [Quickstart](https://docs.hud.ai/quickstart) · [Tasks & tasksets](https://docs.hud.ai/
|
|
196
|
+
→ [Quickstart](https://docs.hud.ai/v6/start/quickstart) · [Tasks & tasksets](https://docs.hud.ai/v6/core/tasks)
|
|
197
197
|
|
|
198
198
|
## Capabilities & harnesses
|
|
199
199
|
|
|
@@ -211,39 +211,42 @@ A **capability** is a connection the environment exposes; a **harness** attaches
|
|
|
211
211
|
|
|
212
212
|
**Bring your own:** a harness attaches to a capability and defines a tool spec — wrap `browser-use` on `cdp`, a VLA policy on `robot`, or your own agent on `ssh` / `mcp`. No protocol work required.
|
|
213
213
|
|
|
214
|
-
→ [Capabilities](https://docs.hud.ai/
|
|
214
|
+
→ [Capabilities](https://docs.hud.ai/v6/core/capabilities) · [Models](https://docs.hud.ai/v6/core/agents) · [Robots](https://docs.hud.ai/v6/advanced/robots)
|
|
215
215
|
|
|
216
216
|
## Deploy on the platform
|
|
217
217
|
|
|
218
218
|
From the [platform UI](https://hud.ai) you can run batches, compare models on the same taskset, and inspect every trace.
|
|
219
219
|
|
|
220
|
-
→ [
|
|
220
|
+
→ [Run & deploy](https://docs.hud.ai/v6/core/runtime)
|
|
221
221
|
|
|
222
222
|
## Train on rewards
|
|
223
223
|
|
|
224
|
-
Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and
|
|
224
|
+
Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and pass the graded runs to `TrainingClient.step()`:
|
|
225
225
|
|
|
226
226
|
```python
|
|
227
|
+
from hud import TrainingClient
|
|
227
228
|
from hud.agents import create_agent
|
|
228
|
-
from hud.eval import
|
|
229
|
+
from hud.eval import Job
|
|
229
230
|
|
|
230
|
-
agent = create_agent("
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
231
|
+
agent = create_agent("arith-rl", completion_kwargs={"extra_body": {"return_token_ids": True}})
|
|
232
|
+
trainer = TrainingClient("arith-rl")
|
|
233
|
+
taskset, runtime = ... # your Taskset and where rollouts run
|
|
234
|
+
|
|
235
|
+
session = await Job.start("arith-rl", group=8)
|
|
236
|
+
start = len(session.runs)
|
|
237
|
+
await taskset.run(agent, runtime=runtime, group=8, job=session)
|
|
238
|
+
await trainer.step(session.runs[start:], learning_rate=1e-5, group_size=8)
|
|
235
239
|
```
|
|
236
240
|
|
|
237
241
|
HUD is the environment-and-reward source for your own GRPO/PPO loop — the same environment trains any model, text or multimodal, unchanged.
|
|
238
242
|
|
|
239
|
-
→ [Training](https://docs.hud.ai/
|
|
243
|
+
→ [Training](https://docs.hud.ai/v6/core/training) · [Designing tasks for signal](https://docs.hud.ai/v6/core/advice)
|
|
240
244
|
|
|
241
245
|
## Links
|
|
242
246
|
|
|
243
247
|
- [Documentation](https://docs.hud.ai)
|
|
244
|
-
- [Quickstart](https://docs.hud.ai/quickstart)
|
|
245
|
-
- [CLI reference](https://docs.hud.ai/
|
|
246
|
-
- [Leaderboards](https://hud.ai/leaderboards)
|
|
248
|
+
- [Quickstart](https://docs.hud.ai/v6/start/quickstart)
|
|
249
|
+
- [CLI reference](https://docs.hud.ai/v6/core/cli)
|
|
247
250
|
- [Environment templates](https://hud.ai/environments)
|
|
248
251
|
- [Supported models](https://hud.ai/models)
|
|
249
252
|
- [Discord](https://discord.gg/wkjtmHYYjm)
|
|
@@ -268,8 +271,8 @@ Key areas: [Agents](hud/agents/) · [Environments](hud/environment/) · [Capabil
|
|
|
268
271
|
|
|
269
272
|
```bibtex
|
|
270
273
|
@software{hud2025agentevalplatform,
|
|
271
|
-
author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
|
|
272
|
-
title = {HUD: An Evaluation and RL
|
|
274
|
+
author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep Chawla and Nguyen Nhat Minh},
|
|
275
|
+
title = {HUD: An Evaluation and RL Environments Platform for Agents},
|
|
273
276
|
date = {2025-04},
|
|
274
277
|
url = {https://github.com/hud-evals/hud-python},
|
|
275
278
|
langid = {en}
|
|
{hud_python-0.6.6.dist-info → hud_python-0.6.8.dev0.dist-info}/RECORD
CHANGED
|
@@ -5,7 +5,7 @@ hud/conftest.py,sha256=HKbHvmFXLPX6KFSJgPFUAM22auclNNdFmHGwilNzg98,1012
|
|
|
5
5
|
hud/server.py,sha256=NtSHIjBFr9lYvryfXrCa-VhwqnwkRy7n5fp_OuNhNOI,1235
|
|
6
6
|
hud/settings.py,sha256=eyvMIOOlFk6kIAP8UsHEeoqf_UiOVhb1jhRCM2qv7b8,6393
|
|
7
7
|
hud/types.py,sha256=kFVbQ-CcVhYpdX5jjgacRIppFS0q_nMXahijV_Hhl58,15022
|
|
8
|
-
hud/version.py,sha256=
|
|
8
|
+
hud/version.py,sha256=RD_T-I7Yj0KBuadVj2UQF2XmPhTeHn3Lo45gIQTb5e4,109
|
|
9
9
|
hud/agents/__init__.py,sha256=UL1PXucnY1Ln9o_Xf0Y-mvfbNh6NUdMyPJp-_d9Wq7Q,5082
|
|
10
10
|
hud/agents/base.py,sha256=WgEOWUmMioXTxYe6cOvbqnbM4n989Z9kFEZIN6xJ3pU,659
|
|
11
11
|
hud/agents/tool_agent.py,sha256=a0xsh2d8IwvmiPGMs9LCzghi61FHt4vMK_9sW8eNFbA,12557
|
|
@@ -54,10 +54,10 @@ hud/agents/openai/tools/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
|
|
|
54
54
|
hud/agents/openai/tools/tests/test_computer.py,sha256=qEK7h2eD4j6Wg6VjU_YD8kCRpXOXwHDXBv1bz0mh5bo,3488
|
|
55
55
|
hud/agents/openai/tools/tests/test_strict_schema.py,sha256=8dGkCSO7_-TvryEfStKZ7nKEuO3WGLfzsjPUbfdHMhQ,2344
|
|
56
56
|
hud/agents/openai_compatible/__init__.py,sha256=zQZSQHB97g3rtPx4Y8aG_0K1i17MLwGRaTyQLd31Jqk,98
|
|
57
|
-
hud/agents/openai_compatible/agent.py,sha256=
|
|
58
|
-
hud/agents/openai_compatible/tools/__init__.py,sha256=
|
|
57
|
+
hud/agents/openai_compatible/agent.py,sha256=7Zw6wa1ce7kt_xF4R_OfuoDbMPl09TktpjgFc16-_Lo,9946
|
|
58
|
+
hud/agents/openai_compatible/tools/__init__.py,sha256=kOPtrgiqTcnQabZpo1aNfYfnaqip6M3z2OeffJNz-Ak,361
|
|
59
59
|
hud/agents/openai_compatible/tools/base.py,sha256=Jl6Bm9ZgEOqgdOnM7Xm66VN3RpfjeZF9w55of_ZGCMI,5760
|
|
60
|
-
hud/agents/openai_compatible/tools/filesystem.py,sha256=
|
|
60
|
+
hud/agents/openai_compatible/tools/filesystem.py,sha256=hHSVW25OT_zxdJO6fE2kOPnnABOd06kHPWC08epoCNg,12523
|
|
61
61
|
hud/agents/openai_compatible/tools/mcp_proxy.py,sha256=pfJdCvFxTaXkj6qrGK04jxibjeIhm6O-5STHPcB_qL4,844
|
|
62
62
|
hud/agents/robot/__init__.py,sha256=UXyQYaoLMrxFr1QYU2D6UUz6BwK9gsp4-abe5jAOqUU,1620
|
|
63
63
|
hud/agents/robot/_types.py,sha256=byWZMYRwLuzvu2U-ZXMx3TcyRTPcsjGF5HkItbgfcQ4,222
|
|
@@ -75,7 +75,7 @@ hud/agents/tests/test_claude_sdk_agent.py,sha256=lSY8wnLQgfJBNzF9BU-PcO4IrKaWtva
|
|
|
75
75
|
hud/agents/tests/test_gemini_agent.py,sha256=7OdFFVSOkJE8Gb3blptWnEXuFWHuFCNlFAoMXTyV0Ec,4835
|
|
76
76
|
hud/agents/tests/test_openai_agent.py,sha256=-69hoi_Bv9JdGngEnaJ74mSH-JCupg66ny7hODXQF00,4180
|
|
77
77
|
hud/agents/tests/test_openai_compatible_agent.py,sha256=6JxFxkRdPT1O574VYvcsMXiUwhcvBFJQLBx46Utt4QI,2874
|
|
78
|
-
hud/agents/tests/test_provider_native_tools.py,sha256=
|
|
78
|
+
hud/agents/tests/test_provider_native_tools.py,sha256=dZ4dOT3sUkMh_7p-pGDnTIL7UDdwngNJ8jarlqU0Plk,12989
|
|
79
79
|
hud/agents/tests/test_tool_agent.py,sha256=w8cuBAMcGBbIwiMnjH-tg4ztqhlewQOnXK3h1XLkj5o,5373
|
|
80
80
|
hud/agents/tests/test_trace.py,sha256=rUNbV-y4gI0dH0xluT9COY_epJD69XHAzaC1HO4mX10,4517
|
|
81
81
|
hud/agents/tools/__init__.py,sha256=-fnzzq8qwEXWD8s-T8RUGamuYndXTESeFNNMQxsXH5A,858
|
|
@@ -96,7 +96,7 @@ hud/cli/__init__.py,sha256=i_R0izloGfZfDa7gXmxxfqfSOMmZR-qUp4wMtD9ulZY,5936
|
|
|
96
96
|
hud/cli/__main__.py,sha256=fDH7XITyuDITwSDIVwRso06aouADO0CzTHKqp5TOwJE,143
|
|
97
97
|
hud/cli/cancel.py,sha256=MHRdZ6IoFy3iac9NtTT_c91IB_iQbYISsCb7ph7u298,3785
|
|
98
98
|
hud/cli/client.py,sha256=cC23TUTo7w4S63jtk8SWkFWzG9h8Z2AKwYEqaefBWqg,2946
|
|
99
|
-
hud/cli/deploy.py,sha256=
|
|
99
|
+
hud/cli/deploy.py,sha256=Uh-Sfp-fCezV5U7yNleOUBVEfzoVhIvxMVEb8b9le1U,27659
|
|
100
100
|
hud/cli/eval.py,sha256=-pNxIdQWqzFraRhtpbk9Z0kXYVn3Ia6qBidHJ0KbLqw,35596
|
|
101
101
|
hud/cli/init.py,sha256=FmvQ2hPhkKDkyr8krxuGPYDFBzeeLlMqMnkxfNo9nQ4,8258
|
|
102
102
|
hud/cli/jobs.py,sha256=FtLRys2UwccovvQju37sXNTllOQnSJ4rWhpkeTbkjv4,4884
|
|
@@ -112,7 +112,7 @@ hud/cli/tests/__init__.py,sha256=ZrGVkmH7DHXGqOvjOSNGZeMYaFIRB2K8c6hwr8FPJ-8,68
|
|
|
112
112
|
hud/cli/tests/test_cli_init.py,sha256=e4pHfw4jLt2TY5n_fSTfA7XRcMsu1hKxOK7PNfVRO7U,3002
|
|
113
113
|
hud/cli/tests/test_cli_main.py,sha256=0wMho9p9NcGjp0jLiUtCQh_FYdbMaCJtSY3sBbSgPwA,697
|
|
114
114
|
hud/cli/tests/test_cli_more_wrappers.py,sha256=EEFrqTMg3yknQdmb8hWcHJAbinGAOGyqx3CGdiVX1b8,115
|
|
115
|
-
hud/cli/tests/test_deploy.py,sha256=
|
|
115
|
+
hud/cli/tests/test_deploy.py,sha256=vgEs66oYfsrbfDfrZgf1CuIPQ7f_JCo6RrPoPAKVd7I,17247
|
|
116
116
|
hud/cli/tests/test_eval_bedrock.py,sha256=UBGakgIV4kzXUj0Jtbr3t05xBss66YPqruKsovUYyoo,1900
|
|
117
117
|
hud/cli/tests/test_eval_config.py,sha256=12ZCqFW-4NCVqEU-GheD8M3AIgm9UZAE-dDnD3UXlWM,10386
|
|
118
118
|
hud/cli/tests/test_init.py,sha256=GsUKhZ9p5Gmg_0J0D9j35M4Ks569_M2bWPkcIAEVEtg,3821
|
|
@@ -169,18 +169,18 @@ hud/eval/chat.py,sha256=Gu6aDv1UkDAZ8qs41TXLUrFlcX4cav4HHSa7Yyovifw,5853
|
|
|
169
169
|
hud/eval/file_tracking.py,sha256=MqLNl_lYfp74KCr4ZbwW9yGHmIyJ6GeOgAN9NMhELgs,4278
|
|
170
170
|
hud/eval/job.py,sha256=QMck-M_0HWtEd56hvfdYaNnStLlIOuNOGBStdcLr2vI,5606
|
|
171
171
|
hud/eval/run.py,sha256=wsG7xvvr-gJ778xWVXDMOA6BNlnHH9OiFZTzy6AyORo,18039
|
|
172
|
-
hud/eval/runtime.py,sha256=
|
|
173
|
-
hud/eval/sync.py,sha256=
|
|
172
|
+
hud/eval/runtime.py,sha256=fcpGNH3UhKAyTwm_qBNv6dJCMv2eGAtVfmK8MZJuI3w,42984
|
|
173
|
+
hud/eval/sync.py,sha256=dptdsSZvtC0O5Qyk4VQir1PuYVFDxw2DMZ-Q9wCdah0,6445
|
|
174
174
|
hud/eval/task.py,sha256=9xQ_x4AjY26M-sVRH6HhDZpKeofBw1hd8LZ0csBy53k,4617
|
|
175
175
|
hud/eval/taskset.py,sha256=9KVYxoeMnMHa2syR4fRGh9r1IuKoDVyMe7WEUFvaGxQ,12086
|
|
176
176
|
hud/eval/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
177
177
|
hud/eval/tests/test_chat.py,sha256=yQt_dM1UJu-Cw8JS8di102h06ee7QI9CqNA27U66_Qg,4310
|
|
178
178
|
hud/eval/tests/test_docker_provider.py,sha256=1W1xyOzjHti6jfV2eiVnNd5CxKEMAKq8NBgBy-m5ftA,23741
|
|
179
179
|
hud/eval/tests/test_file_tracking_observer.py,sha256=DteazLLWK0LKgtUn_6v4_wMI-1jhENMx7Y87-pdg-I8,4197
|
|
180
|
-
hud/eval/tests/test_hosted.py,sha256=
|
|
180
|
+
hud/eval/tests/test_hosted.py,sha256=S0gGqAUaizlCGC30XwvaWb-TJhFgLUPlwsMO0WgjVWM,16284
|
|
181
181
|
hud/eval/tests/test_job.py,sha256=UyaqbOY-0pnd2RNIp3glS_L_JJFT0-7GlSkgRhgaU1A,1867
|
|
182
|
-
hud/eval/tests/test_rollout.py,sha256=
|
|
183
|
-
hud/eval/tests/test_sync.py,sha256=
|
|
182
|
+
hud/eval/tests/test_rollout.py,sha256=YUVqzDbIg9Y5LNnDwaNJ40hOL1BVAFgpHRHCyGlcfQw,14027
|
|
183
|
+
hud/eval/tests/test_sync.py,sha256=1gFC65ZiZojeSn9q1v-RMK2Ps130mlh-aXE7G8sn54k,5234
|
|
184
184
|
hud/eval/tests/test_task.py,sha256=n0E3B3TBYV6aM2_KFVGPHuD9nBGlpwq4ZvBu9wpjqtU,9754
|
|
185
185
|
hud/graders/__init__.py,sha256=eccF8MXHQBvmynULljOCEMn82YSK0HSScD1TlS8UoT4,1570
|
|
186
186
|
hud/graders/base.py,sha256=fDGrdWfMyu4cvoNkWwDPtN5oRchLFqhqcZPO50jJtA8,1428
|
|
@@ -226,8 +226,8 @@ hud/utils/tests/test_platform.py,sha256=mwhyFkUBvgmHRc43vQ_JgAAW2N9fIaxkQhVo-GB4
|
|
|
226
226
|
hud/utils/tests/test_requests.py,sha256=ENK6P5xLTuSgWDcCau4zCj_5zPV_EooGwU4P8YYl5Gw,9109
|
|
227
227
|
hud/utils/tests/test_serialization.py,sha256=GY4NiFUJtwLSYQWA0n1zme-Ul4DnBLByHCOOkxn2kLM,819
|
|
228
228
|
hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
229
|
-
hud_python-0.6.
|
|
230
|
-
hud_python-0.6.
|
|
231
|
-
hud_python-0.6.
|
|
232
|
-
hud_python-0.6.
|
|
233
|
-
hud_python-0.6.
|
|
229
|
+
hud_python-0.6.8.dev0.dist-info/METADATA,sha256=k0BA7OmInHsM-CB-pm5GNc0yYVwF7EbUr0etmiU-xXg,12427
|
|
230
|
+
hud_python-0.6.8.dev0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
231
|
+
hud_python-0.6.8.dev0.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
|
|
232
|
+
hud_python-0.6.8.dev0.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
|
|
233
|
+
hud_python-0.6.8.dev0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|