hud-python 0.6.7__py3-none-any.whl → 0.6.8.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/agents/openai_compatible/agent.py +7 -3
- hud/agents/openai_compatible/tools/__init__.py +4 -2
- hud/agents/openai_compatible/tools/filesystem.py +227 -33
- hud/agents/tests/test_provider_native_tools.py +135 -6
- hud/eval/tests/test_rollout.py +90 -0
- hud/version.py +1 -1
- {hud_python-0.6.7.dist-info → hud_python-0.6.8.dev0.dist-info}/METADATA +27 -24
- {hud_python-0.6.7.dist-info → hud_python-0.6.8.dev0.dist-info}/RECORD +11 -11
- {hud_python-0.6.7.dist-info → hud_python-0.6.8.dev0.dist-info}/WHEEL +0 -0
- {hud_python-0.6.7.dist-info → hud_python-0.6.8.dev0.dist-info}/entry_points.txt +0 -0
- {hud_python-0.6.7.dist-info → hud_python-0.6.8.dev0.dist-info}/licenses/LICENSE +0 -0
|
@@ -17,11 +17,13 @@ from hud.types import MCPToolCall, MCPToolResult
|
|
|
17
17
|
from hud.utils import gateway
|
|
18
18
|
|
|
19
19
|
from .tools import (
|
|
20
|
+
BashTool,
|
|
21
|
+
EditTool,
|
|
20
22
|
GlobTool,
|
|
21
23
|
GrepTool,
|
|
22
|
-
ListTool,
|
|
23
24
|
OpenAICompatibleMCPProxyTool,
|
|
24
25
|
ReadTool,
|
|
26
|
+
WriteTool,
|
|
25
27
|
)
|
|
26
28
|
from .tools.base import format_chat_result
|
|
27
29
|
|
|
@@ -41,10 +43,12 @@ class OpenAIChatAgent(ToolAgent[ChatCompletionMessageParam, OpenAIChatConfig]):
|
|
|
41
43
|
"""OpenAI-compatible agent using the chat.completions protocol."""
|
|
42
44
|
|
|
43
45
|
tool_catalog = (
|
|
46
|
+
BashTool,
|
|
44
47
|
ReadTool,
|
|
45
|
-
GrepTool,
|
|
46
48
|
GlobTool,
|
|
47
|
-
|
|
49
|
+
GrepTool,
|
|
50
|
+
EditTool,
|
|
51
|
+
WriteTool,
|
|
48
52
|
OpenAICompatibleMCPProxyTool,
|
|
49
53
|
)
|
|
50
54
|
|
|
@@ -2,13 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from .filesystem import GlobTool, GrepTool,
|
|
5
|
+
from .filesystem import BashTool, EditTool, GlobTool, GrepTool, ReadTool, WriteTool
|
|
6
6
|
from .mcp_proxy import OpenAICompatibleMCPProxyTool
|
|
7
7
|
|
|
8
8
|
__all__ = [
|
|
9
|
+
"BashTool",
|
|
10
|
+
"EditTool",
|
|
9
11
|
"GlobTool",
|
|
10
12
|
"GrepTool",
|
|
11
|
-
"ListTool",
|
|
12
13
|
"OpenAICompatibleMCPProxyTool",
|
|
13
14
|
"ReadTool",
|
|
15
|
+
"WriteTool",
|
|
14
16
|
]
|
|
@@ -1,16 +1,20 @@
|
|
|
1
|
-
"""OpenAI-compatible
|
|
1
|
+
"""OpenAI-compatible OpenCode-style workspace tools backed by SSHClient."""
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import math
|
|
6
|
+
import posixpath
|
|
5
7
|
import shlex
|
|
6
8
|
from typing import Any, ClassVar
|
|
7
9
|
|
|
8
10
|
import mcp.types as mcp_types
|
|
9
11
|
|
|
10
12
|
from hud.agents.tools import SSHTool
|
|
11
|
-
from hud.agents.tools.base import AgentToolSpec, result_text
|
|
13
|
+
from hud.agents.tools.base import AgentToolSpec, result_text, tool_err
|
|
12
14
|
from hud.types import MCPToolResult
|
|
13
15
|
|
|
16
|
+
DEFAULT_READ_LIMIT = 2000
|
|
17
|
+
|
|
14
18
|
|
|
15
19
|
class _FilesystemTool(SSHTool):
|
|
16
20
|
description: ClassVar[str]
|
|
@@ -34,16 +38,26 @@ class _FilesystemTool(SSHTool):
|
|
|
34
38
|
|
|
35
39
|
class ReadTool(_FilesystemTool):
|
|
36
40
|
name = "read"
|
|
37
|
-
description =
|
|
41
|
+
description = (
|
|
42
|
+
"Reads a file or directory from the workspace. Use offset and limit for pagination."
|
|
43
|
+
)
|
|
38
44
|
parameters: ClassVar[dict[str, Any]] = {
|
|
39
45
|
"type": "object",
|
|
40
46
|
"properties": {
|
|
41
|
-
"filePath": {
|
|
47
|
+
"filePath": {
|
|
48
|
+
"type": "string",
|
|
49
|
+
"description": "The absolute path to the file or directory to read.",
|
|
50
|
+
},
|
|
42
51
|
"offset": {
|
|
43
52
|
"type": "integer",
|
|
44
|
-
"description": "
|
|
53
|
+
"description": "The line number to start reading from (1-indexed).",
|
|
54
|
+
"minimum": 0,
|
|
55
|
+
},
|
|
56
|
+
"limit": {
|
|
57
|
+
"type": "integer",
|
|
58
|
+
"description": "The maximum number of lines to read (defaults to 2000).",
|
|
59
|
+
"minimum": 1,
|
|
45
60
|
},
|
|
46
|
-
"limit": {"type": "integer", "description": "Maximum number of lines to read."},
|
|
47
61
|
},
|
|
48
62
|
"required": ["filePath"],
|
|
49
63
|
}
|
|
@@ -52,19 +66,205 @@ class ReadTool(_FilesystemTool):
|
|
|
52
66
|
path = arguments.get("filePath")
|
|
53
67
|
if not isinstance(path, str) or not path:
|
|
54
68
|
raise ValueError("filePath is required")
|
|
69
|
+
offset = _read_offset(arguments.get("offset"))
|
|
70
|
+
limit = _positive_int(arguments.get("limit"), default=DEFAULT_READ_LIMIT, name="limit")
|
|
71
|
+
if not (await self.bash(f"test -d {shlex.quote(path)}")).isError:
|
|
72
|
+
return await self._read_directory(path, offset=offset, limit=limit)
|
|
55
73
|
result = await self.file_read(path)
|
|
56
74
|
if result.isError:
|
|
57
75
|
return result
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
76
|
+
text = result_text(result)
|
|
77
|
+
lines = text.splitlines()
|
|
78
|
+
start = offset - 1
|
|
79
|
+
if start > len(lines) and not (len(lines) == 0 and offset == 1):
|
|
80
|
+
return tool_err(f"Offset {offset} is out of range for this file ({len(lines)} lines)")
|
|
81
|
+
sliced = lines[start : start + limit]
|
|
82
|
+
last = offset + len(sliced) - 1
|
|
83
|
+
more = last < len(lines)
|
|
84
|
+
body = [
|
|
85
|
+
f"<path>{path}</path>",
|
|
86
|
+
"<type>file</type>",
|
|
87
|
+
"<content>",
|
|
88
|
+
*[f"{i + offset}: {line}" for i, line in enumerate(sliced)],
|
|
89
|
+
]
|
|
90
|
+
if more:
|
|
91
|
+
body.append(
|
|
92
|
+
f"\n(Showing lines {offset}-{last} of {len(lines)}. "
|
|
93
|
+
f"Use offset={last + 1} to continue.)"
|
|
94
|
+
)
|
|
95
|
+
else:
|
|
96
|
+
body.append(f"\n(End of file - total {len(lines)} lines)")
|
|
97
|
+
body.append("</content>")
|
|
98
|
+
return MCPToolResult(content=[mcp_types.TextContent(type="text", text="\n".join(body))])
|
|
99
|
+
|
|
100
|
+
async def _read_directory(self, path: str, *, offset: int, limit: int) -> MCPToolResult:
|
|
101
|
+
result = await self.file_list(path)
|
|
102
|
+
if result.isError:
|
|
103
|
+
return result
|
|
104
|
+
entries = result_text(result).splitlines()
|
|
105
|
+
if entries == ["(empty)"]:
|
|
106
|
+
entries = []
|
|
107
|
+
start = offset - 1
|
|
108
|
+
sliced = entries[start : start + limit]
|
|
109
|
+
truncated = start + len(sliced) < len(entries)
|
|
110
|
+
body = [
|
|
111
|
+
f"<path>{path}</path>",
|
|
112
|
+
"<type>directory</type>",
|
|
113
|
+
"<entries>",
|
|
114
|
+
*sliced,
|
|
115
|
+
]
|
|
116
|
+
if truncated:
|
|
117
|
+
body.append(
|
|
118
|
+
f"\n(Showing {len(sliced)} of {len(entries)} entries. "
|
|
119
|
+
f"Use offset={offset + len(sliced)} to continue.)"
|
|
66
120
|
)
|
|
67
|
-
|
|
121
|
+
else:
|
|
122
|
+
body.append(f"\n({len(entries)} entries)")
|
|
123
|
+
body.append("</entries>")
|
|
124
|
+
return MCPToolResult(content=[mcp_types.TextContent(type="text", text="\n".join(body))])
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class BashTool(_FilesystemTool):
|
|
128
|
+
name = "bash"
|
|
129
|
+
description = (
|
|
130
|
+
"Executes a shell command in the workspace. Prefer read, grep, glob, edit, "
|
|
131
|
+
"and write for filesystem operations."
|
|
132
|
+
)
|
|
133
|
+
parameters: ClassVar[dict[str, Any]] = {
|
|
134
|
+
"type": "object",
|
|
135
|
+
"properties": {
|
|
136
|
+
"command": {"type": "string", "description": "The command to execute."},
|
|
137
|
+
"timeout": {
|
|
138
|
+
"type": "integer",
|
|
139
|
+
"description": "Optional timeout in milliseconds.",
|
|
140
|
+
"minimum": 1,
|
|
141
|
+
},
|
|
142
|
+
"workdir": {
|
|
143
|
+
"type": "string",
|
|
144
|
+
"description": "The working directory to run the command in.",
|
|
145
|
+
},
|
|
146
|
+
},
|
|
147
|
+
"required": ["command"],
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
|
|
151
|
+
command = arguments.get("command")
|
|
152
|
+
if not isinstance(command, str) or not command:
|
|
153
|
+
raise ValueError("command is required")
|
|
154
|
+
timeout = arguments.get("timeout")
|
|
155
|
+
if timeout is not None:
|
|
156
|
+
if not isinstance(timeout, int) or timeout < 1:
|
|
157
|
+
raise ValueError("timeout must be a positive integer")
|
|
158
|
+
seconds = max(1, math.ceil(timeout / 1000))
|
|
159
|
+
command = f"timeout {seconds}s bash -lc {shlex.quote(command)}"
|
|
160
|
+
workdir = arguments.get("workdir")
|
|
161
|
+
if isinstance(workdir, str) and workdir:
|
|
162
|
+
command = f"cd {shlex.quote(workdir)} && {command}"
|
|
163
|
+
return await self.bash(command)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class EditTool(_FilesystemTool):
|
|
167
|
+
name = "edit"
|
|
168
|
+
description = (
|
|
169
|
+
"Replaces text within a file. Use oldString as exact literal context. "
|
|
170
|
+
"Set replaceAll to true to replace every occurrence."
|
|
171
|
+
)
|
|
172
|
+
parameters: ClassVar[dict[str, Any]] = {
|
|
173
|
+
"type": "object",
|
|
174
|
+
"properties": {
|
|
175
|
+
"filePath": {
|
|
176
|
+
"type": "string",
|
|
177
|
+
"description": "The absolute path to the file to modify.",
|
|
178
|
+
},
|
|
179
|
+
"oldString": {"type": "string", "description": "The text to replace."},
|
|
180
|
+
"newString": {
|
|
181
|
+
"type": "string",
|
|
182
|
+
"description": "The text to replace it with (must be different from oldString).",
|
|
183
|
+
},
|
|
184
|
+
"replaceAll": {
|
|
185
|
+
"type": "boolean",
|
|
186
|
+
"description": "Replace all occurrences of oldString (default false).",
|
|
187
|
+
},
|
|
188
|
+
},
|
|
189
|
+
"required": ["filePath", "oldString", "newString"],
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
|
|
193
|
+
path = arguments.get("filePath")
|
|
194
|
+
if not isinstance(path, str) or not path:
|
|
195
|
+
raise ValueError("filePath is required")
|
|
196
|
+
old = arguments.get("oldString")
|
|
197
|
+
new = arguments.get("newString")
|
|
198
|
+
if not isinstance(old, str):
|
|
199
|
+
raise ValueError("oldString is required")
|
|
200
|
+
if not isinstance(new, str):
|
|
201
|
+
raise ValueError("newString is required")
|
|
202
|
+
if old == new:
|
|
203
|
+
return tool_err("No changes to apply: oldString and newString are identical.")
|
|
204
|
+
if old == "":
|
|
205
|
+
exists = not (await self.bash(f"test -e {shlex.quote(path)}")).isError
|
|
206
|
+
if exists:
|
|
207
|
+
return tool_err(
|
|
208
|
+
"oldString cannot be empty when editing an existing file. "
|
|
209
|
+
"Provide exact text to replace, or use write for full-file replacement."
|
|
210
|
+
)
|
|
211
|
+
mkdir = await self._ensure_parent(path)
|
|
212
|
+
if mkdir.isError:
|
|
213
|
+
return mkdir
|
|
214
|
+
return await self.file_write(path, new)
|
|
215
|
+
|
|
216
|
+
existing = await self.file_read(path)
|
|
217
|
+
if existing.isError:
|
|
218
|
+
return existing
|
|
219
|
+
text = result_text(existing)
|
|
220
|
+
count = text.count(old)
|
|
221
|
+
if count == 0:
|
|
222
|
+
return tool_err(f"oldString not found in {path}")
|
|
223
|
+
replace_all = arguments.get("replaceAll") is True
|
|
224
|
+
if count > 1 and not replace_all:
|
|
225
|
+
return tool_err(f"oldString matches {count} times in {path}; set replaceAll to true")
|
|
226
|
+
next_text = text.replace(old, new) if replace_all else text.replace(old, new, 1)
|
|
227
|
+
return await self.file_write(path, next_text)
|
|
228
|
+
|
|
229
|
+
async def _ensure_parent(self, path: str) -> MCPToolResult:
|
|
230
|
+
parent = posixpath.dirname(path)
|
|
231
|
+
if not parent or parent in {".", "/"}:
|
|
232
|
+
return MCPToolResult(content=[])
|
|
233
|
+
return await self.bash(f"mkdir -p {shlex.quote(parent)}")
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class WriteTool(_FilesystemTool):
|
|
237
|
+
name = "write"
|
|
238
|
+
description = "Creates or overwrites a file with the provided content."
|
|
239
|
+
parameters: ClassVar[dict[str, Any]] = {
|
|
240
|
+
"type": "object",
|
|
241
|
+
"properties": {
|
|
242
|
+
"content": {"type": "string", "description": "The content to write to the file."},
|
|
243
|
+
"filePath": {
|
|
244
|
+
"type": "string",
|
|
245
|
+
"description": "The absolute path to the file to write.",
|
|
246
|
+
},
|
|
247
|
+
},
|
|
248
|
+
"required": ["content", "filePath"],
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
async def execute(self, arguments: dict[str, Any]) -> MCPToolResult:
|
|
252
|
+
path = arguments.get("filePath")
|
|
253
|
+
if not isinstance(path, str) or not path:
|
|
254
|
+
raise ValueError("filePath is required")
|
|
255
|
+
content = arguments.get("content")
|
|
256
|
+
if not isinstance(content, str):
|
|
257
|
+
raise ValueError("content is required")
|
|
258
|
+
mkdir = await self._ensure_parent(path)
|
|
259
|
+
if mkdir.isError:
|
|
260
|
+
return mkdir
|
|
261
|
+
return await self.file_write(path, content)
|
|
262
|
+
|
|
263
|
+
async def _ensure_parent(self, path: str) -> MCPToolResult:
|
|
264
|
+
parent = posixpath.dirname(path)
|
|
265
|
+
if not parent or parent in {".", "/"}:
|
|
266
|
+
return MCPToolResult(content=[])
|
|
267
|
+
return await self.bash(f"mkdir -p {shlex.quote(parent)}")
|
|
68
268
|
|
|
69
269
|
|
|
70
270
|
class GrepTool(_FilesystemTool):
|
|
@@ -115,24 +315,18 @@ class GlobTool(_FilesystemTool):
|
|
|
115
315
|
return await self.bash(f"find {shlex.quote(str(path))} -name {shlex.quote(pattern)}")
|
|
116
316
|
|
|
117
317
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
"
|
|
123
|
-
|
|
124
|
-
"path": {"type": "string", "description": "Directory to list."},
|
|
125
|
-
"ignore": {
|
|
126
|
-
"type": "array",
|
|
127
|
-
"items": {"type": "string"},
|
|
128
|
-
"description": "Glob patterns to ignore.",
|
|
129
|
-
},
|
|
130
|
-
},
|
|
131
|
-
}
|
|
318
|
+
def _positive_int(value: Any, *, default: int, name: str) -> int:
|
|
319
|
+
if value is None:
|
|
320
|
+
return default
|
|
321
|
+
if not isinstance(value, int) or value < 1:
|
|
322
|
+
raise ValueError(f"{name} must be a positive integer")
|
|
323
|
+
return value
|
|
132
324
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
325
|
+
|
|
326
|
+
def _read_offset(value: Any) -> int:
|
|
327
|
+
if value is None or value == 0:
|
|
328
|
+
return 1
|
|
329
|
+
return _positive_int(value, default=1, name="offset")
|
|
136
330
|
|
|
137
331
|
|
|
138
|
-
__all__ = ["GlobTool", "GrepTool", "
|
|
332
|
+
__all__ = ["BashTool", "EditTool", "GlobTool", "GrepTool", "ReadTool", "WriteTool"]
|
|
@@ -7,16 +7,19 @@ client and assert the command translation + result shape, fully offline.
|
|
|
7
7
|
|
|
8
8
|
from __future__ import annotations
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
import shlex
|
|
11
|
+
from typing import Any, cast
|
|
11
12
|
|
|
12
13
|
import pytest
|
|
13
14
|
|
|
14
15
|
from hud.agents.claude.tools.coding import ClaudeBashTool, ClaudeTextEditorTool
|
|
15
16
|
from hud.agents.gemini.tools.coding import GeminiEditTool, GeminiShellTool
|
|
16
17
|
from hud.agents.openai.tools.coding import OpenAIShellTool
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
from hud.agents.openai_compatible.agent import OpenAIChatAgent
|
|
19
|
+
from hud.agents.openai_compatible.tools import BashTool, EditTool, ReadTool, WriteTool
|
|
20
|
+
from hud.agents.tools.base import result_text
|
|
21
|
+
from hud.agents.types import OpenAIChatConfig
|
|
22
|
+
from hud.capabilities import Capability, SSHClient
|
|
20
23
|
|
|
21
24
|
|
|
22
25
|
class _Completed:
|
|
@@ -61,6 +64,21 @@ class _FakeSFTP:
|
|
|
61
64
|
def open(self, path: str, mode: str) -> _FakeOpenFile:
|
|
62
65
|
return _FakeOpenFile(self._store, path, mode)
|
|
63
66
|
|
|
67
|
+
async def listdir(self, path: str) -> list[str]:
|
|
68
|
+
prefix = path.rstrip("/")
|
|
69
|
+
if not prefix:
|
|
70
|
+
prefix = "/"
|
|
71
|
+
if prefix != "/":
|
|
72
|
+
prefix += "/"
|
|
73
|
+
names: set[str] = set()
|
|
74
|
+
for file_path in self._store:
|
|
75
|
+
if not file_path.startswith(prefix):
|
|
76
|
+
continue
|
|
77
|
+
rest = file_path[len(prefix) :]
|
|
78
|
+
if rest:
|
|
79
|
+
names.add(rest.split("/", 1)[0])
|
|
80
|
+
return sorted(names)
|
|
81
|
+
|
|
64
82
|
|
|
65
83
|
class _Conn:
|
|
66
84
|
def __init__(self, completed: _Completed, store: dict[str, bytes]) -> None:
|
|
@@ -70,13 +88,26 @@ class _Conn:
|
|
|
70
88
|
|
|
71
89
|
async def run(self, command: str, check: bool = False) -> _Completed:
|
|
72
90
|
self.commands.append(command)
|
|
91
|
+
parts = shlex.split(command)
|
|
92
|
+
if len(parts) == 3 and parts[:2] in (["test", "-d"], ["test", "-e"]):
|
|
93
|
+
path = parts[2]
|
|
94
|
+
exists = path in self._store or any(
|
|
95
|
+
file_path.startswith(path.rstrip("/") + "/") for file_path in self._store
|
|
96
|
+
)
|
|
97
|
+
if parts[1] == "-d":
|
|
98
|
+
exists = any(
|
|
99
|
+
file_path.startswith(path.rstrip("/") + "/") for file_path in self._store
|
|
100
|
+
)
|
|
101
|
+
return _Completed(exit_status=0 if exists else 1)
|
|
102
|
+
if len(parts) >= 3 and parts[:2] == ["mkdir", "-p"]:
|
|
103
|
+
return _Completed(exit_status=0)
|
|
73
104
|
return self._completed
|
|
74
105
|
|
|
75
106
|
def start_sftp_client(self) -> _FakeSFTP:
|
|
76
107
|
return _FakeSFTP(self._store)
|
|
77
108
|
|
|
78
109
|
|
|
79
|
-
class _FakeSSH:
|
|
110
|
+
class _FakeSSH(SSHClient):
|
|
80
111
|
"""Duck-typed ``SSHClient``: ``conn.run`` (bash) + ``conn.start_sftp_client`` (files)."""
|
|
81
112
|
|
|
82
113
|
def __init__(
|
|
@@ -87,7 +118,10 @@ class _FakeSSH:
|
|
|
87
118
|
files: dict[str, bytes] | None = None,
|
|
88
119
|
) -> None:
|
|
89
120
|
self.files: dict[str, bytes] = files or {}
|
|
90
|
-
|
|
121
|
+
super().__init__(
|
|
122
|
+
Capability(name="shell", protocol="ssh/2", url="ssh://localhost:22"),
|
|
123
|
+
cast("Any", _Conn(_Completed(stdout=stdout, exit_status=exit_status), self.files)),
|
|
124
|
+
)
|
|
91
125
|
|
|
92
126
|
|
|
93
127
|
def _ssh(**kwargs: Any) -> SSHClient:
|
|
@@ -98,6 +132,11 @@ def _commands(tool: Any) -> list[str]:
|
|
|
98
132
|
return tool.client.conn.commands
|
|
99
133
|
|
|
100
134
|
|
|
135
|
+
class _OpenAIChatAgentForTest(OpenAIChatAgent):
|
|
136
|
+
async def build_tools_for_test(self, ssh: SSHClient) -> tuple[dict[str, Any], list[Any]]:
|
|
137
|
+
return await self._build_tools({"ssh": ssh})
|
|
138
|
+
|
|
139
|
+
|
|
101
140
|
# ─── OpenAI shell ─────────────────────────────────────────────────────
|
|
102
141
|
|
|
103
142
|
|
|
@@ -135,6 +174,96 @@ def test_openai_shell_to_params_is_shell_type() -> None:
|
|
|
135
174
|
assert tool.to_params()["type"] == "shell"
|
|
136
175
|
|
|
137
176
|
|
|
177
|
+
# ─── OpenAI-compatible OpenCode workspace tools ───────────────────────
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
async def test_openai_compatible_catalog_matches_opencode_workspace_tools() -> None:
|
|
181
|
+
agent = _OpenAIChatAgentForTest(
|
|
182
|
+
OpenAIChatConfig(model="qwen3.6-plus", model_client=cast("Any", object()))
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
tools, params = await agent.build_tools_for_test(_ssh())
|
|
186
|
+
|
|
187
|
+
assert list(tools) == ["bash", "read", "glob", "grep", "edit", "write"]
|
|
188
|
+
assert [param["function"]["name"] for param in params] == [
|
|
189
|
+
"bash",
|
|
190
|
+
"read",
|
|
191
|
+
"glob",
|
|
192
|
+
"grep",
|
|
193
|
+
"edit",
|
|
194
|
+
"write",
|
|
195
|
+
]
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
async def test_openai_compatible_bash_uses_workdir_and_timeout() -> None:
|
|
199
|
+
tool = BashTool(spec=BashTool.default_spec("qwen"), client=_ssh())
|
|
200
|
+
|
|
201
|
+
await tool.execute({"command": "echo hi", "workdir": "/tmp/my dir", "timeout": 2500})
|
|
202
|
+
|
|
203
|
+
assert _commands(tool) == ["cd '/tmp/my dir' && timeout 3s bash -lc 'echo hi'"]
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
async def test_openai_compatible_write_stores_file_via_workspace_sftp() -> None:
|
|
207
|
+
ssh = _FakeSSH()
|
|
208
|
+
tool = WriteTool(spec=WriteTool.default_spec("qwen"), client=cast("SSHClient", ssh))
|
|
209
|
+
|
|
210
|
+
result = await tool.execute({"filePath": "/REPORT.md", "content": "done"})
|
|
211
|
+
|
|
212
|
+
assert result.isError is False
|
|
213
|
+
assert ssh.files["/REPORT.md"] == b"done"
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
async def test_openai_compatible_edit_rewrites_unique_match() -> None:
|
|
217
|
+
ssh = _FakeSSH(files={"/f.txt": b"hello old world"})
|
|
218
|
+
tool = EditTool(spec=EditTool.default_spec("qwen"), client=cast("SSHClient", ssh))
|
|
219
|
+
|
|
220
|
+
result = await tool.execute(
|
|
221
|
+
{"filePath": "/f.txt", "oldString": "old", "newString": "new"},
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
assert result.isError is False
|
|
225
|
+
assert ssh.files["/f.txt"] == b"hello new world"
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
async def test_openai_compatible_edit_rejects_ambiguous_match() -> None:
|
|
229
|
+
ssh = _FakeSSH(files={"/f.txt": b"a a a"})
|
|
230
|
+
tool = EditTool(spec=EditTool.default_spec("qwen"), client=cast("SSHClient", ssh))
|
|
231
|
+
|
|
232
|
+
result = await tool.execute(
|
|
233
|
+
{"filePath": "/f.txt", "oldString": "a", "newString": "b"},
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
assert result.isError is True
|
|
237
|
+
assert ssh.files["/f.txt"] == b"a a a"
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
async def test_openai_compatible_read_lists_directories() -> None:
|
|
241
|
+
tool = ReadTool(
|
|
242
|
+
spec=ReadTool.default_spec("qwen"),
|
|
243
|
+
client=_ssh(files={"/work/a.txt": b"a", "/work/nested/b.txt": b"b"}),
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
result = await tool.execute({"filePath": "/work"})
|
|
247
|
+
|
|
248
|
+
text = result_text(result)
|
|
249
|
+
assert "<type>directory</type>" in text
|
|
250
|
+
assert "a.txt" in text
|
|
251
|
+
assert "nested" in text
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
async def test_openai_compatible_read_accepts_zero_offset_for_first_page() -> None:
|
|
255
|
+
tool = ReadTool(
|
|
256
|
+
spec=ReadTool.default_spec("qwen"),
|
|
257
|
+
client=_ssh(files={"/f.txt": b"alpha\nbeta\n"}),
|
|
258
|
+
)
|
|
259
|
+
|
|
260
|
+
result = await tool.execute({"filePath": "/f.txt", "offset": 0, "limit": 1})
|
|
261
|
+
|
|
262
|
+
text = result_text(result)
|
|
263
|
+
assert "1: alpha" in text
|
|
264
|
+
assert "2: beta" not in text
|
|
265
|
+
|
|
266
|
+
|
|
138
267
|
# ─── Gemini shell ─────────────────────────────────────────────────────
|
|
139
268
|
|
|
140
269
|
|
hud/eval/tests/test_rollout.py
CHANGED
|
@@ -13,14 +13,18 @@ the atom and return a :class:`Job`.
|
|
|
13
13
|
from __future__ import annotations
|
|
14
14
|
|
|
15
15
|
import asyncio
|
|
16
|
+
import json
|
|
16
17
|
import textwrap
|
|
17
18
|
from contextlib import asynccontextmanager
|
|
19
|
+
from types import SimpleNamespace
|
|
18
20
|
from typing import TYPE_CHECKING, Any
|
|
19
21
|
|
|
20
22
|
import mcp.types as mcp_types
|
|
21
23
|
import pytest
|
|
22
24
|
|
|
23
25
|
from hud.agents.base import Agent
|
|
26
|
+
from hud.agents.openai_compatible import OpenAIChatAgent
|
|
27
|
+
from hud.agents.types import OpenAIChatConfig
|
|
24
28
|
from hud.environment import Environment
|
|
25
29
|
from hud.eval import Job, LocalRuntime, Task, Taskset
|
|
26
30
|
from hud.eval.run import Run, rollout
|
|
@@ -63,6 +67,44 @@ class _FnAgent(Agent):
|
|
|
63
67
|
run.trace.content = self._fn(run.prompt)
|
|
64
68
|
|
|
65
69
|
|
|
70
|
+
class _SequencedCompletions:
|
|
71
|
+
def __init__(self, responses: list[Any]) -> None:
|
|
72
|
+
self._responses = responses
|
|
73
|
+
self.requests: list[dict[str, Any]] = []
|
|
74
|
+
|
|
75
|
+
async def create(self, **kwargs: Any) -> Any:
|
|
76
|
+
self.requests.append(kwargs)
|
|
77
|
+
return self._responses.pop(0)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class _FakeOpenAI:
|
|
81
|
+
def __init__(self, responses: list[Any]) -> None:
|
|
82
|
+
self.chat = SimpleNamespace(completions=_SequencedCompletions(responses))
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _chat_response(content: str, tool_calls: list[Any] | None = None) -> Any:
|
|
86
|
+
message = SimpleNamespace(
|
|
87
|
+
content=content,
|
|
88
|
+
tool_calls=tool_calls or [],
|
|
89
|
+
refusal=None,
|
|
90
|
+
model_dump=lambda exclude_none=True: {"role": "assistant", "content": content},
|
|
91
|
+
)
|
|
92
|
+
choice = SimpleNamespace(message=message, finish_reason="stop", logprobs=None)
|
|
93
|
+
return SimpleNamespace(
|
|
94
|
+
choices=[choice],
|
|
95
|
+
model="fake-openai-compatible",
|
|
96
|
+
usage=SimpleNamespace(prompt_tokens=1, completion_tokens=1, prompt_tokens_details=None),
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _tool_call(name: str, arguments: str) -> Any:
|
|
101
|
+
return SimpleNamespace(
|
|
102
|
+
type="function",
|
|
103
|
+
id=f"call_{name}",
|
|
104
|
+
function=SimpleNamespace(name=name, arguments=arguments),
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
|
|
66
108
|
def _add_task(a: int, b: int) -> Task:
|
|
67
109
|
"""A pure data row; the env it names is defined by the spawned file."""
|
|
68
110
|
return Task(env="sums", id="add", args={"a": a, "b": b})
|
|
@@ -86,6 +128,54 @@ async def test_rollout_returns_graded_run_with_trace_id(env_file: Path) -> None:
|
|
|
86
128
|
assert run.runtime.startswith("tcp://127.0.0.1:")
|
|
87
129
|
|
|
88
130
|
|
|
131
|
+
async def test_openai_compatible_write_reaches_workspace_grader(tmp_path: Path) -> None:
|
|
132
|
+
workspace = tmp_path / "workspace"
|
|
133
|
+
report = workspace / "REPORT.md"
|
|
134
|
+
env = Environment("opencode_report")
|
|
135
|
+
env.workspace(workspace, guest_path=str(workspace))
|
|
136
|
+
|
|
137
|
+
@env.initialize
|
|
138
|
+
async def seed() -> None:
|
|
139
|
+
workspace.mkdir(parents=True, exist_ok=True)
|
|
140
|
+
report.unlink(missing_ok=True)
|
|
141
|
+
|
|
142
|
+
@env.template()
|
|
143
|
+
async def write_report():
|
|
144
|
+
yield "Write PASS to REPORT.md."
|
|
145
|
+
yield 1.0 if report.exists() and report.read_text().strip() == "PASS" else 0.0
|
|
146
|
+
|
|
147
|
+
model_client = _FakeOpenAI(
|
|
148
|
+
[
|
|
149
|
+
_chat_response(
|
|
150
|
+
"",
|
|
151
|
+
[_tool_call("write", json.dumps({"filePath": str(report), "content": "PASS"}))],
|
|
152
|
+
),
|
|
153
|
+
_chat_response("done"),
|
|
154
|
+
]
|
|
155
|
+
)
|
|
156
|
+
agent = OpenAIChatAgent(
|
|
157
|
+
OpenAIChatConfig(model="qwen3.6-plus", model_client=model_client, max_steps=4)
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
run = await rollout(
|
|
161
|
+
Task(env="opencode_report", id="write_report"),
|
|
162
|
+
agent,
|
|
163
|
+
runtime=lambda _task: _local(env),
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
assert run.reward == 1.0
|
|
167
|
+
assert report.read_text() == "PASS"
|
|
168
|
+
tools = model_client.chat.completions.requests[0]["extra_body"]["tools"]
|
|
169
|
+
assert [tool["function"]["name"] for tool in tools] == [
|
|
170
|
+
"bash",
|
|
171
|
+
"read",
|
|
172
|
+
"glob",
|
|
173
|
+
"grep",
|
|
174
|
+
"edit",
|
|
175
|
+
"write",
|
|
176
|
+
]
|
|
177
|
+
|
|
178
|
+
|
|
89
179
|
async def test_mid_run_failure_keeps_the_real_run_and_its_evidence(env_file: Path) -> None:
|
|
90
180
|
def boom(prompt: str) -> str:
|
|
91
181
|
raise RuntimeError("agent exploded")
|
hud/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.6.
|
|
3
|
+
Version: 0.6.8.dev0
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -87,7 +87,7 @@ Description-Content-Type: text/markdown
|
|
|
87
87
|
|
|
88
88
|
HUD is a platform for building RL environments for AI agents, across coding, browser, computer-use, and robotics. Define an environment, write tasks, and run them as evals and training across any model, at any scale.
|
|
89
89
|
|
|
90
|
-
To learn more, see the [documentation](https://docs.hud.ai) and [
|
|
90
|
+
To learn more, see the [documentation](https://docs.hud.ai) and [environment reference](https://docs.hud.ai/v6/core/environment).
|
|
91
91
|
|
|
92
92
|
[](https://pypi.org/project/hud-python/)
|
|
93
93
|
[](LICENSE)
|
|
@@ -120,7 +120,7 @@ Then scaffold your first environment:
|
|
|
120
120
|
hud init my-env
|
|
121
121
|
```
|
|
122
122
|
|
|
123
|
-

|
|
124
124
|
|
|
125
125
|
## The protocol
|
|
126
126
|
|
|
@@ -159,14 +159,14 @@ hud eval my-taskset --remote
|
|
|
159
159
|
For local iteration, the same protocol works against a container on your laptop:
|
|
160
160
|
|
|
161
161
|
```bash
|
|
162
|
-
|
|
163
|
-
docker run -d --name run1 my-env
|
|
164
|
-
|
|
165
|
-
|
|
162
|
+
docker build -f Dockerfile.hud -t my-env .
|
|
163
|
+
docker run -d --name run1 -p 8765:8765 my-env
|
|
164
|
+
hud task start fix_bug --url tcp://127.0.0.1:8765
|
|
165
|
+
hud task grade fix_bug --url tcp://127.0.0.1:8765 --answer "..."
|
|
166
166
|
docker rm -f run1
|
|
167
167
|
```
|
|
168
168
|
|
|
169
|
-
→ [
|
|
169
|
+
→ [Run & deploy](https://docs.hud.ai/v6/core/runtime)
|
|
170
170
|
|
|
171
171
|
## Environments & templates
|
|
172
172
|
|
|
@@ -193,7 +193,7 @@ hud eval tasks.py claude --group 3
|
|
|
193
193
|
|
|
194
194
|
Each graded evaluation is a **trace** (the SDK's live handle is a `Run`). With `HUD_API_KEY` set, every rollout is recorded on [hud.ai](https://hud.ai). Tasks that need a shell, browser, GUI, or robot declare **capabilities** (below); everything else — variants, grading, batching — stays identical.
|
|
195
195
|
|
|
196
|
-
→ [Quickstart](https://docs.hud.ai/quickstart) · [Tasks & tasksets](https://docs.hud.ai/
|
|
196
|
+
→ [Quickstart](https://docs.hud.ai/v6/start/quickstart) · [Tasks & tasksets](https://docs.hud.ai/v6/core/tasks)
|
|
197
197
|
|
|
198
198
|
## Capabilities & harnesses
|
|
199
199
|
|
|
@@ -211,39 +211,42 @@ A **capability** is a connection the environment exposes; a **harness** attaches
|
|
|
211
211
|
|
|
212
212
|
**Bring your own:** a harness attaches to a capability and defines a tool spec — wrap `browser-use` on `cdp`, a VLA policy on `robot`, or your own agent on `ssh` / `mcp`. No protocol work required.
|
|
213
213
|
|
|
214
|
-
→ [Capabilities](https://docs.hud.ai/
|
|
214
|
+
→ [Capabilities](https://docs.hud.ai/v6/core/capabilities) · [Models](https://docs.hud.ai/v6/core/agents) · [Robots](https://docs.hud.ai/v6/advanced/robots)
|
|
215
215
|
|
|
216
216
|
## Deploy on the platform
|
|
217
217
|
|
|
218
218
|
From the [platform UI](https://hud.ai) you can run batches, compare models on the same taskset, and inspect every trace.
|
|
219
219
|
|
|
220
|
-
→ [
|
|
220
|
+
→ [Run & deploy](https://docs.hud.ai/v6/core/runtime)
|
|
221
221
|
|
|
222
222
|
## Train on rewards
|
|
223
223
|
|
|
224
|
-
Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and
|
|
224
|
+
Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and pass the graded runs to `TrainingClient.step()`:
|
|
225
225
|
|
|
226
226
|
```python
|
|
227
|
+
from hud import TrainingClient
|
|
227
228
|
from hud.agents import create_agent
|
|
228
|
-
from hud.eval import
|
|
229
|
+
from hud.eval import Job
|
|
229
230
|
|
|
230
|
-
agent = create_agent("
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
231
|
+
agent = create_agent("arith-rl", completion_kwargs={"extra_body": {"return_token_ids": True}})
|
|
232
|
+
trainer = TrainingClient("arith-rl")
|
|
233
|
+
taskset, runtime = ... # your Taskset and where rollouts run
|
|
234
|
+
|
|
235
|
+
session = await Job.start("arith-rl", group=8)
|
|
236
|
+
start = len(session.runs)
|
|
237
|
+
await taskset.run(agent, runtime=runtime, group=8, job=session)
|
|
238
|
+
await trainer.step(session.runs[start:], learning_rate=1e-5, group_size=8)
|
|
235
239
|
```
|
|
236
240
|
|
|
237
241
|
HUD is the environment-and-reward source for your own GRPO/PPO loop — the same environment trains any model, text or multimodal, unchanged.
|
|
238
242
|
|
|
239
|
-
→ [Training](https://docs.hud.ai/
|
|
243
|
+
→ [Training](https://docs.hud.ai/v6/core/training) · [Designing tasks for signal](https://docs.hud.ai/v6/core/advice)
|
|
240
244
|
|
|
241
245
|
## Links
|
|
242
246
|
|
|
243
247
|
- [Documentation](https://docs.hud.ai)
|
|
244
|
-
- [Quickstart](https://docs.hud.ai/quickstart)
|
|
245
|
-
- [CLI reference](https://docs.hud.ai/
|
|
246
|
-
- [Leaderboards](https://hud.ai/leaderboards)
|
|
248
|
+
- [Quickstart](https://docs.hud.ai/v6/start/quickstart)
|
|
249
|
+
- [CLI reference](https://docs.hud.ai/v6/core/cli)
|
|
247
250
|
- [Environment templates](https://hud.ai/environments)
|
|
248
251
|
- [Supported models](https://hud.ai/models)
|
|
249
252
|
- [Discord](https://discord.gg/wkjtmHYYjm)
|
|
@@ -268,8 +271,8 @@ Key areas: [Agents](hud/agents/) · [Environments](hud/environment/) · [Capabil
|
|
|
268
271
|
|
|
269
272
|
```bibtex
|
|
270
273
|
@software{hud2025agentevalplatform,
|
|
271
|
-
author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
|
|
272
|
-
title = {HUD: An Evaluation and RL
|
|
274
|
+
author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep Chawla and Nguyen Nhat Minh},
|
|
275
|
+
title = {HUD: An Evaluation and RL Environments Platform for Agents},
|
|
273
276
|
date = {2025-04},
|
|
274
277
|
url = {https://github.com/hud-evals/hud-python},
|
|
275
278
|
langid = {en}
|
|
@@ -5,7 +5,7 @@ hud/conftest.py,sha256=HKbHvmFXLPX6KFSJgPFUAM22auclNNdFmHGwilNzg98,1012
|
|
|
5
5
|
hud/server.py,sha256=NtSHIjBFr9lYvryfXrCa-VhwqnwkRy7n5fp_OuNhNOI,1235
|
|
6
6
|
hud/settings.py,sha256=eyvMIOOlFk6kIAP8UsHEeoqf_UiOVhb1jhRCM2qv7b8,6393
|
|
7
7
|
hud/types.py,sha256=kFVbQ-CcVhYpdX5jjgacRIppFS0q_nMXahijV_Hhl58,15022
|
|
8
|
-
hud/version.py,sha256=
|
|
8
|
+
hud/version.py,sha256=RD_T-I7Yj0KBuadVj2UQF2XmPhTeHn3Lo45gIQTb5e4,109
|
|
9
9
|
hud/agents/__init__.py,sha256=UL1PXucnY1Ln9o_Xf0Y-mvfbNh6NUdMyPJp-_d9Wq7Q,5082
|
|
10
10
|
hud/agents/base.py,sha256=WgEOWUmMioXTxYe6cOvbqnbM4n989Z9kFEZIN6xJ3pU,659
|
|
11
11
|
hud/agents/tool_agent.py,sha256=a0xsh2d8IwvmiPGMs9LCzghi61FHt4vMK_9sW8eNFbA,12557
|
|
@@ -54,10 +54,10 @@ hud/agents/openai/tools/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
|
|
|
54
54
|
hud/agents/openai/tools/tests/test_computer.py,sha256=qEK7h2eD4j6Wg6VjU_YD8kCRpXOXwHDXBv1bz0mh5bo,3488
|
|
55
55
|
hud/agents/openai/tools/tests/test_strict_schema.py,sha256=8dGkCSO7_-TvryEfStKZ7nKEuO3WGLfzsjPUbfdHMhQ,2344
|
|
56
56
|
hud/agents/openai_compatible/__init__.py,sha256=zQZSQHB97g3rtPx4Y8aG_0K1i17MLwGRaTyQLd31Jqk,98
|
|
57
|
-
hud/agents/openai_compatible/agent.py,sha256=
|
|
58
|
-
hud/agents/openai_compatible/tools/__init__.py,sha256=
|
|
57
|
+
hud/agents/openai_compatible/agent.py,sha256=7Zw6wa1ce7kt_xF4R_OfuoDbMPl09TktpjgFc16-_Lo,9946
|
|
58
|
+
hud/agents/openai_compatible/tools/__init__.py,sha256=kOPtrgiqTcnQabZpo1aNfYfnaqip6M3z2OeffJNz-Ak,361
|
|
59
59
|
hud/agents/openai_compatible/tools/base.py,sha256=Jl6Bm9ZgEOqgdOnM7Xm66VN3RpfjeZF9w55of_ZGCMI,5760
|
|
60
|
-
hud/agents/openai_compatible/tools/filesystem.py,sha256=
|
|
60
|
+
hud/agents/openai_compatible/tools/filesystem.py,sha256=hHSVW25OT_zxdJO6fE2kOPnnABOd06kHPWC08epoCNg,12523
|
|
61
61
|
hud/agents/openai_compatible/tools/mcp_proxy.py,sha256=pfJdCvFxTaXkj6qrGK04jxibjeIhm6O-5STHPcB_qL4,844
|
|
62
62
|
hud/agents/robot/__init__.py,sha256=UXyQYaoLMrxFr1QYU2D6UUz6BwK9gsp4-abe5jAOqUU,1620
|
|
63
63
|
hud/agents/robot/_types.py,sha256=byWZMYRwLuzvu2U-ZXMx3TcyRTPcsjGF5HkItbgfcQ4,222
|
|
@@ -75,7 +75,7 @@ hud/agents/tests/test_claude_sdk_agent.py,sha256=lSY8wnLQgfJBNzF9BU-PcO4IrKaWtva
|
|
|
75
75
|
hud/agents/tests/test_gemini_agent.py,sha256=7OdFFVSOkJE8Gb3blptWnEXuFWHuFCNlFAoMXTyV0Ec,4835
|
|
76
76
|
hud/agents/tests/test_openai_agent.py,sha256=-69hoi_Bv9JdGngEnaJ74mSH-JCupg66ny7hODXQF00,4180
|
|
77
77
|
hud/agents/tests/test_openai_compatible_agent.py,sha256=6JxFxkRdPT1O574VYvcsMXiUwhcvBFJQLBx46Utt4QI,2874
|
|
78
|
-
hud/agents/tests/test_provider_native_tools.py,sha256=
|
|
78
|
+
hud/agents/tests/test_provider_native_tools.py,sha256=dZ4dOT3sUkMh_7p-pGDnTIL7UDdwngNJ8jarlqU0Plk,12989
|
|
79
79
|
hud/agents/tests/test_tool_agent.py,sha256=w8cuBAMcGBbIwiMnjH-tg4ztqhlewQOnXK3h1XLkj5o,5373
|
|
80
80
|
hud/agents/tests/test_trace.py,sha256=rUNbV-y4gI0dH0xluT9COY_epJD69XHAzaC1HO4mX10,4517
|
|
81
81
|
hud/agents/tools/__init__.py,sha256=-fnzzq8qwEXWD8s-T8RUGamuYndXTESeFNNMQxsXH5A,858
|
|
@@ -179,7 +179,7 @@ hud/eval/tests/test_docker_provider.py,sha256=1W1xyOzjHti6jfV2eiVnNd5CxKEMAKq8NB
|
|
|
179
179
|
hud/eval/tests/test_file_tracking_observer.py,sha256=DteazLLWK0LKgtUn_6v4_wMI-1jhENMx7Y87-pdg-I8,4197
|
|
180
180
|
hud/eval/tests/test_hosted.py,sha256=S0gGqAUaizlCGC30XwvaWb-TJhFgLUPlwsMO0WgjVWM,16284
|
|
181
181
|
hud/eval/tests/test_job.py,sha256=UyaqbOY-0pnd2RNIp3glS_L_JJFT0-7GlSkgRhgaU1A,1867
|
|
182
|
-
hud/eval/tests/test_rollout.py,sha256=
|
|
182
|
+
hud/eval/tests/test_rollout.py,sha256=YUVqzDbIg9Y5LNnDwaNJ40hOL1BVAFgpHRHCyGlcfQw,14027
|
|
183
183
|
hud/eval/tests/test_sync.py,sha256=1gFC65ZiZojeSn9q1v-RMK2Ps130mlh-aXE7G8sn54k,5234
|
|
184
184
|
hud/eval/tests/test_task.py,sha256=n0E3B3TBYV6aM2_KFVGPHuD9nBGlpwq4ZvBu9wpjqtU,9754
|
|
185
185
|
hud/graders/__init__.py,sha256=eccF8MXHQBvmynULljOCEMn82YSK0HSScD1TlS8UoT4,1570
|
|
@@ -226,8 +226,8 @@ hud/utils/tests/test_platform.py,sha256=mwhyFkUBvgmHRc43vQ_JgAAW2N9fIaxkQhVo-GB4
|
|
|
226
226
|
hud/utils/tests/test_requests.py,sha256=ENK6P5xLTuSgWDcCau4zCj_5zPV_EooGwU4P8YYl5Gw,9109
|
|
227
227
|
hud/utils/tests/test_serialization.py,sha256=GY4NiFUJtwLSYQWA0n1zme-Ul4DnBLByHCOOkxn2kLM,819
|
|
228
228
|
hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
229
|
-
hud_python-0.6.
|
|
230
|
-
hud_python-0.6.
|
|
231
|
-
hud_python-0.6.
|
|
232
|
-
hud_python-0.6.
|
|
233
|
-
hud_python-0.6.
|
|
229
|
+
hud_python-0.6.8.dev0.dist-info/METADATA,sha256=k0BA7OmInHsM-CB-pm5GNc0yYVwF7EbUr0etmiU-xXg,12427
|
|
230
|
+
hud_python-0.6.8.dev0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
231
|
+
hud_python-0.6.8.dev0.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
|
|
232
|
+
hud_python-0.6.8.dev0.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
|
|
233
|
+
hud_python-0.6.8.dev0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|