hud-python 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +13 -10
- hud/adapters/claude/adapter.py +30 -18
- hud/adapters/common/adapter.py +0 -1
- hud/adapters/common/types.py +129 -4
- hud/adapters/operator/adapter.py +23 -13
- hud/agent/base.py +5 -4
- hud/agent/claude.py +65 -13
- hud/agent/claude_plays_pokemon.py +3 -2
- hud/agent/langchain.py +8 -2
- hud/agent/operator.py +36 -11
- hud/agent/tests/test_base.py +2 -2
- hud/env/docker_client.py +24 -2
- hud/env/environment.py +86 -40
- hud/env/local_docker_client.py +50 -4
- hud/env/remote_client.py +22 -4
- hud/env/remote_docker_client.py +8 -4
- hud/gym.py +15 -4
- hud/job.py +100 -35
- hud/server/requests.py +26 -4
- hud/settings.py +7 -1
- hud/task.py +84 -6
- hud/taskset.py +79 -12
- hud/telemetry/context.py +33 -57
- hud/telemetry/exporter.py +4 -6
- hud/telemetry/instrumentation/mcp.py +0 -3
- hud/telemetry/tests/test_context.py +7 -3
- hud/trajectory.py +3 -0
- hud/types.py +28 -2
- hud/utils/agent.py +37 -0
- hud/utils/common.py +142 -26
- hud/utils/config.py +11 -0
- hud/utils/tests/test_common.py +225 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.2.5.dist-info → hud_python-0.2.7.dist-info}/METADATA +26 -23
- {hud_python-0.2.5.dist-info → hud_python-0.2.7.dist-info}/RECORD +38 -37
- {hud_python-0.2.5.dist-info → hud_python-0.2.7.dist-info}/WHEEL +0 -0
- {hud_python-0.2.5.dist-info → hud_python-0.2.7.dist-info}/licenses/LICENSE +0 -0
hud/utils/tests/test_common.py
CHANGED
|
@@ -50,3 +50,228 @@ async def test_get_gym_id(mocker: pytest_mock.MockerFixture):
|
|
|
50
50
|
mocker.patch("hud.utils.common.make_request", return_value={"id": "test_gym_id"})
|
|
51
51
|
gym_id = await get_gym_id("test_gym")
|
|
52
52
|
assert gym_id == "test_gym_id"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_function_config_stores_function_name_args_and_optional_id():
|
|
56
|
+
"""FunctionConfig should store function name, args list, and optional id."""
|
|
57
|
+
from hud.utils.common import FunctionConfig
|
|
58
|
+
|
|
59
|
+
# Minimal config
|
|
60
|
+
minimal = FunctionConfig(function="test_func", args=[])
|
|
61
|
+
assert minimal.function == "test_func"
|
|
62
|
+
assert minimal.args == []
|
|
63
|
+
assert minimal.id is None
|
|
64
|
+
|
|
65
|
+
# With args
|
|
66
|
+
with_args = FunctionConfig(function="navigate", args=["https://example.com", {"wait": True}])
|
|
67
|
+
assert with_args.function == "navigate"
|
|
68
|
+
assert len(with_args.args) == 2
|
|
69
|
+
assert with_args.args[0] == "https://example.com"
|
|
70
|
+
assert with_args.args[1] == {"wait": True}
|
|
71
|
+
|
|
72
|
+
# With id
|
|
73
|
+
with_id = FunctionConfig(
|
|
74
|
+
function="complex_operation",
|
|
75
|
+
args=[42, "test", {"nested": {"key": "value"}}],
|
|
76
|
+
id="op_123",
|
|
77
|
+
)
|
|
78
|
+
assert with_id.function == "complex_operation"
|
|
79
|
+
assert len(with_id.args) == 3
|
|
80
|
+
assert with_id.id == "op_123"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@pytest.mark.asyncio
|
|
84
|
+
async def test_get_gym_id_fetches_id_from_api_response(
|
|
85
|
+
mocker: pytest_mock.MockerFixture,
|
|
86
|
+
):
|
|
87
|
+
"""get_gym_id should extract 'id' field from API response."""
|
|
88
|
+
# Arrange
|
|
89
|
+
api_response = {"id": "gym-123", "name": "Test Gym", "status": "active"}
|
|
90
|
+
mocker.patch("hud.utils.common.make_request", return_value=api_response)
|
|
91
|
+
|
|
92
|
+
# Act
|
|
93
|
+
gym_id = await get_gym_id("test_gym")
|
|
94
|
+
|
|
95
|
+
# Assert
|
|
96
|
+
assert gym_id == "gym-123"
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@pytest.mark.asyncio
|
|
100
|
+
async def test_get_gym_id_propagates_network_errors(mocker: pytest_mock.MockerFixture):
|
|
101
|
+
"""get_gym_id should propagate exceptions from make_request."""
|
|
102
|
+
# Arrange
|
|
103
|
+
mocker.patch("hud.utils.common.make_request", side_effect=ConnectionError("API unavailable"))
|
|
104
|
+
|
|
105
|
+
# Act & Assert
|
|
106
|
+
with pytest.raises(ConnectionError, match="API unavailable"):
|
|
107
|
+
await get_gym_id("test_gym")
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@pytest.mark.asyncio
|
|
111
|
+
async def test_get_gym_id_raises_key_error_when_id_missing(
|
|
112
|
+
mocker: pytest_mock.MockerFixture,
|
|
113
|
+
):
|
|
114
|
+
"""get_gym_id should raise KeyError when response lacks 'id' field."""
|
|
115
|
+
# Arrange
|
|
116
|
+
incomplete_response = {"name": "Test Gym", "status": "active"} # Missing 'id'
|
|
117
|
+
mocker.patch("hud.utils.common.make_request", return_value=incomplete_response)
|
|
118
|
+
|
|
119
|
+
# Act & Assert
|
|
120
|
+
with pytest.raises(KeyError):
|
|
121
|
+
await get_gym_id("test_gym")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def test_directory_to_tar_bytes_creates_valid_tar_archive(
|
|
125
|
+
tmpdir_factory: pytest.TempdirFactory,
|
|
126
|
+
):
|
|
127
|
+
"""directory_to_tar_bytes should create a valid tar archive containing all files."""
|
|
128
|
+
# Arrange
|
|
129
|
+
temp_dir = tmpdir_factory.mktemp("test_archive")
|
|
130
|
+
temp_dir_path = Path(temp_dir)
|
|
131
|
+
|
|
132
|
+
# Create test structure
|
|
133
|
+
(temp_dir_path / "file1.txt").write_text("content1")
|
|
134
|
+
(temp_dir_path / "file2.py").write_text("import os\nprint('hello')")
|
|
135
|
+
|
|
136
|
+
subdir = temp_dir_path / "subdir"
|
|
137
|
+
subdir.mkdir()
|
|
138
|
+
(subdir / "nested.json").write_text('{"key": "value"}')
|
|
139
|
+
|
|
140
|
+
# Act
|
|
141
|
+
tar_bytes = directory_to_tar_bytes(temp_dir_path)
|
|
142
|
+
|
|
143
|
+
# Assert
|
|
144
|
+
assert isinstance(tar_bytes, bytes)
|
|
145
|
+
assert len(tar_bytes) > 0
|
|
146
|
+
|
|
147
|
+
# Verify contents
|
|
148
|
+
with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:*") as tar:
|
|
149
|
+
members = {m.name for m in tar.getmembers()}
|
|
150
|
+
assert "file1.txt" in members
|
|
151
|
+
assert "file2.py" in members
|
|
152
|
+
assert "subdir/nested.json" in members
|
|
153
|
+
|
|
154
|
+
# Verify file contents
|
|
155
|
+
content = tar.extractfile("file1.txt")
|
|
156
|
+
assert content is not None
|
|
157
|
+
assert content.read().decode() == "content1"
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def test_directory_to_tar_bytes_handles_empty_directory(
|
|
161
|
+
tmpdir_factory: pytest.TempdirFactory,
|
|
162
|
+
):
|
|
163
|
+
"""directory_to_tar_bytes should handle empty directories gracefully."""
|
|
164
|
+
# Arrange
|
|
165
|
+
empty_dir = tmpdir_factory.mktemp("empty")
|
|
166
|
+
empty_dir_path = Path(empty_dir)
|
|
167
|
+
|
|
168
|
+
# Act
|
|
169
|
+
tar_bytes = directory_to_tar_bytes(empty_dir_path)
|
|
170
|
+
|
|
171
|
+
# Assert
|
|
172
|
+
assert isinstance(tar_bytes, bytes)
|
|
173
|
+
assert len(tar_bytes) > 0 # Even empty tar has headers
|
|
174
|
+
|
|
175
|
+
with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:*") as tar:
|
|
176
|
+
members = tar.getmembers()
|
|
177
|
+
# May contain the directory itself or be completely empty
|
|
178
|
+
assert len(members) >= 0
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def test_directory_to_tar_bytes_preserves_directory_structure(
|
|
182
|
+
tmpdir_factory: pytest.TempdirFactory,
|
|
183
|
+
):
|
|
184
|
+
"""directory_to_tar_bytes should preserve nested directory structure."""
|
|
185
|
+
# Arrange
|
|
186
|
+
root = tmpdir_factory.mktemp("root")
|
|
187
|
+
root_path = Path(root)
|
|
188
|
+
|
|
189
|
+
# Create nested structure
|
|
190
|
+
(root_path / "a" / "b" / "c").mkdir(parents=True)
|
|
191
|
+
(root_path / "a" / "file1.txt").write_text("in a")
|
|
192
|
+
(root_path / "a" / "b" / "file2.txt").write_text("in b")
|
|
193
|
+
(root_path / "a" / "b" / "c" / "file3.txt").write_text("in c")
|
|
194
|
+
|
|
195
|
+
# Act
|
|
196
|
+
tar_bytes = directory_to_tar_bytes(root_path)
|
|
197
|
+
|
|
198
|
+
# Assert
|
|
199
|
+
with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:*") as tar:
|
|
200
|
+
members = {m.name for m in tar.getmembers()}
|
|
201
|
+
assert "a/file1.txt" in members
|
|
202
|
+
assert "a/b/file2.txt" in members
|
|
203
|
+
assert "a/b/c/file3.txt" in members
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def test_directory_to_tar_bytes_with_exclusions(tmpdir_factory: pytest.TempdirFactory):
|
|
207
|
+
"""Test directory_to_tar_bytes with files to exclude."""
|
|
208
|
+
temp_dir = tmpdir_factory.mktemp("test_exclude_dir")
|
|
209
|
+
temp_dir_path = Path(temp_dir)
|
|
210
|
+
|
|
211
|
+
# Create various files
|
|
212
|
+
(temp_dir_path / "include_me.txt").write_text("include")
|
|
213
|
+
(temp_dir_path / ".git").mkdir()
|
|
214
|
+
(temp_dir_path / ".git" / "config").write_text("git config")
|
|
215
|
+
(temp_dir_path / "__pycache__").mkdir()
|
|
216
|
+
(temp_dir_path / "__pycache__" / "module.pyc").write_bytes(b"pyc content")
|
|
217
|
+
(temp_dir_path / "normal_dir").mkdir()
|
|
218
|
+
(temp_dir_path / "normal_dir" / "file.py").write_text("python code")
|
|
219
|
+
|
|
220
|
+
tar_bytes = directory_to_tar_bytes(temp_dir_path)
|
|
221
|
+
|
|
222
|
+
# Check contents
|
|
223
|
+
with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:*") as tar:
|
|
224
|
+
member_names = {m.name for m in tar.getmembers()}
|
|
225
|
+
|
|
226
|
+
# Should include regular files and directories
|
|
227
|
+
assert "include_me.txt" in member_names
|
|
228
|
+
assert "normal_dir/file.py" in member_names
|
|
229
|
+
|
|
230
|
+
# Implementation might exclude common patterns like .git and __pycache__
|
|
231
|
+
# This depends on the actual implementation
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def test_directory_to_tar_bytes_empty_directory(tmpdir_factory: pytest.TempdirFactory):
|
|
235
|
+
"""Test directory_to_tar_bytes with empty directory."""
|
|
236
|
+
temp_dir = tmpdir_factory.mktemp("empty_dir")
|
|
237
|
+
temp_dir_path = Path(temp_dir)
|
|
238
|
+
|
|
239
|
+
tar_bytes = directory_to_tar_bytes(temp_dir_path)
|
|
240
|
+
|
|
241
|
+
# Should still create a valid tar even if empty
|
|
242
|
+
assert tar_bytes is not None
|
|
243
|
+
assert len(tar_bytes) > 0
|
|
244
|
+
|
|
245
|
+
with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:*") as tar:
|
|
246
|
+
members = tar.getmembers()
|
|
247
|
+
# Might be empty or contain just the root directory
|
|
248
|
+
assert len(members) >= 0
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def test_directory_to_tar_bytes_symlinks(tmpdir_factory: pytest.TempdirFactory):
|
|
252
|
+
"""Test directory_to_tar_bytes with symbolic links."""
|
|
253
|
+
temp_dir = tmpdir_factory.mktemp("symlink_dir")
|
|
254
|
+
temp_dir_path = Path(temp_dir)
|
|
255
|
+
|
|
256
|
+
# Create a file and a symlink to it
|
|
257
|
+
target_file = temp_dir_path / "target.txt"
|
|
258
|
+
target_file.write_text("target content")
|
|
259
|
+
|
|
260
|
+
symlink = temp_dir_path / "link_to_target.txt"
|
|
261
|
+
try:
|
|
262
|
+
symlink.symlink_to(target_file)
|
|
263
|
+
has_symlink = True
|
|
264
|
+
except OSError:
|
|
265
|
+
# Symlinks might not be supported on all systems (e.g., Windows without admin)
|
|
266
|
+
has_symlink = False
|
|
267
|
+
|
|
268
|
+
tar_bytes = directory_to_tar_bytes(temp_dir_path)
|
|
269
|
+
|
|
270
|
+
with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:*") as tar:
|
|
271
|
+
members = {m.name: m for m in tar.getmembers()}
|
|
272
|
+
|
|
273
|
+
assert "target.txt" in members
|
|
274
|
+
|
|
275
|
+
if has_symlink:
|
|
276
|
+
# Check how symlinks are handled (might be followed or preserved)
|
|
277
|
+
assert "link_to_target.txt" in members
|
hud/utils/tests/test_version.py
CHANGED
hud/version.py
CHANGED
{hud_python-0.2.5.dist-info → hud_python-0.2.7.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: SDK for the HUD evaluation platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-sdk
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-sdk/issues
|
|
@@ -47,6 +47,7 @@ Requires-Dist: langchain-openai
|
|
|
47
47
|
Requires-Dist: mcp
|
|
48
48
|
Requires-Dist: numpy
|
|
49
49
|
Requires-Dist: openai
|
|
50
|
+
Requires-Dist: pathspec>=0.12.1
|
|
50
51
|
Requires-Dist: pillow>=11.1.0
|
|
51
52
|
Requires-Dist: pydantic-settings<3,>=2
|
|
52
53
|
Requires-Dist: pydantic<3,>=2
|
|
@@ -61,7 +62,7 @@ Requires-Dist: ipython<9; extra == 'dev'
|
|
|
61
62
|
Requires-Dist: jupyter-client; extra == 'dev'
|
|
62
63
|
Requires-Dist: jupyter-core; extra == 'dev'
|
|
63
64
|
Requires-Dist: openai; extra == 'dev'
|
|
64
|
-
Requires-Dist: pyright==1.1.
|
|
65
|
+
Requires-Dist: pyright==1.1.401; extra == 'dev'
|
|
65
66
|
Requires-Dist: pytest-asyncio; extra == 'dev'
|
|
66
67
|
Requires-Dist: pytest-cov; extra == 'dev'
|
|
67
68
|
Requires-Dist: pytest-mock; extra == 'dev'
|
|
@@ -74,23 +75,23 @@ Description-Content-Type: text/markdown
|
|
|
74
75
|
</div>
|
|
75
76
|
|
|
76
77
|
<h3>
|
|
77
|
-
|
|
78
|
+
Evaluate your Computer Use AI agents across web browsers, desktop environments, and custom scenarios.
|
|
78
79
|
</h3>
|
|
79
80
|
|
|
80
|
-
|
|
81
|
-
>
|
|
82
|
-
> [📅 Hop on a call ](https://cal.com/jay-ram-z6st6w/demo) or [📧 founders@hud.so](mailto:founders@hud.so)
|
|
83
|
-
>
|
|
84
|
-
> We're here to help with eval strategies, custom environments, or improving your agent architecture!
|
|
81
|
+
### 🚀 Are you a startup building agents?
|
|
85
82
|
|
|
83
|
+
[📅 Hop on a call](https://cal.com/jay-ram-z6st6w/demo) or [📧 founders@hud.so](mailto:founders@hud.so)
|
|
86
84
|
|
|
87
|
-
|
|
85
|
+
We're here to help with eval strategies, custom environments, or improving your agent architecture!
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
> **Early Release Notice**: We'd love to hear your feedback in [Issues](https://github.com/hud-evals/hud-sdk/issues), as the SDK is still evolving!
|
|
88
89
|
|
|
89
90
|
[](https://pypi.org/project/hud-python/)
|
|
90
91
|
|
|
91
92
|
## ✨ What You Can Do
|
|
92
93
|
|
|
93
|
-
**Evaluate Existing Benchmarks**
|
|
94
|
+
**[Evaluate Existing Benchmarks](https://docs.hud.so/examples/benchmarking-agents)**
|
|
94
95
|
```python
|
|
95
96
|
from hud import load_taskset, run_job, ClaudeAgent
|
|
96
97
|
|
|
@@ -98,7 +99,7 @@ taskset = await load_taskset("WebVoyager") # or GAIA, OSWorld-Ubuntu, Mind2Web
|
|
|
98
99
|
job = await run_job(ClaudeAgent, taskset, "my-evaluation")
|
|
99
100
|
```
|
|
100
101
|
|
|
101
|
-
**Create Custom Tasks**
|
|
102
|
+
**[Create Custom Tasks](https://docs.hud.so/task-creation)**
|
|
102
103
|
```python
|
|
103
104
|
from hud.task import Task
|
|
104
105
|
|
|
@@ -110,7 +111,7 @@ task = Task(
|
|
|
110
111
|
)
|
|
111
112
|
```
|
|
112
113
|
|
|
113
|
-
**Build Custom Environments**
|
|
114
|
+
**[Build Custom Environments](https://docs.hud.so/environment-creation)**
|
|
114
115
|
```python
|
|
115
116
|
from hud.types import CustomGym
|
|
116
117
|
|
|
@@ -123,7 +124,7 @@ custom_gym = CustomGym(
|
|
|
123
124
|
# Or create complex Docker environments - see environments/ folder for examples
|
|
124
125
|
```
|
|
125
126
|
|
|
126
|
-
**Trace Tool Calls Alongside HUD Environments (or Independently)**
|
|
127
|
+
**[Trace Tool Calls Alongside HUD Environments (or Independently)](https://docs.hud.so/examples/mcp-agent-tracing)**
|
|
127
128
|
```python
|
|
128
129
|
import hud
|
|
129
130
|
|
|
@@ -132,23 +133,23 @@ with hud.trace("my-agent-run"):
|
|
|
132
133
|
result = await agent.run(task)
|
|
133
134
|
```
|
|
134
135
|
|
|
135
|
-
##
|
|
136
|
-
|
|
137
|
-
Before getting started, you'll need to obtain an API key:
|
|
136
|
+
## Quick Start
|
|
138
137
|
|
|
139
|
-
|
|
140
|
-
2. Set it in your environment or .env file:
|
|
138
|
+
### Installation
|
|
141
139
|
|
|
142
140
|
```bash
|
|
143
|
-
|
|
141
|
+
pip install hud-python
|
|
144
142
|
```
|
|
145
143
|
|
|
146
|
-
|
|
144
|
+
### API Key Setup
|
|
147
145
|
|
|
148
|
-
|
|
146
|
+
Before getting started, you'll need to obtain an API key:
|
|
147
|
+
|
|
148
|
+
1. Visit [app.hud.so](https://app.hud.so) to create a free account and generate your API key
|
|
149
|
+
2. Set it in your environment or .env file:
|
|
149
150
|
|
|
150
151
|
```bash
|
|
151
|
-
|
|
152
|
+
export HUD_API_KEY=your_api_key_here
|
|
152
153
|
```
|
|
153
154
|
|
|
154
155
|
### Simple Browser Example with Claude Computer Use
|
|
@@ -171,6 +172,7 @@ async def main():
|
|
|
171
172
|
setup=("goto", "google.com"),
|
|
172
173
|
evaluate=("contains_text", "capybara")
|
|
173
174
|
)
|
|
175
|
+
print(f"Running task with prompt: {task.prompt}")
|
|
174
176
|
|
|
175
177
|
# Create environment using the gym module
|
|
176
178
|
env = await gym.make(task)
|
|
@@ -182,6 +184,7 @@ async def main():
|
|
|
182
184
|
obs, _ = await env.reset() # Gets first observation
|
|
183
185
|
for i in range(5):
|
|
184
186
|
actions, done = await agent.predict(obs)
|
|
187
|
+
print(f"Agent action {i}: {actions}")
|
|
185
188
|
|
|
186
189
|
obs, reward, terminated, info = await env.step(actions)
|
|
187
190
|
if done or terminated: break
|
|
@@ -269,4 +272,4 @@ If you use this SDK in your research, please cite it as follows:
|
|
|
269
272
|
url = {https://github.com/hud-evals/hud-sdk},
|
|
270
273
|
langid = {en}
|
|
271
274
|
}
|
|
272
|
-
```
|
|
275
|
+
```
|
|
{hud_python-0.2.5.dist-info → hud_python-0.2.7.dist-info}/RECORD
CHANGED
|
@@ -1,44 +1,44 @@
|
|
|
1
|
-
hud/__init__.py,sha256=
|
|
1
|
+
hud/__init__.py,sha256=kjjq-l2msg9HcfYQ4sL8c0-StQIlsl2qLwh8Tx0nKro,1210
|
|
2
2
|
hud/exceptions.py,sha256=pifKvSqxj9_g4NfARVyH5a-lTThhi9XW06tIXaBakQw,5526
|
|
3
|
-
hud/gym.py,sha256=
|
|
4
|
-
hud/job.py,sha256=
|
|
5
|
-
hud/settings.py,sha256=
|
|
6
|
-
hud/task.py,sha256=
|
|
7
|
-
hud/taskset.py,sha256=
|
|
8
|
-
hud/trajectory.py,sha256=
|
|
9
|
-
hud/types.py,sha256=
|
|
10
|
-
hud/version.py,sha256=
|
|
3
|
+
hud/gym.py,sha256=JNWlO2GXev0xIjoTI9HMEbcQgGpzc6fku7-RYoYAxHI,4996
|
|
4
|
+
hud/job.py,sha256=_OKcdeWdoT7f3wiR7fZFjTVJs0OZCfMrxEG_cXYR6v4,26965
|
|
5
|
+
hud/settings.py,sha256=rx2zc3abJmf9ztwMHRYf9rGqgGprdRPCRhvJstsgyzc,1674
|
|
6
|
+
hud/task.py,sha256=vDcjKUo8la0AUTP7mwMc2nYwe0tkbnrWwM9-Kvf3Ugg,8773
|
|
7
|
+
hud/taskset.py,sha256=9IRwHeAdsk_IEibayM-hElE3gTp0mgmi-huN67h9-tc,7019
|
|
8
|
+
hud/trajectory.py,sha256=ctAwrGIkdULr4xI6G-1Dp2fhDol4o_PmnPcqTzAEIUc,3797
|
|
9
|
+
hud/types.py,sha256=xqrBb4rPKVkoLVwnyGk4PUrVKayCjKcUD_--n4OrxIM,2954
|
|
10
|
+
hud/version.py,sha256=8RdhNwFca8e-uu1sKAl6TQ58wG5TSHVNqBIPUQXsYcU,104
|
|
11
11
|
hud/adapters/__init__.py,sha256=zz24KdC_e9TJPgWo6y57_8SzevEE5ak4Cm6tXzMxwRk,266
|
|
12
12
|
hud/adapters/claude/__init__.py,sha256=i7QEF-29FLb9qxp1eYtXs-adIk_tG54tL-9g6d3xodk,100
|
|
13
|
-
hud/adapters/claude/adapter.py,sha256=
|
|
13
|
+
hud/adapters/claude/adapter.py,sha256=vCpotJ5gzQs4PP2iCXVavIcyG8c_4m1P6fuXStwUxSo,6675
|
|
14
14
|
hud/adapters/claude/tests/__init__.py,sha256=9GZj0rz4tTkiPnLfxTmyBPr-s8UZc3gph6WH8fs8T34,39
|
|
15
15
|
hud/adapters/claude/tests/test_adapter.py,sha256=cAdHEoqLngLiV7QwlWJ0KuNgb1vNv9WZTPQMnxhMDKI,18319
|
|
16
16
|
hud/adapters/common/__init__.py,sha256=BjdZWJVs_AKtpFrt-tNsdQRjnz7D97DFEQirJ-r0mp8,118
|
|
17
|
-
hud/adapters/common/adapter.py,sha256=
|
|
18
|
-
hud/adapters/common/types.py,sha256=
|
|
17
|
+
hud/adapters/common/adapter.py,sha256=GETzlsEl-uYkL-U4cQHBnfLAvm1dbXec4fKC2ypR1L0,5821
|
|
18
|
+
hud/adapters/common/types.py,sha256=DpBu30o32tFEcTdMF8j-IKLN9cNRA9Luko8FKIB8K20,9904
|
|
19
19
|
hud/adapters/common/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
20
|
hud/adapters/common/tests/test_adapter.py,sha256=rTD36LjvytHqMIyOLDyrn0RLIkd20s6f6dwoBEarJaw,8744
|
|
21
21
|
hud/adapters/operator/__init__.py,sha256=31vTRs268_TOLd-TeQRKau5bDYy78wxCNpJFhD5_l8U,104
|
|
22
|
-
hud/adapters/operator/adapter.py,sha256=
|
|
22
|
+
hud/adapters/operator/adapter.py,sha256=heATqYKrTZy4PIM22CfkhgIOPxugDpuF66wOQjZaaxE,3569
|
|
23
23
|
hud/adapters/operator/tests/__init__.py,sha256=yTsDVusVXZBQL6DnXpLgKQCBRuOYUAVQ8Blk_k5GETk,41
|
|
24
24
|
hud/adapters/operator/tests/test_adapter.py,sha256=4RAXwyxAtkh-1Mlt1zJayRkcv3LWaPNEhDVTpwOZd4A,12942
|
|
25
25
|
hud/agent/__init__.py,sha256=_OxMG3UW1vXSuixdpo09b1jexfWcUbfK44zto8t6_LE,453
|
|
26
|
-
hud/agent/base.py,sha256=
|
|
27
|
-
hud/agent/claude.py,sha256=
|
|
28
|
-
hud/agent/claude_plays_pokemon.py,sha256=
|
|
29
|
-
hud/agent/langchain.py,sha256=
|
|
30
|
-
hud/agent/operator.py,sha256=
|
|
26
|
+
hud/agent/base.py,sha256=hC3mVUMAWo5HHF2b576ScA9UQzsAzcCfPU9S8mDWthA,4080
|
|
27
|
+
hud/agent/claude.py,sha256=FBSKCxICO6XXYCuIrerVL89bVJ-5JxrZJBDeZgzAdJI,9886
|
|
28
|
+
hud/agent/claude_plays_pokemon.py,sha256=4TPibnTFhTb24ISRKAU3pA4waIcISTfZLOdfBMIMqxE,10085
|
|
29
|
+
hud/agent/langchain.py,sha256=H55JNHcGkdl-LVzZEqOFRkuuFEO0D8MI1jCNz9deoko,9012
|
|
30
|
+
hud/agent/operator.py,sha256=DDU2YOmNu00apt3W-k45Ybcl4lQ4vAw-v8NXv055Ut8,9387
|
|
31
31
|
hud/agent/misc/__init__.py,sha256=-ftYH1T5r7fXKKra6d8jXYmUz9KOTmYwBrPJU-V3S7g,71
|
|
32
32
|
hud/agent/misc/response_agent.py,sha256=3PPsZqNAyUo2ouSV0ylGQj9fJqojfSB2roq2DadUdG0,3048
|
|
33
33
|
hud/agent/tests/__init__.py,sha256=HbAW7FvSvzzKPU5LpveZceU8XTcDkRe1Bmte3OGi2f0,29
|
|
34
|
-
hud/agent/tests/test_base.py,sha256=
|
|
34
|
+
hud/agent/tests/test_base.py,sha256=MAHx4QWsX4y4jXDoA1sxWw8uFvL7lIzGlXrnHfOTmkw,8511
|
|
35
35
|
hud/env/__init__.py,sha256=wVEesXMXM5hcNXQHt0-PN4-9RnE69DEnQENS7uJSv_Y,266
|
|
36
36
|
hud/env/client.py,sha256=brhfLkWGSuvxl3vqGMCQT-vXfj8rUbJMhE3zJg9WMDA,869
|
|
37
|
-
hud/env/docker_client.py,sha256=
|
|
38
|
-
hud/env/environment.py,sha256=
|
|
39
|
-
hud/env/local_docker_client.py,sha256=
|
|
40
|
-
hud/env/remote_client.py,sha256=
|
|
41
|
-
hud/env/remote_docker_client.py,sha256=
|
|
37
|
+
hud/env/docker_client.py,sha256=ReEByMKMrcrBnD4PifeQBOiX3Kn990pOcypUUtd2nC4,11178
|
|
38
|
+
hud/env/environment.py,sha256=WcZTkYwCH3khBD_PldPnarZ2rLfI2ZCH1-wdTjayzkU,16806
|
|
39
|
+
hud/env/local_docker_client.py,sha256=ewZYVDEv3cBXg3jzuouh6raj0W922rID0n6xkqc4qqE,11555
|
|
40
|
+
hud/env/remote_client.py,sha256=afLaQRSn9hge4arTkxYeus-a-T6DfeQ_EZBlPMRb2fM,6652
|
|
41
|
+
hud/env/remote_docker_client.py,sha256=RkuBtju_ycxma7ZSEbcN0tjwBTQvvorb4Ctggu09aio,9538
|
|
42
42
|
hud/evaluators/__init__.py,sha256=V5nktEAw3EDn2Y537pjia5Y1IjdLBIPrDjTs6YTCdX4,153
|
|
43
43
|
hud/evaluators/base.py,sha256=ALO9Rj-R_9HtHIHYp84bsQQD12De0XnCTwad78_T5-k,771
|
|
44
44
|
hud/evaluators/inspect.py,sha256=ZvrTXLpgibyvQ5aNXAMP4quyXISrRQHg9besDcuCx7U,692
|
|
@@ -51,34 +51,35 @@ hud/evaluators/tests/test_judge.py,sha256=c1GaAeq_WpBVgBlx-gQncHrOPokzKNxlbgiC8W
|
|
|
51
51
|
hud/evaluators/tests/test_match.py,sha256=C04GoluyT9i41YZ65xEjN7tKHQbENbrpNhNtUd4ivmA,3919
|
|
52
52
|
hud/evaluators/tests/test_remote.py,sha256=YdJpyyuRLkYP0e3jTUkD3zobS2WHQPePn8yBZtYOIN4,3243
|
|
53
53
|
hud/server/__init__.py,sha256=IPxPCqtPLguryN-nBq78Sakypw2bRiE2iHv3SXG8YRk,139
|
|
54
|
-
hud/server/requests.py,sha256=
|
|
54
|
+
hud/server/requests.py,sha256=AnFW4ELojjvfF6xjS2no6_fg4Rph2aR2hjPzYTede0Q,8841
|
|
55
55
|
hud/server/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
56
|
hud/server/tests/test_requests.py,sha256=63YCbykcib5MxKxm-OgHJPLX3QC7hmgIwnWaYukVM6s,9077
|
|
57
57
|
hud/telemetry/__init__.py,sha256=ky48kuZD3Bt0vOf9FwZwkV_ka7O26Tvcxh7p1lMpsMk,582
|
|
58
58
|
hud/telemetry/_trace.py,sha256=W7S6CxwtmjNl4OZbA1SQHXsaNm072J9c-fjPjQomgOY,5135
|
|
59
|
-
hud/telemetry/context.py,sha256=
|
|
60
|
-
hud/telemetry/exporter.py,sha256=
|
|
59
|
+
hud/telemetry/context.py,sha256=PNbfrMgjeRTTg0nUKXYCflqn71I_cSjU8LXdvouUfc4,5209
|
|
60
|
+
hud/telemetry/exporter.py,sha256=IWK7Ahj9EIlFvG3J54Gj55X3pFnBWYYNAyXv5CIWcpA,17752
|
|
61
61
|
hud/telemetry/mcp_models.py,sha256=YIArMtCVfC4NVvaEmUYs_kxDs0GQ-xtFFmB8jEGKaag,11342
|
|
62
62
|
hud/telemetry/instrumentation/__init__.py,sha256=vHmSqaJMMehgRNn6EN2SMoYDD12rSHkLeVmj7Uy1my0,88
|
|
63
|
-
hud/telemetry/instrumentation/mcp.py,sha256=
|
|
63
|
+
hud/telemetry/instrumentation/mcp.py,sha256=xGAMdhTgM1ixHiDX7xkS9Ax1NCjK3u7pLWIbIh8WZIA,21925
|
|
64
64
|
hud/telemetry/instrumentation/registry.py,sha256=UVaSsEA693lvKYd5R3n3ve6GcAB1fwqubRwIVeZiNmo,1821
|
|
65
65
|
hud/telemetry/tests/__init__.py,sha256=QMN8OzfrBUDbQESwrwHCqXLdDwCjYWX8BJcpeLUJfqA,33
|
|
66
|
-
hud/telemetry/tests/test_context.py,sha256=
|
|
66
|
+
hud/telemetry/tests/test_context.py,sha256=BGRDlXXC_VbpD4cYl_o9gRQDDKb2ox1das_ZuX14NC8,6531
|
|
67
67
|
hud/telemetry/tests/test_trace.py,sha256=JzmjNRtHdQFPqLm7hOPastENg-hMJo9p8bbxJ77iXyc,10687
|
|
68
68
|
hud/utils/__init__.py,sha256=oSl_gGoS272X2VFnBYX8hLxcP2xgGoBYQXAuLhtQgw8,260
|
|
69
|
-
hud/utils/common.py,sha256=
|
|
70
|
-
hud/utils/config.py,sha256=
|
|
69
|
+
hud/utils/agent.py,sha256=CpNgjKWMaNqo-EATH_vfJHIN53rEkZngm2LXfUFlldQ,1225
|
|
70
|
+
hud/utils/common.py,sha256=_3HNmSOsHWyexP6iXTuU2wMx3Fafeg5hZU3VXBmv0Ag,7780
|
|
71
|
+
hud/utils/config.py,sha256=L_sSYtEaOap-Gnb2iLPJPQc2rteyt6mjOdJUrktmFwM,4020
|
|
71
72
|
hud/utils/misc.py,sha256=CfOv_ftLty1iEo3Rxyz4AD4nmaBkhCJVO_W-FlcyDgI,1481
|
|
72
73
|
hud/utils/progress.py,sha256=suikwFM8sdSfkV10nAOEaInDhG4XKgOSvFePg4jSj1A,5927
|
|
73
74
|
hud/utils/telemetry.py,sha256=hrVIx2rUjSGyy9IVxTZ_3Jii83PiHjyFRd5ls2whimM,1863
|
|
74
75
|
hud/utils/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
75
|
-
hud/utils/tests/test_common.py,sha256=
|
|
76
|
+
hud/utils/tests/test_common.py,sha256=KqDSMf7gWf1oYCiQ_BXsnvW1wUmyzbOzAT-HNoF7txs,9443
|
|
76
77
|
hud/utils/tests/test_config.py,sha256=dPlXYWuMrxX-NOYbf0vdJ27TJpfacKG8eiKOSGOcfDU,4079
|
|
77
78
|
hud/utils/tests/test_progress.py,sha256=QunwDgi_heQXhDgmC25zgjr-sFUu5FdJ_1aYigMKeIc,6351
|
|
78
79
|
hud/utils/tests/test_telemetry.py,sha256=t0An1RTBaE0dZVEpF4uwuq5k1R-PXFR5k4u71h60tx8,1224
|
|
79
|
-
hud/utils/tests/test_version.py,sha256=
|
|
80
|
+
hud/utils/tests/test_version.py,sha256=j6v7tM07H-ghzECNHNDAsLek3BlaPi_gaQIp2qWAL98,159
|
|
80
81
|
hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
81
|
-
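hud_python-0.2.5.dist-info/METADATA,sha256=
|
|
82
|
-
hud_python-0.2.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
83
|
-
hud_python-0.2.5.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
|
|
84
|
-
hud_python-0.2.5.dist-info/RECORD,,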
|
|
82
|
+
hud_python-0.2.7.dist-info/METADATA,sha256=zcKhW8Xved3ea6bsOKbdCsFpFhAd-Ru6_z4zfo8Xwfs,9785
|
|
83
|
+
hud_python-0.2.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
84
|
+
hud_python-0.2.7.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
|
|
85
|
+
hud_python-0.2.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|