hud-python 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/types.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import enum
4
4
  from pathlib import Path
5
- from typing import Literal, TypeAlias
5
+ from typing import Any, Literal, TypeAlias
6
6
 
7
7
  from pydantic import BaseModel
8
8
 
@@ -28,6 +28,9 @@ class CustomGym(BaseModel):
28
28
  # B. If string, then it is the uri of the docker image to use.
29
29
  # The controller must already be installed in the image.
30
30
  image_or_build_context: str | Path
31
+ # host_config will be passed to the docker client when creating the environment.
32
+ # refer to official docker api documentation for available configs.
33
+ host_config: dict[str, Any] | None = None
31
34
 
32
35
 
33
36
  class EnvironmentStatus(str, enum.Enum):
@@ -48,7 +51,30 @@ class EnvironmentStatus(str, enum.Enum):
48
51
 
49
52
 
50
53
  # Available HUD gyms
51
- ServerGym: TypeAlias = Literal["qa", "hud-browser", "OSWorld-Ubuntu"]
54
+ ServerGym: TypeAlias = Literal["qa", "hud-browser", "OSWorld-Ubuntu", "docker"]
52
55
 
53
56
  # Gyms can be either custom or server-side
54
57
  Gym: TypeAlias = CustomGym | ServerGym
58
+
59
+
60
+ # Metadata keys for the environment.
61
+ # partial: Whether the environment evaluator should give partial grades.
62
+ # eval_model: The model to use for evaluation when running a VLM. Wraps langchain.
63
+ # agent_name: The name of the agent that was used for running this task.
64
+ ServerMetadataKeys: TypeAlias = Literal["partial", "eval_model", "agent_name"]
65
+ MetadataKeys: TypeAlias = str | ServerMetadataKeys
66
+
67
+
68
+ # Dictionary of sensitive data (only supported for hud-browser environments)
69
+ # key: website name or page identifier
70
+ # value: Dictionary of credentials for the sensitive data
71
+ # Example:
72
+ # {
73
+ # "google.com": {
74
+ # "google_username": "my_username",
75
+ # "google_password": "my_password"
76
+ # }
77
+ # }
78
+ # The agent only has access to the key of the credential, not the value (e.g. google_username).
79
+ # The value is only available to the environment (e.g. my_username).
80
+ SensitiveData: TypeAlias = dict[str, dict[str, str]]
hud/utils/agent.py ADDED
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ if TYPE_CHECKING:
6
+ from hud.task import Task
7
+
8
AGENT_PROMPT = (
    "You are an AI agent whose goal is to accomplish the ultimate task following the instructions."
)


def format_agent_prompt(environment_prompt: str | None, task: Task | None) -> str:
    """
    Build the full agent prompt from the base prompt, a system prompt and the task.

    The result is made of blank-line separated sections, in this order:
    the fixed ``AGENT_PROMPT``; the task's ``system_prompt`` if set, otherwise
    ``environment_prompt`` if set; the list of sensitive-data placeholders
    (credential keys only, grouped per domain) when the task declares any;
    and finally the task's own prompt.
    """
    sections = [AGENT_PROMPT]

    # A system prompt supplied with the task wins over the environment's one.
    system_prompt = task.system_prompt if task else None
    if system_prompt:
        sections.append(system_prompt)
    elif environment_prompt:
        sections.append(environment_prompt)

    if not task:
        return "\n\n".join(sections)

    if task.sensitive_data:
        # Only the credential keys are listed; the values are never read here.
        listing = ["Here are placeholders for sensitive data for each domain:"]
        listing.extend(
            f"{domain}: {', '.join(f'{key}' for key in credentials)}"
            for domain, credentials in task.sensitive_data.items()
        )
        sections.append("\n".join(listing))
        sections.append(
            "You can type these placeholders to enter the sensitive data when needed."
        )

    if task.prompt:
        sections.append(task.prompt)

    return "\n\n".join(sections)
hud/utils/common.py CHANGED
@@ -6,6 +6,7 @@ import tarfile
6
6
  import zipfile
7
7
  from typing import TYPE_CHECKING, Any, TypedDict
8
8
 
9
+ from pathspec import PathSpec
9
10
  from pydantic import BaseModel
10
11
 
11
12
  from hud.server.requests import make_request
@@ -67,8 +68,8 @@ class Observation(BaseModel):
67
68
 
68
69
  def __str__(self) -> str:
69
70
  return f"""Observation(screenshot={
70
- self.screenshot[:100] if self.screenshot else "None"
71
- }..., text={self.text}...)"""
71
+ f"{self.screenshot[:100]}..." if self.screenshot else "None"
72
+ }, text={f"{self.text[:100]}..." if self.text else "None"})"""
72
73
 
73
74
 
74
75
  class ExecuteResult(TypedDict):
@@ -86,44 +87,159 @@ class ExecuteResult(TypedDict):
86
87
  exit_code: int
87
88
 
88
89
 
89
- def directory_to_tar_bytes(directory_path: Path) -> bytes:
90
+ # ---------------------------------------------------------------------------
91
+ # Helper functions for handling ignore patterns
92
+ # ---------------------------------------------------------------------------
93
+
94
+
95
def _read_ignore_file(file_path: Path) -> list[str]:
    """Return patterns from *file_path* (ignoring blanks / comments).

    Args:
        file_path: Location of the ignore file to read.

    Returns:
        The stripped, non-empty lines that do not start with ``#``, in file
        order. An empty list is returned when *file_path* is missing or is
        not a regular file (for example a directory that happens to be named
        like an ignore file).
    """
    # ``is_file`` instead of ``exists``: reading a directory would raise.
    if not file_path.is_file():
        return []

    # Decode explicitly so the result does not depend on the platform locale.
    lines = file_path.read_text(encoding="utf-8").splitlines()
    stripped_lines = (line.strip() for line in lines)
    return [entry for entry in stripped_lines if entry and not entry.startswith("#")]
107
+
108
+
109
def _gather_ignore_patterns(root_dir: Path, filename: str) -> list[str]:
    """Collect *filename* patterns throughout *root_dir* respecting hierarchy.

    Every pattern is rewritten so that it can be evaluated relative to
    *root_dir* when passed to ``PathSpec``:

    * Anchored patterns (a separator at the start or in the middle), such as
      ``/build`` or ``a/b`` in ``sub/dir/.gitignore``, become
      ``sub/dir/build`` and ``sub/dir/a/b``.
    * Patterns with no separator other than a trailing one, such as ``foo/``
      or ``*.pyc`` in ``sub/dir/.gitignore``, may match at any depth below
      that directory and become ``sub/dir/**/foo/`` and ``sub/dir/**/*.pyc``.
    * Patterns from an ignore file located directly in *root_dir* keep their
      leading ``/`` so they stay anchored to the root.

    Args:
        root_dir: Directory that is searched recursively for ignore files.
        filename: Name of the ignore file to look for (e.g. ``.gitignore``).

    Returns:
        The rewritten patterns. Ignore files are processed shallowest first
        (ties broken by path) so the order is deterministic and patterns from
        deeper files come later in the list.
    """
    gathered: list[str] = []

    root_dir = root_dir.resolve()

    # Skip directories that merely share the ignore file's name, and fix the
    # traversal order, which ``rglob`` does not guarantee.
    ignore_files = sorted(
        (path for path in root_dir.rglob(filename) if path.is_file()),
        key=lambda path: (len(path.relative_to(root_dir).parts), path.as_posix()),
    )

    for ignore_file in ignore_files:
        prefix = ignore_file.parent.relative_to(root_dir).as_posix()
        base_prefix = "" if prefix == "." else prefix

        for pat in _read_ignore_file(ignore_file):
            negate = pat.startswith("!")
            pat_body = pat[1:] if negate else pat

            # A leading slash anchors the pattern to the ignore file's folder.
            anchored = pat_body.startswith("/")
            pat_body = pat_body.lstrip("/")
            if not pat_body:
                # A bare "/" (or "!") would otherwise turn into "<prefix>/"
                # and ignore the whole directory.
                continue

            if not base_prefix:
                full_pattern = f"/{pat_body}" if anchored else pat_body
            elif anchored or "/" in pat_body.rstrip("/"):
                full_pattern = f"{base_prefix}/{pat_body}"
            else:
                # No separator: the pattern applies at every level below the
                # ignore file's directory, not only directly inside it.
                full_pattern = f"{base_prefix}/**/{pat_body}"

            if negate:
                full_pattern = f"!{full_pattern}"

            gathered.append(full_pattern)

    return gathered
140
+
141
+
142
def _compile_pathspec(
    directory: Path,
    *,
    respect_gitignore: bool,
    respect_dockerignore: bool,
    respect_hudignore: bool,
) -> PathSpec | None:
    """Build one ``PathSpec`` out of the enabled ignore files under *directory*.

    ``.gitignore``, ``.dockerignore`` and the project-specific ``.hudignore``
    are consulted in that order, each only when its ``respect_*`` flag is
    true. All three are compiled with the ``gitwildmatch`` pattern syntax.

    Returns:
        The compiled spec, or ``None`` when no pattern was collected.
    """
    toggles = (
        (respect_gitignore, ".gitignore"),
        (respect_dockerignore, ".dockerignore"),
        (respect_hudignore, ".hudignore"),
    )

    collected: list[str] = []
    for enabled, ignore_name in toggles:
        if enabled:
            collected.extend(_gather_ignore_patterns(directory, ignore_name))

    if collected:
        return PathSpec.from_lines("gitwildmatch", collected)
    return None
169
+
170
+
171
def _iter_files(
    directory: Path,
    *,
    respect_gitignore: bool,
    respect_dockerignore: bool,
    respect_hudignore: bool,
) -> Iterator[tuple[Path, Path]]:
    """Walk *directory* and yield every file that is not ignored.

    Yields:
        ``(file_path, relative_path)`` pairs, where ``relative_path`` is
        ``file_path`` expressed relative to *directory*. Directories are never
        yielded. A file is skipped when its POSIX-style relative path matches
        the spec compiled from the enabled ignore files.
    """
    ignore_spec = _compile_pathspec(
        directory,
        respect_gitignore=respect_gitignore,
        respect_dockerignore=respect_dockerignore,
        respect_hudignore=respect_hudignore,
    )

    for candidate in directory.rglob("*"):
        if candidate.is_file():
            relative = candidate.relative_to(directory)
            if not (ignore_spec and ignore_spec.match_file(relative.as_posix())):
                yield candidate, relative
194
+
195
+
196
def directory_to_tar_bytes(
    directory_path: Path,
    *,
    respect_gitignore: bool = False,
    respect_dockerignore: bool = False,
    respect_hudignore: bool = True,
) -> bytes:
    """
    Converts a directory to a tar archive and returns it as bytes.

    The archive is assembled in an in-memory buffer. Only files are added
    (directories are not stored as separate members), each under its path
    relative to *directory_path*.

    By default only ``.hudignore`` rules are applied. ``.gitignore`` and
    ``.dockerignore`` are opt-in through their keyword arguments.

    Args:
        directory_path: Path to the directory to convert.
        respect_gitignore: Skip files matched by ``.gitignore`` patterns.
        respect_dockerignore: Skip files matched by ``.dockerignore`` patterns.
        respect_hudignore: Skip files matched by ``.hudignore`` patterns.

    Returns:
        Bytes of the (uncompressed) tar archive.
    """
    output = io.BytesIO()

    with tarfile.open(fileobj=output, mode="w") as tar:
        for file_path, rel_path in _iter_files(
            directory_path,
            respect_gitignore=respect_gitignore,
            respect_dockerignore=respect_dockerignore,
            respect_hudignore=respect_hudignore,
        ):
            logger.debug("Adding %s to tar archive", rel_path)
            tar.add(file_path, arcname=str(rel_path))

    # getvalue() returns the whole buffer regardless of the stream position.
    return output.getvalue()
116
223
 
117
224
 
118
- def directory_to_zip_bytes(context_dir: Path) -> bytes:
119
- """Zip a directory and return the zip archive as bytes."""
225
def directory_to_zip_bytes(
    context_dir: Path,
    *,
    respect_gitignore: bool = False,
    respect_dockerignore: bool = False,
    respect_hudignore: bool = True,
) -> bytes:
    """Pack the files of *context_dir* into a deflate-compressed zip and return its bytes.

    Files are stored under their path relative to *context_dir*. Files matched
    by an enabled ignore file are left out; the ``respect_*`` keyword
    arguments choose which ignore files are honoured.
    """
    buffer = io.BytesIO()
    selected = _iter_files(
        context_dir,
        respect_gitignore=respect_gitignore,
        respect_dockerignore=respect_dockerignore,
        respect_hudignore=respect_hudignore,
    )
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as archive:
        for source, arcname in selected:
            logger.debug("Adding %s to zip archive", arcname)
            archive.write(str(source), arcname=str(arcname))
    return buffer.getvalue()
128
244
 
129
245
 
hud/utils/config.py CHANGED
@@ -103,6 +103,17 @@ def expand_config(config: FunctionConfigs) -> list[FunctionConfig]:
103
103
 
104
104
  return [FunctionConfig(function=function_name, args=args)]
105
105
 
106
+ if isinstance(config, list):
107
+ result = []
108
+ for item in config:
109
+ if isinstance(item, tuple) and len(item) >= 1 and isinstance(item[0], str):
110
+ function_name = item[0]
111
+ args = list(item[1:]) if len(item) > 1 else []
112
+ result.append(FunctionConfig(function=function_name, args=args))
113
+ else:
114
+ raise ValueError(f"Invalid list item configuration: {item}")
115
+ return result
116
+
106
117
  # Unknown configuration type
107
118
  error_msg = f"Unknown configuration type: {type(config)}"
108
119
  logger.error(error_msg)
@@ -50,3 +50,228 @@ async def test_get_gym_id(mocker: pytest_mock.MockerFixture):
50
50
  mocker.patch("hud.utils.common.make_request", return_value={"id": "test_gym_id"})
51
51
  gym_id = await get_gym_id("test_gym")
52
52
  assert gym_id == "test_gym_id"
53
+
54
+
55
def test_function_config_stores_function_name_args_and_optional_id():
    """FunctionConfig keeps the function name, the argument list and an optional id."""
    from hud.utils.common import FunctionConfig

    # No arguments and no id: the id must default to None.
    bare = FunctionConfig(function="test_func", args=[])
    assert bare.function == "test_func"
    assert bare.args == []
    assert bare.id is None

    # Positional arguments are stored as given, in order.
    url, options = "https://example.com", {"wait": True}
    navigate = FunctionConfig(function="navigate", args=[url, options])
    assert navigate.function == "navigate"
    assert len(navigate.args) == 2
    assert navigate.args[0] == "https://example.com"
    assert navigate.args[1] == {"wait": True}

    # An explicit id is kept alongside mixed-type arguments.
    identified = FunctionConfig(
        function="complex_operation",
        args=[42, "test", {"nested": {"key": "value"}}],
        id="op_123",
    )
    assert identified.function == "complex_operation"
    assert len(identified.args) == 3
    assert identified.id == "op_123"
81
+
82
+
83
@pytest.mark.asyncio
async def test_get_gym_id_fetches_id_from_api_response(
    mocker: pytest_mock.MockerFixture,
):
    """get_gym_id returns the 'id' field of the API response and ignores the rest."""
    # The patched request returns extra fields besides the id.
    mocker.patch(
        "hud.utils.common.make_request",
        return_value={"id": "gym-123", "name": "Test Gym", "status": "active"},
    )

    resolved = await get_gym_id("test_gym")

    assert resolved == "gym-123"
97
+
98
+
99
@pytest.mark.asyncio
async def test_get_gym_id_propagates_network_errors(mocker: pytest_mock.MockerFixture):
    """An exception raised by make_request must surface unchanged from get_gym_id."""
    failure = ConnectionError("API unavailable")
    mocker.patch("hud.utils.common.make_request", side_effect=failure)

    with pytest.raises(ConnectionError, match="API unavailable"):
        await get_gym_id("test_gym")
108
+
109
+
110
@pytest.mark.asyncio
async def test_get_gym_id_raises_key_error_when_id_missing(
    mocker: pytest_mock.MockerFixture,
):
    """get_gym_id raises KeyError when the API response has no 'id' field."""
    # The response deliberately lacks the 'id' key.
    mocker.patch(
        "hud.utils.common.make_request",
        return_value={"name": "Test Gym", "status": "active"},
    )

    with pytest.raises(KeyError):
        await get_gym_id("test_gym")
122
+
123
+
124
def test_directory_to_tar_bytes_creates_valid_tar_archive(
    tmpdir_factory: pytest.TempdirFactory,
):
    """directory_to_tar_bytes produces a readable tar archive holding every file."""
    base = Path(tmpdir_factory.mktemp("test_archive"))

    # Two top-level files and one file inside a sub-directory.
    fixtures = {
        "file1.txt": "content1",
        "file2.py": "import os\nprint('hello')",
        "subdir/nested.json": '{"key": "value"}',
    }
    (base / "subdir").mkdir()
    for rel_name, text in fixtures.items():
        (base / rel_name).write_text(text)

    archive_bytes = directory_to_tar_bytes(base)

    assert isinstance(archive_bytes, bytes)
    assert len(archive_bytes) > 0

    with tarfile.open(fileobj=io.BytesIO(archive_bytes), mode="r:*") as archive:
        names = {member.name for member in archive.getmembers()}
        for rel_name in fixtures:
            assert rel_name in names

        # The payload of a member must round-trip unchanged.
        extracted = archive.extractfile("file1.txt")
        assert extracted is not None
        assert extracted.read().decode() == "content1"
158
+
159
+
160
def test_directory_to_tar_bytes_handles_empty_directory(
    tmpdir_factory: pytest.TempdirFactory,
):
    """directory_to_tar_bytes should handle empty directories gracefully."""
    # Arrange
    empty_dir_path = Path(tmpdir_factory.mktemp("empty"))

    # Act
    tar_bytes = directory_to_tar_bytes(empty_dir_path)

    # Assert
    assert isinstance(tar_bytes, bytes)
    assert len(tar_bytes) > 0  # Even empty tar has headers

    with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:*") as tar:
        # Only files are archived, so an empty directory yields no members.
        # (The previous ``len(members) >= 0`` check could never fail.)
        assert tar.getmembers() == []
179
+
180
+
181
def test_directory_to_tar_bytes_preserves_directory_structure(
    tmpdir_factory: pytest.TempdirFactory,
):
    """Nested files keep their relative paths inside the archive."""
    base = Path(tmpdir_factory.mktemp("root"))

    # One file on each of three nesting levels.
    layout = {
        "a/file1.txt": "in a",
        "a/b/file2.txt": "in b",
        "a/b/c/file3.txt": "in c",
    }
    (base / "a" / "b" / "c").mkdir(parents=True)
    for rel_name, text in layout.items():
        (base / rel_name).write_text(text)

    archive_bytes = directory_to_tar_bytes(base)

    with tarfile.open(fileobj=io.BytesIO(archive_bytes), mode="r:*") as archive:
        names = {member.name for member in archive.getmembers()}
        for rel_name in layout:
            assert rel_name in names
204
+
205
+
206
def test_directory_to_tar_bytes_with_exclusions(tmpdir_factory: pytest.TempdirFactory):
    """Test directory_to_tar_bytes with files to exclude.

    Files are archived unless an enabled ignore file excludes them, and
    ``.hudignore`` is enabled by default, so the exclusions are declared there.
    """
    temp_dir_path = Path(tmpdir_factory.mktemp("test_exclude_dir"))

    # Create various files
    (temp_dir_path / "include_me.txt").write_text("include")
    (temp_dir_path / ".git").mkdir()
    (temp_dir_path / ".git" / "config").write_text("git config")
    (temp_dir_path / "__pycache__").mkdir()
    (temp_dir_path / "__pycache__" / "module.pyc").write_bytes(b"pyc content")
    (temp_dir_path / "normal_dir").mkdir()
    (temp_dir_path / "normal_dir" / "file.py").write_text("python code")

    # Without any ignore file every file is archived.
    with tarfile.open(
        fileobj=io.BytesIO(directory_to_tar_bytes(temp_dir_path)), mode="r:*"
    ) as tar:
        member_names = {m.name for m in tar.getmembers()}
    assert "include_me.txt" in member_names
    assert "normal_dir/file.py" in member_names
    assert ".git/config" in member_names
    assert "__pycache__/module.pyc" in member_names

    # Declaring the directories in .hudignore removes them from the archive.
    (temp_dir_path / ".hudignore").write_text(".git/\n__pycache__/\n")
    with tarfile.open(
        fileobj=io.BytesIO(directory_to_tar_bytes(temp_dir_path)), mode="r:*"
    ) as tar:
        member_names = {m.name for m in tar.getmembers()}
    assert "include_me.txt" in member_names
    assert "normal_dir/file.py" in member_names
    assert ".git/config" not in member_names
    assert "__pycache__/module.pyc" not in member_names

    # Opting out of .hudignore brings the files back.
    with tarfile.open(
        fileobj=io.BytesIO(
            directory_to_tar_bytes(temp_dir_path, respect_hudignore=False)
        ),
        mode="r:*",
    ) as tar:
        member_names = {m.name for m in tar.getmembers()}
    assert ".git/config" in member_names
    assert "__pycache__/module.pyc" in member_names
232
+
233
+
234
def test_directory_to_tar_bytes_empty_directory(tmpdir_factory: pytest.TempdirFactory):
    """Test directory_to_tar_bytes with empty directory."""
    temp_dir_path = Path(tmpdir_factory.mktemp("empty_dir"))

    tar_bytes = directory_to_tar_bytes(temp_dir_path)

    # Should still create a valid tar even if empty
    assert tar_bytes is not None
    assert len(tar_bytes) > 0

    with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:*") as tar:
        # Only files are added to the archive, so there is nothing to list.
        # (The previous ``len(members) >= 0`` check could never fail.)
        assert tar.getmembers() == []
249
+
250
+
251
def test_directory_to_tar_bytes_symlinks(tmpdir_factory: pytest.TempdirFactory):
    """A symlink to a regular file shows up in the archive under its own name."""
    base = Path(tmpdir_factory.mktemp("symlink_dir"))

    real_file = base / "target.txt"
    real_file.write_text("target content")

    link = base / "link_to_target.txt"
    try:
        link.symlink_to(real_file)
    except OSError:
        # Creating symlinks may be refused (e.g. Windows without admin rights).
        link_created = False
    else:
        link_created = True

    archive_bytes = directory_to_tar_bytes(base)

    with tarfile.open(fileobj=io.BytesIO(archive_bytes), mode="r:*") as archive:
        by_name = {member.name: member for member in archive.getmembers()}

    assert "target.txt" in by_name

    if link_created:
        # Whether the link is followed or stored as a link is not asserted.
        assert "link_to_target.txt" in by_name
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.2.6"
8
+ assert hud.__version__ == "0.2.7"
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.2.6"
7
+ __version__ = "0.2.7"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: SDK for the HUD evaluation platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-sdk
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-sdk/issues
@@ -47,6 +47,7 @@ Requires-Dist: langchain-openai
47
47
  Requires-Dist: mcp
48
48
  Requires-Dist: numpy
49
49
  Requires-Dist: openai
50
+ Requires-Dist: pathspec>=0.12.1
50
51
  Requires-Dist: pillow>=11.1.0
51
52
  Requires-Dist: pydantic-settings<3,>=2
52
53
  Requires-Dist: pydantic<3,>=2
@@ -61,7 +62,7 @@ Requires-Dist: ipython<9; extra == 'dev'
61
62
  Requires-Dist: jupyter-client; extra == 'dev'
62
63
  Requires-Dist: jupyter-core; extra == 'dev'
63
64
  Requires-Dist: openai; extra == 'dev'
64
- Requires-Dist: pyright==1.1.364; extra == 'dev'
65
+ Requires-Dist: pyright==1.1.401; extra == 'dev'
65
66
  Requires-Dist: pytest-asyncio; extra == 'dev'
66
67
  Requires-Dist: pytest-cov; extra == 'dev'
67
68
  Requires-Dist: pytest-mock; extra == 'dev'
@@ -90,7 +91,7 @@ We're here to help with eval strategies, custom environments, or improving your
90
91
 
91
92
  ## ✨ What You Can Do
92
93
 
93
- **Evaluate Existing Benchmarks**
94
+ **[Evaluate Existing Benchmarks](https://docs.hud.so/examples/benchmarking-agents)**
94
95
  ```python
95
96
  from hud import load_taskset, run_job, ClaudeAgent
96
97
 
@@ -98,7 +99,7 @@ taskset = await load_taskset("WebVoyager") # or GAIA, OSWorld-Ubuntu, Mind2Web
98
99
  job = await run_job(ClaudeAgent, taskset, "my-evaluation")
99
100
  ```
100
101
 
101
- **Create Custom Tasks**
102
+ **[Create Custom Tasks](https://docs.hud.so/task-creation)**
102
103
  ```python
103
104
  from hud.task import Task
104
105
 
@@ -110,7 +111,7 @@ task = Task(
110
111
  )
111
112
  ```
112
113
 
113
- **Build Custom Environments**
114
+ **[Build Custom Environments](https://docs.hud.so/environment-creation)**
114
115
  ```python
115
116
  from hud.types import CustomGym
116
117
 
@@ -123,7 +124,7 @@ custom_gym = CustomGym(
123
124
  # Or create complex Docker environments - see environments/ folder for examples
124
125
  ```
125
126
 
126
- **Trace Tool Calls Alongside HUD Environments (or Independently)**
127
+ **[Trace Tool Calls Alongside HUD Environments (or Independently)](https://docs.hud.so/examples/mcp-agent-tracing)**
127
128
  ```python
128
129
  import hud
129
130
 
@@ -171,6 +172,7 @@ async def main():
171
172
  setup=("goto", "google.com"),
172
173
  evaluate=("contains_text", "capybara")
173
174
  )
175
+ print(f"Running task with prompt: {task.prompt}")
174
176
 
175
177
  # Create environment using the gym module
176
178
  env = await gym.make(task)
@@ -182,6 +184,7 @@ async def main():
182
184
  obs, _ = await env.reset() # Gets first observation
183
185
  for i in range(5):
184
186
  actions, done = await agent.predict(obs)
187
+ print(f"Agent action {i}: {actions}")
185
188
 
186
189
  obs, reward, terminated, info = await env.step(actions)
187
190
  if done or terminated: break