hud-python 0.4.29__py3-none-any.whl → 0.4.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/base.py +12 -4
- hud/agents/openai_chat_generic.py +2 -1
- hud/cli/flows/tasks.py +185 -0
- hud/cli/init.py +2 -2
- hud/cli/rl/__init__.py +40 -458
- hud/cli/rl/display.py +1 -1
- hud/cli/rl/local_runner.py +571 -0
- hud/cli/rl/remote_runner.py +11 -2
- hud/cli/utils/docker.py +94 -0
- hud/native/comparator.py +6 -6
- hud/native/tests/test_comparator.py +8 -8
- hud/native/tests/test_native_init.py +12 -10
- hud/rl/README.md +2 -3
- hud/rl/learner.py +3 -0
- hud/rl/train.py +3 -0
- hud/rl/vllm_adapter.py +32 -14
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.29.dist-info → hud_python-0.4.31.dist-info}/METADATA +26 -27
- {hud_python-0.4.29.dist-info → hud_python-0.4.31.dist-info}/RECORD +23 -22
- {hud_python-0.4.29.dist-info → hud_python-0.4.31.dist-info}/WHEEL +0 -0
- {hud_python-0.4.29.dist-info → hud_python-0.4.31.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.29.dist-info → hud_python-0.4.31.dist-info}/licenses/LICENSE +0 -0
hud/cli/utils/docker.py
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
|
+
import platform
|
|
7
|
+
import shutil
|
|
6
8
|
import subprocess
|
|
7
9
|
|
|
8
10
|
|
|
@@ -117,3 +119,95 @@ def generate_container_name(identifier: str, prefix: str = "hud") -> str:
|
|
|
117
119
|
# Replace special characters with hyphens
|
|
118
120
|
safe_name = identifier.replace(":", "-").replace("/", "-").replace("\\", "-")
|
|
119
121
|
return f"{prefix}-{safe_name}"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _emit_docker_hints(error_text: str) -> None:
    """Report a failed Docker invocation and suggest how to recover.

    The error text is matched case-insensitively against a list of substrings
    that typically show up when the Docker daemon cannot be reached. On a
    match, an "not running or accessible" error is printed followed by hints
    chosen from ``platform.system()``; otherwise a generic error and a single
    generic hint are printed. In both cases the stripped error text, cut to
    300 characters plus ``"..."`` when longer, is shown under "Details".

    Args:
        error_text: Raw output captured from the failing Docker command.
    """
    from hud.utils.hud_console import hud_console

    # Substrings (lower-case) that indicate a daemon connectivity problem.
    connectivity_markers = (
        "cannot connect to the docker daemon",
        "is the docker daemon running",
        "error during connect",
        "permission denied while trying to connect",
        "no such file or directory",
        "pipe/dockerdesktop",
        "dockerdesktoplinuxengine",
        "//./pipe/docker",
        "/var/run/docker.sock",
    )

    # Hints keyed by the value returned from platform.system().
    hints_by_platform = {
        "Windows": (
            "Open Docker Desktop and wait until it shows 'Running'",
            "If using WSL, enable integration for your distro in Docker Desktop",
        ),
        "Linux": (
            "Start the daemon: sudo systemctl start docker (or service docker start)",
            "If permission denied: sudo usermod -aG docker $USER && re-login",
        ),
        "Darwin": ("Open Docker Desktop and wait until it shows 'Running'",),
    }
    fallback_hints = ("Start Docker and ensure the daemon is reachable",)

    lowered = error_text.lower()
    host_os = platform.system()

    # Both outcomes display the same truncated details, so build them once.
    details = error_text.strip()
    if len(details) > 300:
        details = details[:300] + "..."

    if not any(marker in lowered for marker in connectivity_markers):
        hud_console.error("Docker returned an error")
        hud_console.dim_info("Details", details)
        hud_console.hint("Is Docker running and accessible?")
        return

    hud_console.error("Docker does not appear to be running or accessible")
    for hint in hints_by_platform.get(host_os, fallback_hints):
        hud_console.hint(hint)
    hud_console.dim_info("Details", details)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def require_docker_running() -> None:
    """Ensure the Docker CLI exists and its daemon is reachable.

    Locates ``docker`` on PATH and runs ``docker info`` with an 8-second
    timeout. Returns silently when the command exits with status 0. In every
    other case an error and hints are printed through ``hud_console`` and the
    CLI is terminated.

    Raises:
        typer.Exit: With exit code 1 when the Docker CLI is missing, the
            command cannot be started, it times out, or it exits non-zero.
    """
    import typer

    from hud.utils.hud_console import hud_console

    docker_path: str | None = shutil.which("docker")
    if not docker_path:
        hud_console.error("Docker CLI not found")
        hud_console.info("Install Docker Desktop (Windows/macOS) or Docker Engine (Linux)")
        hud_console.hint("After installation, start Docker and re-run this command")
        raise typer.Exit(1)

    # Keep the try body limited to the subprocess call. typer.Exit is an
    # Exception subclass (it is Click's Exit), so raising it inside this try
    # would be caught by the catch-all handler below and produce a second,
    # misleading "Docker check failed" message after the tailored hints.
    try:
        result = subprocess.run(  # noqa: UP022, S603
            [docker_path, "info"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=8,
            check=False,
        )
    except FileNotFoundError as e:
        # The binary disappeared between shutil.which() and the call.
        hud_console.error("Docker CLI not found on PATH")
        hud_console.hint("Install Docker and ensure 'docker' is on your PATH")
        raise typer.Exit(1) from e
    except subprocess.TimeoutExpired as e:
        hud_console.error("Docker did not respond in time")
        hud_console.hint(
            "Is Docker running? Open Docker Desktop and wait until it reports 'Running'"
        )
        raise typer.Exit(1) from e
    except Exception as e:
        hud_console.error(f"Docker check failed: {e}")
        hud_console.hint("Is the Docker daemon running?")
        raise typer.Exit(1) from e

    if result.returncode == 0:
        return

    # stderr first: that is where docker reports connectivity failures.
    error_text = (result.stderr or "") + "\n" + (result.stdout or "")
    _emit_docker_hints(error_text)
    raise typer.Exit(1)
|
hud/native/comparator.py
CHANGED
|
@@ -513,11 +513,11 @@ def make_alias_tool(name: str, preset_mode: ComparisonMode, description: str) ->
|
|
|
513
513
|
|
|
514
514
|
|
|
515
515
|
# Create MCP server
|
|
516
|
-
|
|
516
|
+
comparator = MCPServer(name="comparator")
|
|
517
517
|
|
|
518
518
|
# Register main tool
|
|
519
|
-
|
|
520
|
-
|
|
519
|
+
comparator.add_tool(SubmitTool())
|
|
520
|
+
comparator.add_tool(CompareTool())
|
|
521
521
|
|
|
522
522
|
# Register aliases - these are just thin wrappers
|
|
523
523
|
ALIASES = [
|
|
@@ -534,13 +534,13 @@ ALIASES = [
|
|
|
534
534
|
|
|
535
535
|
for name, mode, desc in ALIASES:
|
|
536
536
|
AliasTool = make_alias_tool(name, mode, desc)
|
|
537
|
-
|
|
537
|
+
comparator.add_tool(AliasTool())
|
|
538
538
|
|
|
539
539
|
# Export for mounting
|
|
540
|
-
__all__ = ["
|
|
540
|
+
__all__ = ["comparator"]
|
|
541
541
|
|
|
542
542
|
|
|
543
543
|
if __name__ == "__main__":
|
|
544
544
|
# Run as standalone server
|
|
545
545
|
logger.info("Starting Comparator MCP Server...")
|
|
546
|
-
|
|
546
|
+
comparator.run()
|
|
@@ -11,7 +11,7 @@ from hud.native.comparator import (
|
|
|
11
11
|
ComparisonResult,
|
|
12
12
|
DataType,
|
|
13
13
|
auto_select_mode,
|
|
14
|
-
|
|
14
|
+
comparator,
|
|
15
15
|
detect_type,
|
|
16
16
|
extract_boolean,
|
|
17
17
|
extract_json,
|
|
@@ -321,10 +321,10 @@ class TestAliasTools:
|
|
|
321
321
|
@pytest.mark.asyncio
|
|
322
322
|
async def test_aliases_work(self):
|
|
323
323
|
"""Test that aliases are properly registered and work."""
|
|
324
|
-
from hud.native.comparator import
|
|
324
|
+
from hud.native.comparator import comparator
|
|
325
325
|
|
|
326
326
|
# Check that aliases are registered
|
|
327
|
-
tool_names = [t.name for t in
|
|
327
|
+
tool_names = [t.name for t in comparator._tool_manager._tools.values()]
|
|
328
328
|
|
|
329
329
|
expected_aliases = [
|
|
330
330
|
"compare_exact",
|
|
@@ -433,7 +433,7 @@ class TestAliasPreprocessing:
|
|
|
433
433
|
@pytest.mark.asyncio
|
|
434
434
|
async def test_json_alias_preprocessing(self):
|
|
435
435
|
"""Test JSON extraction in compare_json tool."""
|
|
436
|
-
tools = {t.name: t for t in
|
|
436
|
+
tools = {t.name: t for t in comparator._tool_manager._tools.values()}
|
|
437
437
|
json_tool = tools["compare_json"]
|
|
438
438
|
|
|
439
439
|
assert isinstance(json_tool, FunctionTool)
|
|
@@ -448,7 +448,7 @@ class TestAliasPreprocessing:
|
|
|
448
448
|
@pytest.mark.asyncio
|
|
449
449
|
async def test_numeric_alias_preprocessing(self):
|
|
450
450
|
"""Test number extraction in numeric tools."""
|
|
451
|
-
tools = {t.name: t for t in
|
|
451
|
+
tools = {t.name: t for t in comparator._tool_manager._tools.values()}
|
|
452
452
|
|
|
453
453
|
# Float tool
|
|
454
454
|
float_tool = tools["compare_float"]
|
|
@@ -471,7 +471,7 @@ class TestAliasPreprocessing:
|
|
|
471
471
|
@pytest.mark.asyncio
|
|
472
472
|
async def test_boolean_alias_preprocessing(self):
|
|
473
473
|
"""Test boolean extraction in compare_boolean tool."""
|
|
474
|
-
tools = {t.name: t for t in
|
|
474
|
+
tools = {t.name: t for t in comparator._tool_manager._tools.values()}
|
|
475
475
|
bool_tool = tools["compare_boolean"]
|
|
476
476
|
|
|
477
477
|
assert isinstance(bool_tool, FunctionTool)
|
|
@@ -485,7 +485,7 @@ class TestAliasPreprocessing:
|
|
|
485
485
|
@pytest.mark.asyncio
|
|
486
486
|
async def test_list_alias_preprocessing(self):
|
|
487
487
|
"""Test list extraction in compare_list tool."""
|
|
488
|
-
tools = {t.name: t for t in
|
|
488
|
+
tools = {t.name: t for t in comparator._tool_manager._tools.values()}
|
|
489
489
|
list_tool = tools["compare_list"]
|
|
490
490
|
|
|
491
491
|
assert isinstance(list_tool, FunctionTool)
|
|
@@ -499,7 +499,7 @@ class TestAliasPreprocessing:
|
|
|
499
499
|
@pytest.mark.asyncio
|
|
500
500
|
async def test_complex_llm_output(self):
|
|
501
501
|
"""Test extraction from complex LLM outputs with reasoning."""
|
|
502
|
-
tools = {t.name: t for t in
|
|
502
|
+
tools = {t.name: t for t in comparator._tool_manager._tools.values()}
|
|
503
503
|
json_tool = tools["compare_json"]
|
|
504
504
|
|
|
505
505
|
llm_output = """
|
|
@@ -8,12 +8,12 @@ class TestNativeInit:
|
|
|
8
8
|
|
|
9
9
|
def test_comparator_server_import(self):
|
|
10
10
|
"""Test that comparator server can be imported."""
|
|
11
|
-
from hud.native.comparator import
|
|
11
|
+
from hud.native.comparator import comparator
|
|
12
12
|
from hud.server import MCPServer
|
|
13
13
|
|
|
14
14
|
# Verify comparator is an MCPServer instance
|
|
15
|
-
assert isinstance(
|
|
16
|
-
assert
|
|
15
|
+
assert isinstance(comparator, MCPServer)
|
|
16
|
+
assert comparator.name == "comparator"
|
|
17
17
|
|
|
18
18
|
def test_all_exports(self):
|
|
19
19
|
"""Test that __all__ is properly defined."""
|
|
@@ -31,11 +31,11 @@ class TestNativeInit:
|
|
|
31
31
|
|
|
32
32
|
def test_comparator_tools_registered(self):
|
|
33
33
|
"""Test that comparator server has tools registered."""
|
|
34
|
-
from hud.native.comparator import
|
|
34
|
+
from hud.native.comparator import comparator
|
|
35
35
|
|
|
36
36
|
# The server should have tools registered
|
|
37
37
|
# We can check that the tool manager has tools
|
|
38
|
-
tool_names = [t.name for t in
|
|
38
|
+
tool_names = [t.name for t in comparator._tool_manager._tools.values()]
|
|
39
39
|
|
|
40
40
|
# Should have the main compare tool
|
|
41
41
|
assert "compare" in tool_names
|
|
@@ -64,16 +64,18 @@ class TestNativeInit:
|
|
|
64
64
|
|
|
65
65
|
def test_comparator_tool_functionality(self):
|
|
66
66
|
"""Test that we can get the CompareTool from the comparator."""
|
|
67
|
-
from hud.native.comparator import
|
|
68
|
-
from hud.tools import BaseTool
|
|
67
|
+
from hud.native.comparator import comparator
|
|
69
68
|
|
|
70
69
|
# Get the compare tool
|
|
71
70
|
compare_tool = None
|
|
72
|
-
for tool in
|
|
71
|
+
for tool in comparator._tool_manager._tools.values():
|
|
73
72
|
if tool.name == "compare":
|
|
74
73
|
compare_tool = tool
|
|
75
74
|
break
|
|
76
75
|
|
|
77
76
|
assert compare_tool is not None
|
|
78
|
-
|
|
79
|
-
assert hasattr(compare_tool, "
|
|
77
|
+
# FastMCP wraps tools as FunctionTool instances
|
|
78
|
+
assert hasattr(compare_tool, "name")
|
|
79
|
+
assert compare_tool.name == "compare"
|
|
80
|
+
# FunctionTool has a 'fn' attribute for the callable
|
|
81
|
+
assert hasattr(compare_tool, "fn") or hasattr(compare_tool, "__call__")
|
hud/rl/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
We suggest running hud rl (or with the --local flag) for optimal hyperparameters and native HuggingFace running.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
However, to run this independently, spin up an instance with at least 2 GPUs and run:
|
|
4
4
|
```bash
|
|
5
5
|
sudo apt-get update -y && sudo apt-get install -y cuda-toolkit-12-6
|
|
6
6
|
uv pip install -e .[rl]
|
|
@@ -8,8 +8,7 @@ uv pip install ninja
|
|
|
8
8
|
uv pip install flash-attn --no-build-isolation
|
|
9
9
|
```
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
However, if you want to run the training directly, launch a vllm server with:
|
|
11
|
+
Launch a vllm server with:
|
|
13
12
|
```bash
|
|
14
13
|
export VLLM_ALLOW_RUNTIME_LORA_UPDATING=True
|
|
15
14
|
export TOKENIZERS_PARALLELISM=false
|
hud/rl/learner.py
CHANGED
|
@@ -103,10 +103,12 @@ class GRPOLearner:
|
|
|
103
103
|
|
|
104
104
|
# Load processor/tokenizer based on model type
|
|
105
105
|
if is_vl_model:
|
|
106
|
+
# Some environments require remote code for Qwen2.5-VL processors
|
|
106
107
|
processor = AutoProcessor.from_pretrained(
|
|
107
108
|
model_cfg.base_model,
|
|
108
109
|
min_pixels=model_cfg.min_pixels,
|
|
109
110
|
max_pixels=model_cfg.max_pixels,
|
|
111
|
+
trust_remote_code=True,
|
|
110
112
|
)
|
|
111
113
|
else:
|
|
112
114
|
processor = AutoTokenizer.from_pretrained(model_cfg.base_model)
|
|
@@ -123,6 +125,7 @@ class GRPOLearner:
|
|
|
123
125
|
model_cfg.base_model,
|
|
124
126
|
torch_dtype=torch.bfloat16,
|
|
125
127
|
attn_implementation=attn_implementation,
|
|
128
|
+
trust_remote_code=True,
|
|
126
129
|
)
|
|
127
130
|
self.log(f"Using {attn_implementation} for attention")
|
|
128
131
|
except (ImportError, ValueError) as e:
|
hud/rl/train.py
CHANGED
|
@@ -232,6 +232,9 @@ async def train(config: Config, tasks: list[Task]) -> None:
|
|
|
232
232
|
)
|
|
233
233
|
learner.save(str(checkpoint_path))
|
|
234
234
|
|
|
235
|
+
# Wait for 6 seconds to ensure the checkpoint is saved
|
|
236
|
+
await asyncio.sleep(6)
|
|
237
|
+
|
|
235
238
|
adapter_name = f"{config.adapter_prefix}-{checkpoint_id}"
|
|
236
239
|
if vllm.load_adapter(adapter_name, str(checkpoint_path)):
|
|
237
240
|
actor.update_adapter(adapter_name)
|
hud/rl/vllm_adapter.py
CHANGED
|
@@ -35,20 +35,38 @@ class VLLMAdapter:
|
|
|
35
35
|
url = f"{self.base_url}/load_lora_adapter"
|
|
36
36
|
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
|
|
37
37
|
payload = {"lora_name": adapter_name, "lora_path": adapter_path}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
38
|
+
# Implement exponential backoff for retrying the adapter load request.
|
|
39
|
+
max_retries = 5
|
|
40
|
+
backoff_factor = 2
|
|
41
|
+
delay = 1 # initial delay in seconds
|
|
42
|
+
|
|
43
|
+
for attempt in range(1, max_retries + 1):
|
|
44
|
+
try:
|
|
45
|
+
response = requests.post(
|
|
46
|
+
url, headers=headers, data=json.dumps(payload), timeout=timeout
|
|
47
|
+
)
|
|
48
|
+
response.raise_for_status()
|
|
49
|
+
|
|
50
|
+
self.current_adapter = adapter_name
|
|
51
|
+
hud_console.info(f"[VLLMAdapter] Loaded adapter: {adapter_name}")
|
|
52
|
+
return True
|
|
53
|
+
|
|
54
|
+
except requests.exceptions.RequestException as e:
|
|
55
|
+
if attempt == max_retries:
|
|
56
|
+
hud_console.error(
|
|
57
|
+
f"[VLLMAdapter] Failed to load adapter {adapter_name} after {attempt} attempts: {e}" # noqa: E501
|
|
58
|
+
)
|
|
59
|
+
return False
|
|
60
|
+
else:
|
|
61
|
+
hud_console.warning(
|
|
62
|
+
f"[VLLMAdapter] Load adapter {adapter_name} failed (attempt {attempt}/{max_retries}): {e}. Retrying in {delay} seconds...", # noqa: E501
|
|
63
|
+
)
|
|
64
|
+
import time
|
|
65
|
+
|
|
66
|
+
time.sleep(delay)
|
|
67
|
+
delay *= backoff_factor
|
|
68
|
+
|
|
69
|
+
return False
|
|
52
70
|
|
|
53
71
|
def unload_adapter(self, adapter_name: str) -> bool:
|
|
54
72
|
"""
|
hud/utils/tests/test_version.py
CHANGED
hud/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.31
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -35,15 +35,20 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
35
35
|
Classifier: Programming Language :: Python :: 3.12
|
|
36
36
|
Classifier: Programming Language :: Python :: 3.13
|
|
37
37
|
Requires-Python: <3.13,>=3.11
|
|
38
|
+
Requires-Dist: anthropic
|
|
39
|
+
Requires-Dist: datasets>=2.14.0
|
|
38
40
|
Requires-Dist: httpx<1,>=0.23.0
|
|
39
41
|
Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
|
|
40
42
|
Requires-Dist: hud-mcp-python-sdk>=3.13.2
|
|
41
43
|
Requires-Dist: hud-mcp-use-python-sdk>=2.3.16
|
|
44
|
+
Requires-Dist: numpy>=1.24.0
|
|
45
|
+
Requires-Dist: openai
|
|
42
46
|
Requires-Dist: opentelemetry-api>=1.34.1
|
|
43
47
|
Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
|
|
44
48
|
Requires-Dist: opentelemetry-instrumentation-mcp==0.47.0
|
|
45
49
|
Requires-Dist: opentelemetry-sdk>=1.34.1
|
|
46
50
|
Requires-Dist: pathspec>=0.12.1
|
|
51
|
+
Requires-Dist: pillow>=11.1.0
|
|
47
52
|
Requires-Dist: prompt-toolkit==3.0.51
|
|
48
53
|
Requires-Dist: pydantic-settings<3,>=2
|
|
49
54
|
Requires-Dist: pydantic<3,>=2
|
|
@@ -54,8 +59,6 @@ Requires-Dist: typer>=0.9.0
|
|
|
54
59
|
Requires-Dist: watchfiles>=0.21.0
|
|
55
60
|
Requires-Dist: wrapt>=1.14.0
|
|
56
61
|
Provides-Extra: agent
|
|
57
|
-
Requires-Dist: anthropic; extra == 'agent'
|
|
58
|
-
Requires-Dist: datasets>=2.14.0; extra == 'agent'
|
|
59
62
|
Requires-Dist: dotenv>=0.9.9; extra == 'agent'
|
|
60
63
|
Requires-Dist: ipykernel; extra == 'agent'
|
|
61
64
|
Requires-Dist: ipython<9; extra == 'agent'
|
|
@@ -64,12 +67,7 @@ Requires-Dist: jupyter-core; extra == 'agent'
|
|
|
64
67
|
Requires-Dist: langchain; extra == 'agent'
|
|
65
68
|
Requires-Dist: langchain-anthropic; extra == 'agent'
|
|
66
69
|
Requires-Dist: langchain-openai; extra == 'agent'
|
|
67
|
-
Requires-Dist: numpy>=1.24.0; extra == 'agent'
|
|
68
|
-
Requires-Dist: openai; extra == 'agent'
|
|
69
|
-
Requires-Dist: pillow>=11.1.0; extra == 'agent'
|
|
70
70
|
Provides-Extra: agents
|
|
71
|
-
Requires-Dist: anthropic; extra == 'agents'
|
|
72
|
-
Requires-Dist: datasets>=2.14.0; extra == 'agents'
|
|
73
71
|
Requires-Dist: dotenv>=0.9.9; extra == 'agents'
|
|
74
72
|
Requires-Dist: ipykernel; extra == 'agents'
|
|
75
73
|
Requires-Dist: ipython<9; extra == 'agents'
|
|
@@ -78,13 +76,8 @@ Requires-Dist: jupyter-core; extra == 'agents'
|
|
|
78
76
|
Requires-Dist: langchain; extra == 'agents'
|
|
79
77
|
Requires-Dist: langchain-anthropic; extra == 'agents'
|
|
80
78
|
Requires-Dist: langchain-openai; extra == 'agents'
|
|
81
|
-
Requires-Dist: numpy>=1.24.0; extra == 'agents'
|
|
82
|
-
Requires-Dist: openai; extra == 'agents'
|
|
83
|
-
Requires-Dist: pillow>=11.1.0; extra == 'agents'
|
|
84
79
|
Provides-Extra: dev
|
|
85
80
|
Requires-Dist: aiodocker>=0.24.0; extra == 'dev'
|
|
86
|
-
Requires-Dist: anthropic; extra == 'dev'
|
|
87
|
-
Requires-Dist: datasets>=2.14.0; extra == 'dev'
|
|
88
81
|
Requires-Dist: dotenv>=0.9.9; extra == 'dev'
|
|
89
82
|
Requires-Dist: inspect-ai>=0.3.80; extra == 'dev'
|
|
90
83
|
Requires-Dist: ipykernel; extra == 'dev'
|
|
@@ -94,8 +87,6 @@ Requires-Dist: jupyter-core; extra == 'dev'
|
|
|
94
87
|
Requires-Dist: langchain; extra == 'dev'
|
|
95
88
|
Requires-Dist: langchain-anthropic; extra == 'dev'
|
|
96
89
|
Requires-Dist: langchain-openai; extra == 'dev'
|
|
97
|
-
Requires-Dist: numpy>=1.24.0; extra == 'dev'
|
|
98
|
-
Requires-Dist: openai; extra == 'dev'
|
|
99
90
|
Requires-Dist: pillow>=11.1.0; extra == 'dev'
|
|
100
91
|
Requires-Dist: playwright; extra == 'dev'
|
|
101
92
|
Requires-Dist: pyautogui>=0.9.54; extra == 'dev'
|
|
@@ -108,9 +99,7 @@ Requires-Dist: ruff>=0.11.8; extra == 'dev'
|
|
|
108
99
|
Requires-Dist: setuptools; extra == 'dev'
|
|
109
100
|
Requires-Dist: textdistance<5,>=4.5.0; extra == 'dev'
|
|
110
101
|
Provides-Extra: rl
|
|
111
|
-
Requires-Dist: anthropic; extra == 'rl'
|
|
112
102
|
Requires-Dist: bitsandbytes>=0.41.0; (sys_platform == 'linux') and extra == 'rl'
|
|
113
|
-
Requires-Dist: datasets>=2.14.0; extra == 'rl'
|
|
114
103
|
Requires-Dist: dotenv>=0.9.9; extra == 'rl'
|
|
115
104
|
Requires-Dist: ipykernel; extra == 'rl'
|
|
116
105
|
Requires-Dist: ipython<9; extra == 'rl'
|
|
@@ -120,10 +109,7 @@ Requires-Dist: langchain; extra == 'rl'
|
|
|
120
109
|
Requires-Dist: langchain-anthropic; extra == 'rl'
|
|
121
110
|
Requires-Dist: langchain-openai; extra == 'rl'
|
|
122
111
|
Requires-Dist: liger-kernel>=0.5.0; (sys_platform == 'linux') and extra == 'rl'
|
|
123
|
-
Requires-Dist: numpy>=1.24.0; extra == 'rl'
|
|
124
|
-
Requires-Dist: openai; extra == 'rl'
|
|
125
112
|
Requires-Dist: peft>=0.17.1; extra == 'rl'
|
|
126
|
-
Requires-Dist: pillow>=11.1.0; extra == 'rl'
|
|
127
113
|
Requires-Dist: vllm==0.10.1.1; extra == 'rl'
|
|
128
114
|
Description-Content-Type: text/markdown
|
|
129
115
|
|
|
@@ -239,21 +225,34 @@ The above example let's the agent play 2048 ([See replay](https://app.hud.so/tra
|
|
|
239
225
|
|
|
240
226
|
## Reinforcement Learning with GRPO
|
|
241
227
|
|
|
242
|
-
This is a Qwen
|
|
228
|
+
This is a Qwen‑2.5‑VL‑3B agent training a policy on the 2048-basic browser environment:
|
|
243
229
|
|
|
244
230
|

|
|
245
231
|
|
|
246
|
-
|
|
232
|
+
Train with the new interactive `hud rl` flow:
|
|
247
233
|
|
|
248
234
|
```bash
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
235
|
+
# Install CLI with RL extras
|
|
236
|
+
uv tool install "hud-python[rl]"
|
|
237
|
+
|
|
238
|
+
# Option A: Run directly from a HuggingFace dataset
|
|
239
|
+
hud rl hud-evals/basic-2048
|
|
240
|
+
|
|
241
|
+
# Option B: Download first, modify, then train
|
|
242
|
+
hud get hud-evals/basic-2048
|
|
243
|
+
hud rl basic-2048.jsonl
|
|
244
|
+
|
|
245
|
+
# Optional: baseline evaluation
|
|
246
|
+
hud eval basic-2048.jsonl
|
|
252
247
|
```
|
|
253
248
|
|
|
254
|
-
|
|
249
|
+
Supports multi‑turn RL for both:
|
|
250
|
+
- Language‑only models (e.g., `Qwen/Qwen2.5-7B-Instruct`)
|
|
251
|
+
- Vision‑Language models (e.g., `Qwen/Qwen2.5-VL-3B-Instruct`)
|
|
252
|
+
|
|
253
|
+
By default, `hud rl` provisions a persistent server and trainer in the cloud, streams telemetry to `app.hud.so`, and lets you monitor/manage models at `app.hud.so/models`. Use `--local` to run entirely on your machines (typically 2+ GPUs: one for vLLM, the rest for training).
|
|
255
254
|
|
|
256
|
-
|
|
255
|
+
Any HUD MCP environment and evaluation works with our RL pipeline (including remote configurations). See the guided docs: `https://docs.hud.so/train-agents/quickstart`.
|
|
257
256
|
|
|
258
257
|
## Benchmarking Agents
|
|
259
258
|
|
|
@@ -2,14 +2,14 @@ hud/__init__.py,sha256=JMDFUE1pP0J1Xl_miBdt7ERvoffZmTzSFe8yxz512A8,552
|
|
|
2
2
|
hud/__main__.py,sha256=YR8Dq8OhINOsVfQ55PmRXXg4fEK84Rt_-rMtJ5rvhWo,145
|
|
3
3
|
hud/settings.py,sha256=sMS31iW1m-5VpWk-Blhi5-obLcUA0fwxWE1GgJz-vqU,2708
|
|
4
4
|
hud/types.py,sha256=Cn9suZ_ZitLnxmnknfbCYVvmLsXRWI56kJ3LXtdfI6M,10157
|
|
5
|
-
hud/version.py,sha256=
|
|
5
|
+
hud/version.py,sha256=wQqwRzN9OMugX4H2oCdAp5bWBbjfBRrvQGeo_0_uaYs,105
|
|
6
6
|
hud/agents/__init__.py,sha256=UoIkljWdbq4bM0LD-mSaw6w826EqdEjOk7r6glNYwYQ,286
|
|
7
|
-
hud/agents/base.py,sha256=
|
|
7
|
+
hud/agents/base.py,sha256=_u1zR3gXzZ1RlTCUYdMcvgHqdJBC4-AB1lZt0yBx8lg,35406
|
|
8
8
|
hud/agents/claude.py,sha256=wHiw8iAnjnRmZyKRKcOhagCDQMhz9Z6rlSBWqH1X--M,15781
|
|
9
9
|
hud/agents/grounded_openai.py,sha256=U-FHjB2Nh1_o0gmlxY5F17lWJ3oHsNRIB2a7z-IKB64,11231
|
|
10
10
|
hud/agents/langchain.py,sha256=1EgCy8jfjunsWxlPC5XfvfLS6_XZVrIF1ZjtHcrvhYw,9584
|
|
11
11
|
hud/agents/openai.py,sha256=ovARRWNuHqKkZ2Q_OCYSVCIZckrh8XY2jUB2p2x1m88,14259
|
|
12
|
-
hud/agents/openai_chat_generic.py,sha256=
|
|
12
|
+
hud/agents/openai_chat_generic.py,sha256=7n7timn3fvNRnL2xzWyOTeNTchej2r9cAL1mU6YnFdY,11605
|
|
13
13
|
hud/agents/misc/__init__.py,sha256=BYi4Ytp9b_vycpZFXnr5Oyw6ncKLNNGml8Jrb7bWUb4,136
|
|
14
14
|
hud/agents/misc/response_agent.py,sha256=OJdQJ76jP9xxQxVYJ-qPcdBxvFr8ABcwbP1f1I5zU5A,3227
|
|
15
15
|
hud/agents/tests/__init__.py,sha256=W-O-_4i34d9TTyEHV-O_q1Ai1gLhzwDaaPo02_TWQIY,34
|
|
@@ -27,20 +27,21 @@ hud/cli/debug.py,sha256=jtFW8J5F_3rhq1Hf1_SkJ7aLS3wjnyIs_LsC8k5cnzc,14200
|
|
|
27
27
|
hud/cli/dev.py,sha256=56vQdH9oe_XGnOcRcFbNIsLEoBnpCl1eANlRFUeddHQ,31734
|
|
28
28
|
hud/cli/eval.py,sha256=W_eY4uoIQwHcSCvxNaQeRfWC10uQA1UhBWiNQzQPuXM,22694
|
|
29
29
|
hud/cli/get.py,sha256=sksKrdzBGZa7ZuSoQkc0haj-CvOGVSSikoVXeaUd3N4,6274
|
|
30
|
-
hud/cli/init.py,sha256=
|
|
30
|
+
hud/cli/init.py,sha256=McZwpxZMXD-It_PXINCUy-SwUaPiQ7jdpSU5-F-caO8,19671
|
|
31
31
|
hud/cli/list_func.py,sha256=EVi2Vc3Lb3glBNJxFx4MPnZknZ4xmuJz1OFg_dc8a_E,7177
|
|
32
32
|
hud/cli/pull.py,sha256=Vd1l1-IwskyACzhtC8Df1SYINUZEYmFxrLl0s9cNN6c,12151
|
|
33
33
|
hud/cli/push.py,sha256=JXUxu1QGU7BPWb0erSJq42CIq0sLbaDAO42yYDcvA1g,18347
|
|
34
34
|
hud/cli/remove.py,sha256=8vGQyXDqgtjz85_vtusoIG8zurH4RHz6z8UMevQRYM4,6861
|
|
35
35
|
hud/cli/flows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
|
-
hud/cli/flows/tasks.py,sha256=
|
|
37
|
-
hud/cli/rl/__init__.py,sha256=
|
|
36
|
+
hud/cli/flows/tasks.py,sha256=8r-51oon3anwMAi5cyAOgC3iB48jnqlOUO3iTWmqsyI,6372
|
|
37
|
+
hud/cli/rl/__init__.py,sha256=BeqXdmzPwVBptz4j796XJRxSC5B_9tQta5aKd0jDMvo,5000
|
|
38
38
|
hud/cli/rl/config.py,sha256=iNhCxotM33OEiP9gqPvn8A_AxrBVe6fcFCQTvc13xzA,2884
|
|
39
|
-
hud/cli/rl/display.py,sha256=
|
|
39
|
+
hud/cli/rl/display.py,sha256=hqJVGmO9csYinladhZwjF-GMvppYWngxDHajTyIJ_gM,5214
|
|
40
40
|
hud/cli/rl/gpu.py,sha256=peXS-NdUF5RyuSs0aZoCzGLboneBUpCy8f9f99WMrG0,2009
|
|
41
41
|
hud/cli/rl/gpu_utils.py,sha256=H5ckPwgj5EVP3yJ5eVihR5R7Y6Gp6pt8ZUfWCCwcLG4,11072
|
|
42
|
+
hud/cli/rl/local_runner.py,sha256=GssmDgCxGfFsi31aFj22vwCiwa9ELllEwQjbActxSXY,21514
|
|
42
43
|
hud/cli/rl/presets.py,sha256=DzOO82xL5QyzdVtlX-Do1CODMvDz9ILMPapjU92jcZg,3051
|
|
43
|
-
hud/cli/rl/remote_runner.py,sha256=
|
|
44
|
+
hud/cli/rl/remote_runner.py,sha256=Umyjjbtw6ikyNNfVGjn6sY6Qnta8Uc1KC7KzMgFaJVw,13146
|
|
44
45
|
hud/cli/rl/rl_api.py,sha256=INJobvSa50ccR037u_GPsDa_9WboWyNwqEaoh9hcXj0,4306
|
|
45
46
|
hud/cli/rl/vllm.py,sha256=Gq_M6KsQArGz7FNIdemuM5mk16mu3xe8abpO2GCCuOE,6093
|
|
46
47
|
hud/cli/tests/__init__.py,sha256=ZrGVkmH7DHXGqOvjOSNGZeMYaFIRB2K8c6hwr8FPJ-8,68
|
|
@@ -61,7 +62,7 @@ hud/cli/tests/test_registry.py,sha256=-o9MvQTcBElteqrg0XW8Bg59KrHCt88ZyPqeaAlyyT
|
|
|
61
62
|
hud/cli/tests/test_utils.py,sha256=_oa2lTvgqJxXe0Mtovxb8x-Sug-f6oJJKvG67r5pFtA,13474
|
|
62
63
|
hud/cli/utils/__init__.py,sha256=L6s0oNzY2LugGp9faodCPnjzM-ZUorUH05-HmYOq5hY,35
|
|
63
64
|
hud/cli/utils/cursor.py,sha256=fy850p0rVp5k_1wwOCI7rK1SggbselJrywFInSQ2gio,3009
|
|
64
|
-
hud/cli/utils/docker.py,sha256
|
|
65
|
+
hud/cli/utils/docker.py,sha256=-nAj7wRRIilbezG0-pCHA2-tleoqUJN9sDXHxvMWilU,7331
|
|
65
66
|
hud/cli/utils/environment.py,sha256=y_c0ohxWrM054ZKid0KOQPzs2M2vh985AsumPG2wTPc,4282
|
|
66
67
|
hud/cli/utils/interactive.py,sha256=tcwp9HkAyr2_GiM3Raba4h0P_OgCksQKram80BucPo4,16546
|
|
67
68
|
hud/cli/utils/logging.py,sha256=DyOWuzZUg6HeKCqfs6ufb703XS3bW4G2pzaXVAvDqvA,9018
|
|
@@ -92,10 +93,10 @@ hud/datasets/utils.py,sha256=hdZfjWH5l3FVJaWBSHEEpjujAG7DqEam_vHgslL8MLs,4279
|
|
|
92
93
|
hud/misc/__init__.py,sha256=m_pprQQ-G-Y0Sd0NEiR8MtAMbElnuFZ2OWT8TXrw7c4,43
|
|
93
94
|
hud/misc/claude_plays_pokemon.py,sha256=IthAkjDVr2Q-GNvX-QLJyMzN7-0pHqqJbagGNv2m7yo,10453
|
|
94
95
|
hud/native/__init__.py,sha256=TqM0KaiQnDb2Nv1zOgpEMiLVq8JPd4j_aaK4rUZ0IiA,232
|
|
95
|
-
hud/native/comparator.py,sha256=
|
|
96
|
+
hud/native/comparator.py,sha256=GCHs7iZa0fB425es6vvG91UW4yrbY6-BsWdabYJaNA4,18255
|
|
96
97
|
hud/native/tests/__init__.py,sha256=gBTLMm6w5f6D-02Se2WleYsEEYyFt95JDcFzp3C2L_k,40
|
|
97
|
-
hud/native/tests/test_comparator.py,sha256=
|
|
98
|
-
hud/native/tests/test_native_init.py,sha256=
|
|
98
|
+
hud/native/tests/test_comparator.py,sha256=pDch3r3xDi2o5YXF_bkoLfIdHcCjse3foAaqyr7PzkQ,18512
|
|
99
|
+
hud/native/tests/test_native_init.py,sha256=Z-2dinbQYEkrbCcfBrBOLGdpXtWWOtkfPzp7ZKri68Y,2839
|
|
99
100
|
hud/otel/__init__.py,sha256=ii17ayoWiS5vAhA7UAmZ8TkmP52gs2pWyHsD46-uYbE,1003
|
|
100
101
|
hud/otel/collector.py,sha256=jLZymZ8r7xt2VDuWexfbnT7PY1-0aiyLMgjBy8KDY1M,4497
|
|
101
102
|
hud/otel/config.py,sha256=mricuAmtFd1yIfOYKw2aHI-u4piku0GXHWv6hjsWQLM,6806
|
|
@@ -105,18 +106,18 @@ hud/otel/instrumentation.py,sha256=fsFG9W89RdewFDxWKN9Ft4GUb7WbIKpfucTc16WxaZU,5
|
|
|
105
106
|
hud/otel/processors.py,sha256=-gGRbwifplcExDQBLfx_9tqWreDImULJNcENgO9q7VU,4700
|
|
106
107
|
hud/otel/tests/__init__.py,sha256=VNJKBMaxTtbn7trW-1Ph50zCvCok_wTSGcI1HD6GOLA,43
|
|
107
108
|
hud/otel/tests/test_processors.py,sha256=np0R4ssd9j6LJSJykJ5bNjl0POwNYNhgb7BqOZHwcMY,6778
|
|
108
|
-
hud/rl/README.md,sha256=
|
|
109
|
+
hud/rl/README.md,sha256=uFRpNFaEY8paq9k1C4miF7AGnbqHTGAsPmpcf9JIEeA,1189
|
|
109
110
|
hud/rl/__init__.py,sha256=yYL7U1WV6L3mr3Hig48-4lhnryTaWj4nCXm4lG5vrYI,25
|
|
110
111
|
hud/rl/actor.py,sha256=0YChXyxCz1wVBQ9lKb7vSl64_HQ24-DmYqCCxuORzJc,6747
|
|
111
112
|
hud/rl/buffer.py,sha256=xz4FlvO9l945VsSS4lzRFMwH3rA9HafgbUfADSauXok,15210
|
|
112
113
|
hud/rl/chat_template.jinja,sha256=XTdzI8oFGEcSA-exKxyHaprwRDmX5Am1KEb0VxvUc6U,4965
|
|
113
114
|
hud/rl/config.py,sha256=PAKYPCsKl8yg_j3gJSE5SJUgLM7j0lFy0K_Vt4-otDM,5384
|
|
114
115
|
hud/rl/distributed.py,sha256=8avhrb0lHYkhW22Z7MfkqSnlczWj5jMrUMEtkcoCf74,2473
|
|
115
|
-
hud/rl/learner.py,sha256=
|
|
116
|
-
hud/rl/train.py,sha256=
|
|
116
|
+
hud/rl/learner.py,sha256=FKIgIIghsNiDr_g090xokOO_BxNmTSj1O-TSJzIq_Uw,24703
|
|
117
|
+
hud/rl/train.py,sha256=ZigkUKj-I1nsYmFByZprqaoDZ88LVDH-6auYneEPOsA,13555
|
|
117
118
|
hud/rl/types.py,sha256=lrLKo7iaqodYth2EyeuOQfLiuzXfYM2eJjPmpObrD7c,3965
|
|
118
119
|
hud/rl/utils.py,sha256=IsgVUUibxnUzb32a4mu1sYrgJC1CwoG9E-Dd5y5VDOA,19115
|
|
119
|
-
hud/rl/vllm_adapter.py,sha256=
|
|
120
|
+
hud/rl/vllm_adapter.py,sha256=O2_TdTGIyNr9zRGhCw18XWjOKYzEM3049wvlyL2x0sc,4751
|
|
120
121
|
hud/rl/tests/__init__.py,sha256=PXmD3Gs6xOAwaYKb4HnwZERDjX05N1QF-aU6ya0dBtE,27
|
|
121
122
|
hud/rl/tests/test_learner.py,sha256=qfSHFFROteRb98TjBuAKjFmZjCGfuWXPysVvTAWJ7wQ,6025
|
|
122
123
|
hud/rl/utils/start_vllm_server.sh,sha256=ThPokrLK_Qm_uh916fHXXBfMlw1TC97P57-AEI5MuOc,910
|
|
@@ -197,10 +198,10 @@ hud/utils/tests/test_init.py,sha256=2QLQSGgyP9wJhOvPCusm_zjJad0qApOZi1BXpxcdHXQ,
|
|
|
197
198
|
hud/utils/tests/test_mcp.py,sha256=0pUa16mL-bqbZDXp5NHBnt1gO5o10BOg7zTMHZ1DNPM,4023
|
|
198
199
|
hud/utils/tests/test_progress.py,sha256=QSF7Kpi03Ff_l3mAeqW9qs1nhK50j9vBiSobZq7T4f4,7394
|
|
199
200
|
hud/utils/tests/test_telemetry.py,sha256=5jl7bEx8C8b-FfFUko5pf4UY-mPOR-9HaeL98dGtVHM,2781
|
|
200
|
-
hud/utils/tests/test_version.py,sha256=
|
|
201
|
+
hud/utils/tests/test_version.py,sha256=qN98qLlKKeM-W-AM1q1s0Lci8phBQ7SUd_L-0yVmujA,160
|
|
201
202
|
hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
202
|
-
hud_python-0.4.
|
|
203
|
-
hud_python-0.4.
|
|
204
|
-
hud_python-0.4.
|
|
205
|
-
hud_python-0.4.
|
|
206
|
-
hud_python-0.4.
|
|
203
|
+
hud_python-0.4.31.dist-info/METADATA,sha256=UZMnmiRCMmHmXOD0BrkQs7Caqk5t8HcllxWPwR2SSzc,20861
|
|
204
|
+
hud_python-0.4.31.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
205
|
+
hud_python-0.4.31.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
|
|
206
|
+
hud_python-0.4.31.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
|
|
207
|
+
hud_python-0.4.31.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|