hud-python 0.4.29__py3-none-any.whl → 0.4.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/cli/utils/docker.py CHANGED
@@ -3,6 +3,8 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import json
6
+ import platform
7
+ import shutil
6
8
  import subprocess
7
9
 
8
10
 
@@ -117,3 +119,95 @@ def generate_container_name(identifier: str, prefix: str = "hud") -> str:
117
119
  # Replace special characters with hyphens
118
120
  safe_name = identifier.replace(":", "-").replace("/", "-").replace("\\", "-")
119
121
  return f"{prefix}-{safe_name}"
122
+
123
+
124
+ def _emit_docker_hints(error_text: str) -> None:
125
+ """Parse common Docker connectivity errors and print platform-specific hints."""
126
+ from hud.utils.hud_console import hud_console
127
+
128
+ text = error_text.lower()
129
+ system = platform.system()
130
+
131
+ markers = [
132
+ "cannot connect to the docker daemon",
133
+ "is the docker daemon running",
134
+ "error during connect",
135
+ "permission denied while trying to connect",
136
+ "no such file or directory",
137
+ "pipe/dockerdesktop",
138
+ "dockerdesktoplinuxengine",
139
+ "//./pipe/docker",
140
+ "/var/run/docker.sock",
141
+ ]
142
+
143
+ if any(m in text for m in markers):
144
+ hud_console.error("Docker does not appear to be running or accessible")
145
+ if system == "Windows":
146
+ hud_console.hint("Open Docker Desktop and wait until it shows 'Running'")
147
+ hud_console.hint("If using WSL, enable integration for your distro in Docker Desktop")
148
+ elif system == "Linux":
149
+ hud_console.hint(
150
+ "Start the daemon: sudo systemctl start docker (or service docker start)"
151
+ )
152
+ hud_console.hint("If permission denied: sudo usermod -aG docker $USER && re-login")
153
+ elif system == "Darwin":
154
+ hud_console.hint("Open Docker Desktop and wait until it shows 'Running'")
155
+ else:
156
+ hud_console.hint("Start Docker and ensure the daemon is reachable")
157
+ trimmed = error_text.strip()
158
+ if len(trimmed) > 300:
159
+ trimmed = trimmed[:300] + "..."
160
+ hud_console.dim_info("Details", trimmed)
161
+ else:
162
+ from hud.utils.hud_console import hud_console as _hc
163
+
164
+ _hc.error("Docker returned an error")
165
+ trimmed = error_text.strip()
166
+ if len(trimmed) > 300:
167
+ trimmed = trimmed[:300] + "..."
168
+ _hc.dim_info("Details", trimmed)
169
+ _hc.hint("Is Docker running and accessible?")
170
+
171
+
172
+ def require_docker_running() -> None:
173
+ """Ensure Docker CLI exists and daemon is reachable; print hints and exit if not."""
174
+ import typer
175
+
176
+ from hud.utils.hud_console import hud_console
177
+
178
+ docker_path: str | None = shutil.which("docker")
179
+ if not docker_path:
180
+ hud_console.error("Docker CLI not found")
181
+ hud_console.info("Install Docker Desktop (Windows/macOS) or Docker Engine (Linux)")
182
+ hud_console.hint("After installation, start Docker and re-run this command")
183
+ raise typer.Exit(1)
184
+
185
+ try:
186
+ result = subprocess.run( # noqa: UP022, S603
187
+ [docker_path, "info"],
188
+ stdout=subprocess.PIPE,
189
+ stderr=subprocess.PIPE,
190
+ text=True,
191
+ timeout=8,
192
+ check=False,
193
+ )
194
+ if result.returncode == 0:
195
+ return
196
+
197
+ error_text = (result.stderr or "") + "\n" + (result.stdout or "")
198
+ _emit_docker_hints(error_text)
199
+ raise typer.Exit(1)
200
+ except FileNotFoundError as e:
201
+ hud_console.error("Docker CLI not found on PATH")
202
+ hud_console.hint("Install Docker and ensure 'docker' is on your PATH")
203
+ raise typer.Exit(1) from e
204
+ except subprocess.TimeoutExpired as e:
205
+ hud_console.error("Docker did not respond in time")
206
+ hud_console.hint(
207
+ "Is Docker running? Open Docker Desktop and wait until it reports 'Running'"
208
+ )
209
+ raise typer.Exit(1) from e
210
+ except Exception as e:
211
+ hud_console.error(f"Docker check failed: {e}")
212
+ hud_console.hint("Is the Docker daemon running?")
213
+ raise typer.Exit(1) from e
hud/native/comparator.py CHANGED
@@ -513,11 +513,11 @@ def make_alias_tool(name: str, preset_mode: ComparisonMode, description: str) ->
513
513
 
514
514
 
515
515
  # Create MCP server
516
- comparator_server = MCPServer(name="comparator")
516
+ comparator = MCPServer(name="comparator")
517
517
 
518
518
  # Register main tool
519
- comparator_server.add_tool(SubmitTool())
520
- comparator_server.add_tool(CompareTool())
519
+ comparator.add_tool(SubmitTool())
520
+ comparator.add_tool(CompareTool())
521
521
 
522
522
  # Register aliases - these are just thin wrappers
523
523
  ALIASES = [
@@ -534,13 +534,13 @@ ALIASES = [
534
534
 
535
535
  for name, mode, desc in ALIASES:
536
536
  AliasTool = make_alias_tool(name, mode, desc)
537
- comparator_server.add_tool(AliasTool())
537
+ comparator.add_tool(AliasTool())
538
538
 
539
539
  # Export for mounting
540
- __all__ = ["comparator_server"]
540
+ __all__ = ["comparator"]
541
541
 
542
542
 
543
543
  if __name__ == "__main__":
544
544
  # Run as standalone server
545
545
  logger.info("Starting Comparator MCP Server...")
546
- comparator_server.run()
546
+ comparator.run()
@@ -11,7 +11,7 @@ from hud.native.comparator import (
11
11
  ComparisonResult,
12
12
  DataType,
13
13
  auto_select_mode,
14
- comparator_server,
14
+ comparator,
15
15
  detect_type,
16
16
  extract_boolean,
17
17
  extract_json,
@@ -321,10 +321,10 @@ class TestAliasTools:
321
321
  @pytest.mark.asyncio
322
322
  async def test_aliases_work(self):
323
323
  """Test that aliases are properly registered and work."""
324
- from hud.native.comparator import comparator_server
324
+ from hud.native.comparator import comparator
325
325
 
326
326
  # Check that aliases are registered
327
- tool_names = [t.name for t in comparator_server._tool_manager._tools.values()]
327
+ tool_names = [t.name for t in comparator._tool_manager._tools.values()]
328
328
 
329
329
  expected_aliases = [
330
330
  "compare_exact",
@@ -433,7 +433,7 @@ class TestAliasPreprocessing:
433
433
  @pytest.mark.asyncio
434
434
  async def test_json_alias_preprocessing(self):
435
435
  """Test JSON extraction in compare_json tool."""
436
- tools = {t.name: t for t in comparator_server._tool_manager._tools.values()}
436
+ tools = {t.name: t for t in comparator._tool_manager._tools.values()}
437
437
  json_tool = tools["compare_json"]
438
438
 
439
439
  assert isinstance(json_tool, FunctionTool)
@@ -448,7 +448,7 @@ class TestAliasPreprocessing:
448
448
  @pytest.mark.asyncio
449
449
  async def test_numeric_alias_preprocessing(self):
450
450
  """Test number extraction in numeric tools."""
451
- tools = {t.name: t for t in comparator_server._tool_manager._tools.values()}
451
+ tools = {t.name: t for t in comparator._tool_manager._tools.values()}
452
452
 
453
453
  # Float tool
454
454
  float_tool = tools["compare_float"]
@@ -471,7 +471,7 @@ class TestAliasPreprocessing:
471
471
  @pytest.mark.asyncio
472
472
  async def test_boolean_alias_preprocessing(self):
473
473
  """Test boolean extraction in compare_boolean tool."""
474
- tools = {t.name: t for t in comparator_server._tool_manager._tools.values()}
474
+ tools = {t.name: t for t in comparator._tool_manager._tools.values()}
475
475
  bool_tool = tools["compare_boolean"]
476
476
 
477
477
  assert isinstance(bool_tool, FunctionTool)
@@ -485,7 +485,7 @@ class TestAliasPreprocessing:
485
485
  @pytest.mark.asyncio
486
486
  async def test_list_alias_preprocessing(self):
487
487
  """Test list extraction in compare_list tool."""
488
- tools = {t.name: t for t in comparator_server._tool_manager._tools.values()}
488
+ tools = {t.name: t for t in comparator._tool_manager._tools.values()}
489
489
  list_tool = tools["compare_list"]
490
490
 
491
491
  assert isinstance(list_tool, FunctionTool)
@@ -499,7 +499,7 @@ class TestAliasPreprocessing:
499
499
  @pytest.mark.asyncio
500
500
  async def test_complex_llm_output(self):
501
501
  """Test extraction from complex LLM outputs with reasoning."""
502
- tools = {t.name: t for t in comparator_server._tool_manager._tools.values()}
502
+ tools = {t.name: t for t in comparator._tool_manager._tools.values()}
503
503
  json_tool = tools["compare_json"]
504
504
 
505
505
  llm_output = """
@@ -8,12 +8,12 @@ class TestNativeInit:
8
8
 
9
9
  def test_comparator_server_import(self):
10
10
  """Test that comparator server can be imported."""
11
- from hud.native.comparator import comparator_server
11
+ from hud.native.comparator import comparator
12
12
  from hud.server import MCPServer
13
13
 
14
14
  # Verify comparator is an MCPServer instance
15
- assert isinstance(comparator_server, MCPServer)
16
- assert comparator_server.name == "comparator"
15
+ assert isinstance(comparator, MCPServer)
16
+ assert comparator.name == "comparator"
17
17
 
18
18
  def test_all_exports(self):
19
19
  """Test that __all__ is properly defined."""
@@ -31,11 +31,11 @@ class TestNativeInit:
31
31
 
32
32
  def test_comparator_tools_registered(self):
33
33
  """Test that comparator server has tools registered."""
34
- from hud.native.comparator import comparator_server
34
+ from hud.native.comparator import comparator
35
35
 
36
36
  # The server should have tools registered
37
37
  # We can check that the tool manager has tools
38
- tool_names = [t.name for t in comparator_server._tool_manager._tools.values()]
38
+ tool_names = [t.name for t in comparator._tool_manager._tools.values()]
39
39
 
40
40
  # Should have the main compare tool
41
41
  assert "compare" in tool_names
@@ -64,16 +64,18 @@ class TestNativeInit:
64
64
 
65
65
  def test_comparator_tool_functionality(self):
66
66
  """Test that we can get the CompareTool from the comparator."""
67
- from hud.native.comparator import comparator_server
68
- from hud.tools import BaseTool
67
+ from hud.native.comparator import comparator
69
68
 
70
69
  # Get the compare tool
71
70
  compare_tool = None
72
- for tool in comparator_server._tool_manager._tools.values():
71
+ for tool in comparator._tool_manager._tools.values():
73
72
  if tool.name == "compare":
74
73
  compare_tool = tool
75
74
  break
76
75
 
77
76
  assert compare_tool is not None
78
- assert isinstance(compare_tool, BaseTool)
79
- assert hasattr(compare_tool, "__call__")
77
+ # FastMCP wraps tools as FunctionTool instances
78
+ assert hasattr(compare_tool, "name")
79
+ assert compare_tool.name == "compare"
80
+ # FunctionTool has a 'fn' attribute for the callable
81
+ assert hasattr(compare_tool, "fn") or hasattr(compare_tool, "__call__")
hud/rl/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  We suggest running hud rl (or with the --local flag) for optimal hyperparameters and native HuggingFace running.
2
2
 
3
- Install:
3
+ However, to run this independently, spin up an instance with at least 2 GPUs and run:
4
4
  ```bash
5
5
  sudo apt-get update -y && sudo apt-get install -y cuda-toolkit-12-6
6
6
  uv pip install -e .[rl]
@@ -8,8 +8,7 @@ uv pip install ninja
8
8
  uv pip install flash-attn --no-build-isolation
9
9
  ```
10
10
 
11
-
12
- However, if you want to run the training directly, launch a vllm server with:
11
+ Launch a vllm server with:
13
12
  ```bash
14
13
  export VLLM_ALLOW_RUNTIME_LORA_UPDATING=True
15
14
  export TOKENIZERS_PARALLELISM=false
hud/rl/learner.py CHANGED
@@ -103,10 +103,12 @@ class GRPOLearner:
103
103
 
104
104
  # Load processor/tokenizer based on model type
105
105
  if is_vl_model:
106
+ # Some environments require remote code for Qwen2.5-VL processors
106
107
  processor = AutoProcessor.from_pretrained(
107
108
  model_cfg.base_model,
108
109
  min_pixels=model_cfg.min_pixels,
109
110
  max_pixels=model_cfg.max_pixels,
111
+ trust_remote_code=True,
110
112
  )
111
113
  else:
112
114
  processor = AutoTokenizer.from_pretrained(model_cfg.base_model)
@@ -123,6 +125,7 @@ class GRPOLearner:
123
125
  model_cfg.base_model,
124
126
  torch_dtype=torch.bfloat16,
125
127
  attn_implementation=attn_implementation,
128
+ trust_remote_code=True,
126
129
  )
127
130
  self.log(f"Using {attn_implementation} for attention")
128
131
  except (ImportError, ValueError) as e:
hud/rl/train.py CHANGED
@@ -232,6 +232,9 @@ async def train(config: Config, tasks: list[Task]) -> None:
232
232
  )
233
233
  learner.save(str(checkpoint_path))
234
234
 
235
+ # Wait for 6 seconds to ensure the checkpoint is saved
236
+ await asyncio.sleep(6)
237
+
235
238
  adapter_name = f"{config.adapter_prefix}-{checkpoint_id}"
236
239
  if vllm.load_adapter(adapter_name, str(checkpoint_path)):
237
240
  actor.update_adapter(adapter_name)
hud/rl/vllm_adapter.py CHANGED
@@ -35,20 +35,38 @@ class VLLMAdapter:
35
35
  url = f"{self.base_url}/load_lora_adapter"
36
36
  headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
37
37
  payload = {"lora_name": adapter_name, "lora_path": adapter_path}
38
-
39
- try:
40
- response = requests.post(
41
- url, headers=headers, data=json.dumps(payload), timeout=timeout
42
- )
43
- response.raise_for_status()
44
-
45
- self.current_adapter = adapter_name
46
- hud_console.info(f"[VLLMAdapter] Loaded adapter: {adapter_name}")
47
- return True
48
-
49
- except requests.exceptions.RequestException as e:
50
- hud_console.error(f"[VLLMAdapter] Failed to load adapter {adapter_name}: {e}")
51
- return False
38
+ # Implement exponential backoff for retrying the adapter load request.
39
+ max_retries = 5
40
+ backoff_factor = 2
41
+ delay = 1 # initial delay in seconds
42
+
43
+ for attempt in range(1, max_retries + 1):
44
+ try:
45
+ response = requests.post(
46
+ url, headers=headers, data=json.dumps(payload), timeout=timeout
47
+ )
48
+ response.raise_for_status()
49
+
50
+ self.current_adapter = adapter_name
51
+ hud_console.info(f"[VLLMAdapter] Loaded adapter: {adapter_name}")
52
+ return True
53
+
54
+ except requests.exceptions.RequestException as e:
55
+ if attempt == max_retries:
56
+ hud_console.error(
57
+ f"[VLLMAdapter] Failed to load adapter {adapter_name} after {attempt} attempts: {e}" # noqa: E501
58
+ )
59
+ return False
60
+ else:
61
+ hud_console.warning(
62
+ f"[VLLMAdapter] Load adapter {adapter_name} failed (attempt {attempt}/{max_retries}): {e}. Retrying in {delay} seconds...", # noqa: E501
63
+ )
64
+ import time
65
+
66
+ time.sleep(delay)
67
+ delay *= backoff_factor
68
+
69
+ return False
52
70
 
53
71
  def unload_adapter(self, adapter_name: str) -> bool:
54
72
  """
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.4.29"
8
+ assert hud.__version__ == "0.4.31"
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.4.29"
7
+ __version__ = "0.4.31"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.29
3
+ Version: 0.4.31
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -35,15 +35,20 @@ Classifier: Programming Language :: Python :: 3.11
35
35
  Classifier: Programming Language :: Python :: 3.12
36
36
  Classifier: Programming Language :: Python :: 3.13
37
37
  Requires-Python: <3.13,>=3.11
38
+ Requires-Dist: anthropic
39
+ Requires-Dist: datasets>=2.14.0
38
40
  Requires-Dist: httpx<1,>=0.23.0
39
41
  Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
40
42
  Requires-Dist: hud-mcp-python-sdk>=3.13.2
41
43
  Requires-Dist: hud-mcp-use-python-sdk>=2.3.16
44
+ Requires-Dist: numpy>=1.24.0
45
+ Requires-Dist: openai
42
46
  Requires-Dist: opentelemetry-api>=1.34.1
43
47
  Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
44
48
  Requires-Dist: opentelemetry-instrumentation-mcp==0.47.0
45
49
  Requires-Dist: opentelemetry-sdk>=1.34.1
46
50
  Requires-Dist: pathspec>=0.12.1
51
+ Requires-Dist: pillow>=11.1.0
47
52
  Requires-Dist: prompt-toolkit==3.0.51
48
53
  Requires-Dist: pydantic-settings<3,>=2
49
54
  Requires-Dist: pydantic<3,>=2
@@ -54,8 +59,6 @@ Requires-Dist: typer>=0.9.0
54
59
  Requires-Dist: watchfiles>=0.21.0
55
60
  Requires-Dist: wrapt>=1.14.0
56
61
  Provides-Extra: agent
57
- Requires-Dist: anthropic; extra == 'agent'
58
- Requires-Dist: datasets>=2.14.0; extra == 'agent'
59
62
  Requires-Dist: dotenv>=0.9.9; extra == 'agent'
60
63
  Requires-Dist: ipykernel; extra == 'agent'
61
64
  Requires-Dist: ipython<9; extra == 'agent'
@@ -64,12 +67,7 @@ Requires-Dist: jupyter-core; extra == 'agent'
64
67
  Requires-Dist: langchain; extra == 'agent'
65
68
  Requires-Dist: langchain-anthropic; extra == 'agent'
66
69
  Requires-Dist: langchain-openai; extra == 'agent'
67
- Requires-Dist: numpy>=1.24.0; extra == 'agent'
68
- Requires-Dist: openai; extra == 'agent'
69
- Requires-Dist: pillow>=11.1.0; extra == 'agent'
70
70
  Provides-Extra: agents
71
- Requires-Dist: anthropic; extra == 'agents'
72
- Requires-Dist: datasets>=2.14.0; extra == 'agents'
73
71
  Requires-Dist: dotenv>=0.9.9; extra == 'agents'
74
72
  Requires-Dist: ipykernel; extra == 'agents'
75
73
  Requires-Dist: ipython<9; extra == 'agents'
@@ -78,13 +76,8 @@ Requires-Dist: jupyter-core; extra == 'agents'
78
76
  Requires-Dist: langchain; extra == 'agents'
79
77
  Requires-Dist: langchain-anthropic; extra == 'agents'
80
78
  Requires-Dist: langchain-openai; extra == 'agents'
81
- Requires-Dist: numpy>=1.24.0; extra == 'agents'
82
- Requires-Dist: openai; extra == 'agents'
83
- Requires-Dist: pillow>=11.1.0; extra == 'agents'
84
79
  Provides-Extra: dev
85
80
  Requires-Dist: aiodocker>=0.24.0; extra == 'dev'
86
- Requires-Dist: anthropic; extra == 'dev'
87
- Requires-Dist: datasets>=2.14.0; extra == 'dev'
88
81
  Requires-Dist: dotenv>=0.9.9; extra == 'dev'
89
82
  Requires-Dist: inspect-ai>=0.3.80; extra == 'dev'
90
83
  Requires-Dist: ipykernel; extra == 'dev'
@@ -94,8 +87,6 @@ Requires-Dist: jupyter-core; extra == 'dev'
94
87
  Requires-Dist: langchain; extra == 'dev'
95
88
  Requires-Dist: langchain-anthropic; extra == 'dev'
96
89
  Requires-Dist: langchain-openai; extra == 'dev'
97
- Requires-Dist: numpy>=1.24.0; extra == 'dev'
98
- Requires-Dist: openai; extra == 'dev'
99
90
  Requires-Dist: pillow>=11.1.0; extra == 'dev'
100
91
  Requires-Dist: playwright; extra == 'dev'
101
92
  Requires-Dist: pyautogui>=0.9.54; extra == 'dev'
@@ -108,9 +99,7 @@ Requires-Dist: ruff>=0.11.8; extra == 'dev'
108
99
  Requires-Dist: setuptools; extra == 'dev'
109
100
  Requires-Dist: textdistance<5,>=4.5.0; extra == 'dev'
110
101
  Provides-Extra: rl
111
- Requires-Dist: anthropic; extra == 'rl'
112
102
  Requires-Dist: bitsandbytes>=0.41.0; (sys_platform == 'linux') and extra == 'rl'
113
- Requires-Dist: datasets>=2.14.0; extra == 'rl'
114
103
  Requires-Dist: dotenv>=0.9.9; extra == 'rl'
115
104
  Requires-Dist: ipykernel; extra == 'rl'
116
105
  Requires-Dist: ipython<9; extra == 'rl'
@@ -120,10 +109,7 @@ Requires-Dist: langchain; extra == 'rl'
120
109
  Requires-Dist: langchain-anthropic; extra == 'rl'
121
110
  Requires-Dist: langchain-openai; extra == 'rl'
122
111
  Requires-Dist: liger-kernel>=0.5.0; (sys_platform == 'linux') and extra == 'rl'
123
- Requires-Dist: numpy>=1.24.0; extra == 'rl'
124
- Requires-Dist: openai; extra == 'rl'
125
112
  Requires-Dist: peft>=0.17.1; extra == 'rl'
126
- Requires-Dist: pillow>=11.1.0; extra == 'rl'
127
113
  Requires-Dist: vllm==0.10.1.1; extra == 'rl'
128
114
  Description-Content-Type: text/markdown
129
115
 
@@ -239,21 +225,34 @@ The above example let's the agent play 2048 ([See replay](https://app.hud.so/tra
239
225
 
240
226
  ## Reinforcement Learning with GRPO
241
227
 
242
- This is a Qwen-2.5-3B agent training a policy on the [`text-2048`](environments/text_2048/) environment (see above) using [Verifiers](rl/):
228
+ This is a Qwen2.5‑VL‑3B agent training a policy on the 2048-basic browser environment:
243
229
 
244
230
  ![RL curve](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/rl_2.png)
245
231
 
246
- To start training, check out the [`rl/README.md`](rl/README.md) folder:
232
+ Train with the new interactive `hud rl` flow:
247
233
 
248
234
  ```bash
249
- git clone https://github.com/hud-evals/hud-python
250
- cd hud-python/rl
251
- python train_2048.py
235
+ # Install CLI with RL extras
236
+ uv tool install "hud-python[rl]"
237
+
238
+ # Option A: Run directly from a HuggingFace dataset
239
+ hud rl hud-evals/basic-2048
240
+
241
+ # Option B: Download first, modify, then train
242
+ hud get hud-evals/basic-2048
243
+ hud rl basic-2048.jsonl
244
+
245
+ # Optional: baseline evaluation
246
+ hud eval basic-2048.jsonl
252
247
  ```
253
248
 
254
- Any hud MCP environment and evaluation works with our RL pipeline. Even our remote configurations!
249
+ Supports multi‑turn RL for both:
250
+ - Language‑only models (e.g., `Qwen/Qwen2.5-7B-Instruct`)
251
+ - Vision‑Language models (e.g., `Qwen/Qwen2.5-VL-3B-Instruct`)
252
+
253
+ By default, `hud rl` provisions a persistent server and trainer in the cloud, streams telemetry to `app.hud.so`, and lets you monitor/manage models at `app.hud.so/models`. Use `--local` to run entirely on your machines (typically 2+ GPUs: one for vLLM, the rest for training).
255
254
 
256
- > The [`rl/README.md`](rl/README.md) walks you through several examples of RL training and takes less than 15 minutes to set up for your custom agent!
255
+ Any HUD MCP environment and evaluation works with our RL pipeline (including remote configurations). See the guided docs: `https://docs.hud.so/train-agents/quickstart`.
257
256
 
258
257
  ## Benchmarking Agents
259
258
 
@@ -2,14 +2,14 @@ hud/__init__.py,sha256=JMDFUE1pP0J1Xl_miBdt7ERvoffZmTzSFe8yxz512A8,552
2
2
  hud/__main__.py,sha256=YR8Dq8OhINOsVfQ55PmRXXg4fEK84Rt_-rMtJ5rvhWo,145
3
3
  hud/settings.py,sha256=sMS31iW1m-5VpWk-Blhi5-obLcUA0fwxWE1GgJz-vqU,2708
4
4
  hud/types.py,sha256=Cn9suZ_ZitLnxmnknfbCYVvmLsXRWI56kJ3LXtdfI6M,10157
5
- hud/version.py,sha256=I65QTO8NMhRiDAN0vYH7G47SRwt713WHRXjHgx1VsL0,105
5
+ hud/version.py,sha256=wQqwRzN9OMugX4H2oCdAp5bWBbjfBRrvQGeo_0_uaYs,105
6
6
  hud/agents/__init__.py,sha256=UoIkljWdbq4bM0LD-mSaw6w826EqdEjOk7r6glNYwYQ,286
7
- hud/agents/base.py,sha256=y3Aq3DtsDT5X2Q-_C-jqxFtDcHUlnBDOczmZIhqC0-w,35118
7
+ hud/agents/base.py,sha256=_u1zR3gXzZ1RlTCUYdMcvgHqdJBC4-AB1lZt0yBx8lg,35406
8
8
  hud/agents/claude.py,sha256=wHiw8iAnjnRmZyKRKcOhagCDQMhz9Z6rlSBWqH1X--M,15781
9
9
  hud/agents/grounded_openai.py,sha256=U-FHjB2Nh1_o0gmlxY5F17lWJ3oHsNRIB2a7z-IKB64,11231
10
10
  hud/agents/langchain.py,sha256=1EgCy8jfjunsWxlPC5XfvfLS6_XZVrIF1ZjtHcrvhYw,9584
11
11
  hud/agents/openai.py,sha256=ovARRWNuHqKkZ2Q_OCYSVCIZckrh8XY2jUB2p2x1m88,14259
12
- hud/agents/openai_chat_generic.py,sha256=T2HTZaSUmvqBhYIfurtreb-gxnlVBqDO64GySu_uFIA,11514
12
+ hud/agents/openai_chat_generic.py,sha256=7n7timn3fvNRnL2xzWyOTeNTchej2r9cAL1mU6YnFdY,11605
13
13
  hud/agents/misc/__init__.py,sha256=BYi4Ytp9b_vycpZFXnr5Oyw6ncKLNNGml8Jrb7bWUb4,136
14
14
  hud/agents/misc/response_agent.py,sha256=OJdQJ76jP9xxQxVYJ-qPcdBxvFr8ABcwbP1f1I5zU5A,3227
15
15
  hud/agents/tests/__init__.py,sha256=W-O-_4i34d9TTyEHV-O_q1Ai1gLhzwDaaPo02_TWQIY,34
@@ -27,20 +27,21 @@ hud/cli/debug.py,sha256=jtFW8J5F_3rhq1Hf1_SkJ7aLS3wjnyIs_LsC8k5cnzc,14200
27
27
  hud/cli/dev.py,sha256=56vQdH9oe_XGnOcRcFbNIsLEoBnpCl1eANlRFUeddHQ,31734
28
28
  hud/cli/eval.py,sha256=W_eY4uoIQwHcSCvxNaQeRfWC10uQA1UhBWiNQzQPuXM,22694
29
29
  hud/cli/get.py,sha256=sksKrdzBGZa7ZuSoQkc0haj-CvOGVSSikoVXeaUd3N4,6274
30
- hud/cli/init.py,sha256=XswZB2ZnzdE0pxP1kRmO3bHfWGCrKAyrME34ZyPzs98,19715
30
+ hud/cli/init.py,sha256=McZwpxZMXD-It_PXINCUy-SwUaPiQ7jdpSU5-F-caO8,19671
31
31
  hud/cli/list_func.py,sha256=EVi2Vc3Lb3glBNJxFx4MPnZknZ4xmuJz1OFg_dc8a_E,7177
32
32
  hud/cli/pull.py,sha256=Vd1l1-IwskyACzhtC8Df1SYINUZEYmFxrLl0s9cNN6c,12151
33
33
  hud/cli/push.py,sha256=JXUxu1QGU7BPWb0erSJq42CIq0sLbaDAO42yYDcvA1g,18347
34
34
  hud/cli/remove.py,sha256=8vGQyXDqgtjz85_vtusoIG8zurH4RHz6z8UMevQRYM4,6861
35
35
  hud/cli/flows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- hud/cli/flows/tasks.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
- hud/cli/rl/__init__.py,sha256=HRIWMx5Dmmu2IorrmcyVu-Bs_3B45pwEsCC67M1z3Oo,22670
36
+ hud/cli/flows/tasks.py,sha256=8r-51oon3anwMAi5cyAOgC3iB48jnqlOUO3iTWmqsyI,6372
37
+ hud/cli/rl/__init__.py,sha256=BeqXdmzPwVBptz4j796XJRxSC5B_9tQta5aKd0jDMvo,5000
38
38
  hud/cli/rl/config.py,sha256=iNhCxotM33OEiP9gqPvn8A_AxrBVe6fcFCQTvc13xzA,2884
39
- hud/cli/rl/display.py,sha256=OTFcoIn5k3ehfi5-PmuClIkY04f4tpSMuNf7yd-bBr4,5212
39
+ hud/cli/rl/display.py,sha256=hqJVGmO9csYinladhZwjF-GMvppYWngxDHajTyIJ_gM,5214
40
40
  hud/cli/rl/gpu.py,sha256=peXS-NdUF5RyuSs0aZoCzGLboneBUpCy8f9f99WMrG0,2009
41
41
  hud/cli/rl/gpu_utils.py,sha256=H5ckPwgj5EVP3yJ5eVihR5R7Y6Gp6pt8ZUfWCCwcLG4,11072
42
+ hud/cli/rl/local_runner.py,sha256=GssmDgCxGfFsi31aFj22vwCiwa9ELllEwQjbActxSXY,21514
42
43
  hud/cli/rl/presets.py,sha256=DzOO82xL5QyzdVtlX-Do1CODMvDz9ILMPapjU92jcZg,3051
43
- hud/cli/rl/remote_runner.py,sha256=ILjWpjbdZyH7db-C-x1T0GwsrLu3PBKx3IoTZl-qamg,12733
44
+ hud/cli/rl/remote_runner.py,sha256=Umyjjbtw6ikyNNfVGjn6sY6Qnta8Uc1KC7KzMgFaJVw,13146
44
45
  hud/cli/rl/rl_api.py,sha256=INJobvSa50ccR037u_GPsDa_9WboWyNwqEaoh9hcXj0,4306
45
46
  hud/cli/rl/vllm.py,sha256=Gq_M6KsQArGz7FNIdemuM5mk16mu3xe8abpO2GCCuOE,6093
46
47
  hud/cli/tests/__init__.py,sha256=ZrGVkmH7DHXGqOvjOSNGZeMYaFIRB2K8c6hwr8FPJ-8,68
@@ -61,7 +62,7 @@ hud/cli/tests/test_registry.py,sha256=-o9MvQTcBElteqrg0XW8Bg59KrHCt88ZyPqeaAlyyT
61
62
  hud/cli/tests/test_utils.py,sha256=_oa2lTvgqJxXe0Mtovxb8x-Sug-f6oJJKvG67r5pFtA,13474
62
63
  hud/cli/utils/__init__.py,sha256=L6s0oNzY2LugGp9faodCPnjzM-ZUorUH05-HmYOq5hY,35
63
64
  hud/cli/utils/cursor.py,sha256=fy850p0rVp5k_1wwOCI7rK1SggbselJrywFInSQ2gio,3009
64
- hud/cli/utils/docker.py,sha256=VTUcoPqxh3uXOgvL6NSqYiSDhyCPRp3jTfFbnIDwumg,3774
65
+ hud/cli/utils/docker.py,sha256=-nAj7wRRIilbezG0-pCHA2-tleoqUJN9sDXHxvMWilU,7331
65
66
  hud/cli/utils/environment.py,sha256=y_c0ohxWrM054ZKid0KOQPzs2M2vh985AsumPG2wTPc,4282
66
67
  hud/cli/utils/interactive.py,sha256=tcwp9HkAyr2_GiM3Raba4h0P_OgCksQKram80BucPo4,16546
67
68
  hud/cli/utils/logging.py,sha256=DyOWuzZUg6HeKCqfs6ufb703XS3bW4G2pzaXVAvDqvA,9018
@@ -92,10 +93,10 @@ hud/datasets/utils.py,sha256=hdZfjWH5l3FVJaWBSHEEpjujAG7DqEam_vHgslL8MLs,4279
92
93
  hud/misc/__init__.py,sha256=m_pprQQ-G-Y0Sd0NEiR8MtAMbElnuFZ2OWT8TXrw7c4,43
93
94
  hud/misc/claude_plays_pokemon.py,sha256=IthAkjDVr2Q-GNvX-QLJyMzN7-0pHqqJbagGNv2m7yo,10453
94
95
  hud/native/__init__.py,sha256=TqM0KaiQnDb2Nv1zOgpEMiLVq8JPd4j_aaK4rUZ0IiA,232
95
- hud/native/comparator.py,sha256=A16wFGINLHPyuD23e8sEYxhRwWCUBYzYLb6TpvzJG9c,18297
96
+ hud/native/comparator.py,sha256=GCHs7iZa0fB425es6vvG91UW4yrbY6-BsWdabYJaNA4,18255
96
97
  hud/native/tests/__init__.py,sha256=gBTLMm6w5f6D-02Se2WleYsEEYyFt95JDcFzp3C2L_k,40
97
- hud/native/tests/test_comparator.py,sha256=x1gFLXEDRIiJhH8tg5Rd3ptY-modYaHgSm6-hCJ1EdY,18568
98
- hud/native/tests/test_native_init.py,sha256=00zSF6jBLIYYbDaslI7fk-ANTEofIC9SPrRtX942WM4,2738
98
+ hud/native/tests/test_comparator.py,sha256=pDch3r3xDi2o5YXF_bkoLfIdHcCjse3foAaqyr7PzkQ,18512
99
+ hud/native/tests/test_native_init.py,sha256=Z-2dinbQYEkrbCcfBrBOLGdpXtWWOtkfPzp7ZKri68Y,2839
99
100
  hud/otel/__init__.py,sha256=ii17ayoWiS5vAhA7UAmZ8TkmP52gs2pWyHsD46-uYbE,1003
100
101
  hud/otel/collector.py,sha256=jLZymZ8r7xt2VDuWexfbnT7PY1-0aiyLMgjBy8KDY1M,4497
101
102
  hud/otel/config.py,sha256=mricuAmtFd1yIfOYKw2aHI-u4piku0GXHWv6hjsWQLM,6806
@@ -105,18 +106,18 @@ hud/otel/instrumentation.py,sha256=fsFG9W89RdewFDxWKN9Ft4GUb7WbIKpfucTc16WxaZU,5
105
106
  hud/otel/processors.py,sha256=-gGRbwifplcExDQBLfx_9tqWreDImULJNcENgO9q7VU,4700
106
107
  hud/otel/tests/__init__.py,sha256=VNJKBMaxTtbn7trW-1Ph50zCvCok_wTSGcI1HD6GOLA,43
107
108
  hud/otel/tests/test_processors.py,sha256=np0R4ssd9j6LJSJykJ5bNjl0POwNYNhgb7BqOZHwcMY,6778
108
- hud/rl/README.md,sha256=OLOmRGOWuQGE-xjLqQJbazzX8ygUG17ECP6kjbM2C0g,1163
109
+ hud/rl/README.md,sha256=uFRpNFaEY8paq9k1C4miF7AGnbqHTGAsPmpcf9JIEeA,1189
109
110
  hud/rl/__init__.py,sha256=yYL7U1WV6L3mr3Hig48-4lhnryTaWj4nCXm4lG5vrYI,25
110
111
  hud/rl/actor.py,sha256=0YChXyxCz1wVBQ9lKb7vSl64_HQ24-DmYqCCxuORzJc,6747
111
112
  hud/rl/buffer.py,sha256=xz4FlvO9l945VsSS4lzRFMwH3rA9HafgbUfADSauXok,15210
112
113
  hud/rl/chat_template.jinja,sha256=XTdzI8oFGEcSA-exKxyHaprwRDmX5Am1KEb0VxvUc6U,4965
113
114
  hud/rl/config.py,sha256=PAKYPCsKl8yg_j3gJSE5SJUgLM7j0lFy0K_Vt4-otDM,5384
114
115
  hud/rl/distributed.py,sha256=8avhrb0lHYkhW22Z7MfkqSnlczWj5jMrUMEtkcoCf74,2473
115
- hud/rl/learner.py,sha256=JBlzPFgX16uk6f6xYINLXWdDD1i1tiyzm6GLryrzgYg,24545
116
- hud/rl/train.py,sha256=oZQGo0Wvb2LSrhh-7FLOsGCvI4G4AjgAAvF9P0k9l1Q,13436
116
+ hud/rl/learner.py,sha256=FKIgIIghsNiDr_g090xokOO_BxNmTSj1O-TSJzIq_Uw,24703
117
+ hud/rl/train.py,sha256=ZigkUKj-I1nsYmFByZprqaoDZ88LVDH-6auYneEPOsA,13555
117
118
  hud/rl/types.py,sha256=lrLKo7iaqodYth2EyeuOQfLiuzXfYM2eJjPmpObrD7c,3965
118
119
  hud/rl/utils.py,sha256=IsgVUUibxnUzb32a4mu1sYrgJC1CwoG9E-Dd5y5VDOA,19115
119
- hud/rl/vllm_adapter.py,sha256=TBNo5lyNzszg6ATk9JoEZAm-xk_tcUJmq9YXwF1NB5w,3961
120
+ hud/rl/vllm_adapter.py,sha256=O2_TdTGIyNr9zRGhCw18XWjOKYzEM3049wvlyL2x0sc,4751
120
121
  hud/rl/tests/__init__.py,sha256=PXmD3Gs6xOAwaYKb4HnwZERDjX05N1QF-aU6ya0dBtE,27
121
122
  hud/rl/tests/test_learner.py,sha256=qfSHFFROteRb98TjBuAKjFmZjCGfuWXPysVvTAWJ7wQ,6025
122
123
  hud/rl/utils/start_vllm_server.sh,sha256=ThPokrLK_Qm_uh916fHXXBfMlw1TC97P57-AEI5MuOc,910
@@ -197,10 +198,10 @@ hud/utils/tests/test_init.py,sha256=2QLQSGgyP9wJhOvPCusm_zjJad0qApOZi1BXpxcdHXQ,
197
198
  hud/utils/tests/test_mcp.py,sha256=0pUa16mL-bqbZDXp5NHBnt1gO5o10BOg7zTMHZ1DNPM,4023
198
199
  hud/utils/tests/test_progress.py,sha256=QSF7Kpi03Ff_l3mAeqW9qs1nhK50j9vBiSobZq7T4f4,7394
199
200
  hud/utils/tests/test_telemetry.py,sha256=5jl7bEx8C8b-FfFUko5pf4UY-mPOR-9HaeL98dGtVHM,2781
200
- hud/utils/tests/test_version.py,sha256=cXnWb-tAwTPK40YNpgJ0EwXe7op_7xDXBGf7sf5-ECU,160
201
+ hud/utils/tests/test_version.py,sha256=qN98qLlKKeM-W-AM1q1s0Lci8phBQ7SUd_L-0yVmujA,160
201
202
  hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
202
- hud_python-0.4.29.dist-info/METADATA,sha256=xT3yJCdo-1-xJ1q54lgIyFALitPOiU2-YSParihkYvM,21069
203
- hud_python-0.4.29.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
204
- hud_python-0.4.29.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
205
- hud_python-0.4.29.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
206
- hud_python-0.4.29.dist-info/RECORD,,
203
+ hud_python-0.4.31.dist-info/METADATA,sha256=UZMnmiRCMmHmXOD0BrkQs7Caqk5t8HcllxWPwR2SSzc,20861
204
+ hud_python-0.4.31.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
205
+ hud_python-0.4.31.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
206
+ hud_python-0.4.31.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
207
+ hud_python-0.4.31.dist-info/RECORD,,