hud-python 0.4.31__py3-none-any.whl → 0.4.33__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/cli/flows/tasks.py +83 -14
- hud/cli/push.py +1 -0
- hud/cli/rl/remote_runner.py +75 -62
- hud/clients/utils/mcp_use_retry.py +3 -3
- hud/rl/buffer.py +108 -77
- hud/samples/__init__.py +7 -0
- hud/samples/browser.py +33 -0
- hud/types.py +19 -6
- hud/utils/mcp.py +6 -1
- hud/utils/tests/test_version.py +1 -1
- hud/utils/tool_shorthand.py +59 -0
- hud/version.py +1 -1
- {hud_python-0.4.31.dist-info → hud_python-0.4.33.dist-info}/METADATA +1 -1
- {hud_python-0.4.31.dist-info → hud_python-0.4.33.dist-info}/RECORD +17 -14
- {hud_python-0.4.31.dist-info → hud_python-0.4.33.dist-info}/WHEEL +0 -0
- {hud_python-0.4.31.dist-info → hud_python-0.4.33.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.31.dist-info → hud_python-0.4.33.dist-info}/licenses/LICENSE +0 -0
hud/cli/flows/tasks.py
CHANGED
|
@@ -27,9 +27,28 @@ def _is_remote_url(url: str) -> bool:
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def _validate_tasks(tasks: list[Task]) -> bool:
|
|
30
|
-
"""Validate the tasks file.
|
|
30
|
+
"""Validate the tasks file: return True if tasks already reference a remote MCP URL.
|
|
31
|
+
|
|
32
|
+
A task is considered remote if any "url" field anywhere inside mcp_config
|
|
33
|
+
is a valid remote URL (e.g., https://mcp.hud.so/v3/mcp).
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
def _has_remote_url(obj: Any) -> bool:
|
|
37
|
+
if isinstance(obj, dict):
|
|
38
|
+
for k, v in obj.items():
|
|
39
|
+
if k == "url" and isinstance(v, str) and _is_remote_url(v):
|
|
40
|
+
return True
|
|
41
|
+
if _has_remote_url(v):
|
|
42
|
+
return True
|
|
43
|
+
elif isinstance(obj, list):
|
|
44
|
+
for item in obj:
|
|
45
|
+
if _has_remote_url(item):
|
|
46
|
+
return True
|
|
47
|
+
return False
|
|
48
|
+
|
|
31
49
|
for task in tasks:
|
|
32
|
-
|
|
50
|
+
cfg = task.mcp_config or {}
|
|
51
|
+
if not _has_remote_url(cfg):
|
|
33
52
|
return False
|
|
34
53
|
return True
|
|
35
54
|
|
|
@@ -100,7 +119,7 @@ def _ensure_pushed(env_dir: Path, lock_data: dict[str, Any]) -> dict[str, Any]:
|
|
|
100
119
|
require_docker_running()
|
|
101
120
|
|
|
102
121
|
# If Docker or login is not configured, the push function will fail and halt.
|
|
103
|
-
push_environment(str(env_dir))
|
|
122
|
+
push_environment(str(env_dir), yes=True)
|
|
104
123
|
|
|
105
124
|
# Reload lock after push
|
|
106
125
|
lock_path = env_dir / "hud.lock.yaml"
|
|
@@ -111,10 +130,24 @@ def _ensure_pushed(env_dir: Path, lock_data: dict[str, Any]) -> dict[str, Any]:
|
|
|
111
130
|
|
|
112
131
|
|
|
113
132
|
def _derive_remote_image(lock_data: dict[str, Any]) -> str:
|
|
114
|
-
"""Derive org/name:tag from lock file
|
|
133
|
+
"""Derive org/name:tag from lock file for MCP header.
|
|
134
|
+
|
|
135
|
+
Preference order:
|
|
136
|
+
1) lock_data["push"]["image_with_tag"] if present
|
|
137
|
+
2) Derive from lock_data["image"] (may be a digest; falls back to latest)
|
|
138
|
+
"""
|
|
139
|
+
push_info = lock_data.get("push", {}) if isinstance(lock_data, dict) else {}
|
|
140
|
+
|
|
141
|
+
# 1) Exact image_with_tag if present
|
|
142
|
+
pushed_with_tag = str(push_info.get("image_with_tag", "")).strip()
|
|
143
|
+
if pushed_with_tag:
|
|
144
|
+
name, tag = extract_name_and_tag(pushed_with_tag)
|
|
145
|
+
return f"{name}:{tag}"
|
|
146
|
+
|
|
147
|
+
# Base name always comes from lock_data.image to preserve org/repo
|
|
115
148
|
image_ref = str(lock_data.get("image", "")).strip()
|
|
116
149
|
if not image_ref:
|
|
117
|
-
raise typer.Exit(
|
|
150
|
+
raise typer.Exit(1)
|
|
118
151
|
name, tag = extract_name_and_tag(image_ref)
|
|
119
152
|
return f"{name}:{tag}"
|
|
120
153
|
|
|
@@ -157,19 +190,55 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
|
|
|
157
190
|
# Derive remote image name org/name:tag
|
|
158
191
|
remote_image = _derive_remote_image(lock_data)
|
|
159
192
|
|
|
193
|
+
# Helper to strip extra fields from tool calls
|
|
194
|
+
def _simplify_tool_call(tool: Any) -> Any:
|
|
195
|
+
def _one(x: Any) -> dict[str, Any]:
|
|
196
|
+
try:
|
|
197
|
+
data = x.model_dump() if hasattr(x, "model_dump") else dict(x)
|
|
198
|
+
except Exception:
|
|
199
|
+
try:
|
|
200
|
+
data = dict(x)
|
|
201
|
+
except Exception:
|
|
202
|
+
return {}
|
|
203
|
+
# Keep only name and arguments
|
|
204
|
+
name = data.get("name")
|
|
205
|
+
arguments = data.get("arguments", {})
|
|
206
|
+
return {"name": name, "arguments": arguments}
|
|
207
|
+
|
|
208
|
+
if tool is None:
|
|
209
|
+
return None
|
|
210
|
+
if isinstance(tool, list):
|
|
211
|
+
return [_one(x) for x in tool]
|
|
212
|
+
return _one(tool)
|
|
213
|
+
|
|
160
214
|
# Convert to list[dict]
|
|
161
215
|
tasks_payload: list[dict[str, Any]] = []
|
|
162
216
|
for t in tasks:
|
|
163
|
-
item =
|
|
164
|
-
|
|
165
|
-
"
|
|
166
|
-
"
|
|
167
|
-
|
|
168
|
-
"
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
217
|
+
item: dict[str, Any] = {
|
|
218
|
+
"prompt": t.prompt,
|
|
219
|
+
"mcp_config": {
|
|
220
|
+
"hud": {
|
|
221
|
+
"url": "https://mcp.hud.so/v3/mcp",
|
|
222
|
+
"headers": {
|
|
223
|
+
"Authorization": "Bearer ${HUD_API_KEY}",
|
|
224
|
+
"Mcp-Image": remote_image,
|
|
225
|
+
},
|
|
226
|
+
}
|
|
227
|
+
},
|
|
172
228
|
}
|
|
229
|
+
|
|
230
|
+
# Optional fields, omit Nones
|
|
231
|
+
if t.setup_tool is not None:
|
|
232
|
+
item["setup_tool"] = _simplify_tool_call(t.setup_tool)
|
|
233
|
+
if t.evaluate_tool is not None:
|
|
234
|
+
item["evaluate_tool"] = _simplify_tool_call(t.evaluate_tool)
|
|
235
|
+
if t.agent_tools is not None:
|
|
236
|
+
item["agent_tools"] = t.agent_tools
|
|
237
|
+
if t.system_prompt is not None:
|
|
238
|
+
item["system_prompt"] = t.system_prompt
|
|
239
|
+
if t.metadata:
|
|
240
|
+
item["metadata"] = t.metadata
|
|
241
|
+
|
|
173
242
|
tasks_payload.append(item)
|
|
174
243
|
|
|
175
244
|
# Write new file: remote_<name>.json (always JSON array)
|
hud/cli/push.py
CHANGED
|
@@ -332,6 +332,7 @@ def push_environment(
|
|
|
332
332
|
"source": local_image,
|
|
333
333
|
"pushedAt": datetime.now(UTC).isoformat().replace("+00:00", "Z"),
|
|
334
334
|
"registry": pushed_digest.split("/")[0] if "/" in pushed_digest else "docker.io",
|
|
335
|
+
"image_with_tag": image,
|
|
335
336
|
}
|
|
336
337
|
|
|
337
338
|
# Save updated lock file
|
hud/cli/rl/remote_runner.py
CHANGED
|
@@ -9,6 +9,7 @@ from __future__ import annotations
|
|
|
9
9
|
import os
|
|
10
10
|
import subprocess
|
|
11
11
|
import time
|
|
12
|
+
import uuid
|
|
12
13
|
from pathlib import Path
|
|
13
14
|
|
|
14
15
|
from rich.console import Console
|
|
@@ -29,6 +30,41 @@ GPU_PRICING = {
|
|
|
29
30
|
}
|
|
30
31
|
|
|
31
32
|
|
|
33
|
+
def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int = 600) -> None:
|
|
34
|
+
"""Deploy vLLM for a model if needed and wait until it's ready.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
model_name: The name of the model to deploy vLLM for
|
|
38
|
+
gpu_type: GPU type to use for deployment (e.g., A100, H100)
|
|
39
|
+
timeout: Max seconds to wait for vLLM to be ready
|
|
40
|
+
"""
|
|
41
|
+
# Check current model status
|
|
42
|
+
info = rl_api.get_model(model_name)
|
|
43
|
+
if info.vllm_url:
|
|
44
|
+
hud_console.success("vLLM server already running")
|
|
45
|
+
return
|
|
46
|
+
|
|
47
|
+
hud_console.info(f"Deploying vLLM server for {model_name}...")
|
|
48
|
+
rl_api.deploy_vllm(model_name, gpu_type=gpu_type)
|
|
49
|
+
hud_console.success("vLLM deployment started")
|
|
50
|
+
|
|
51
|
+
hud_console.info("Waiting for vLLM server to be ready...")
|
|
52
|
+
start_time = time.time()
|
|
53
|
+
with hud_console.progress() as progress:
|
|
54
|
+
progress.update("Checking deployment status (see live status on https://app.hud.so/models)")
|
|
55
|
+
while True:
|
|
56
|
+
if time.time() - start_time > timeout:
|
|
57
|
+
hud_console.error("Timeout waiting for vLLM deployment")
|
|
58
|
+
raise ValueError("vLLM deployment timeout")
|
|
59
|
+
info = rl_api.get_model(model_name)
|
|
60
|
+
if info.vllm_url or info.status == "ready":
|
|
61
|
+
hud_console.success(
|
|
62
|
+
f"vLLM server ready at http://rl.hud.so/v1/models/{model_name}/vllm"
|
|
63
|
+
)
|
|
64
|
+
break
|
|
65
|
+
time.sleep(5)
|
|
66
|
+
|
|
67
|
+
|
|
32
68
|
def run_remote_training(
|
|
33
69
|
tasks_file: str | None,
|
|
34
70
|
model: str | None,
|
|
@@ -128,49 +164,55 @@ def run_remote_training(
|
|
|
128
164
|
from rich.prompt import Prompt
|
|
129
165
|
|
|
130
166
|
# Ask for model name
|
|
131
|
-
|
|
167
|
+
base_default = model_type.split("/")[-1].lower()
|
|
168
|
+
default_name = base_default
|
|
169
|
+
existing_names = {m.name for m in active_models}
|
|
170
|
+
suffix = 1
|
|
171
|
+
while default_name in existing_names:
|
|
172
|
+
default_name = f"{base_default}-{suffix}"
|
|
173
|
+
suffix += 1
|
|
174
|
+
|
|
132
175
|
hud_console.info(f"Enter model name (default: {default_name}):")
|
|
133
176
|
model_name = Prompt.ask("Model name", default=default_name)
|
|
134
177
|
model_name = model_name.replace("/", "-").lower()
|
|
135
178
|
|
|
136
|
-
# Create the model
|
|
179
|
+
# Create the model with retry on name conflict
|
|
137
180
|
hud_console.info(f"Creating model: {model_name}")
|
|
138
181
|
try:
|
|
139
182
|
rl_api.create_model(model_name, model_type)
|
|
140
183
|
hud_console.success(f"Created model: {model_name}")
|
|
184
|
+
ensure_vllm_deployed(model_name, gpu_type="A100")
|
|
141
185
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
hud_console.info("Waiting for vLLM server to be ready...")
|
|
149
|
-
max_wait = 600 # 10 minutes
|
|
150
|
-
start_time = time.time()
|
|
151
|
-
|
|
152
|
-
with hud_console.progress() as progress:
|
|
153
|
-
progress.update(
|
|
154
|
-
"Checking deployment status (see live status on https://app.hud.so/models)"
|
|
155
|
-
)
|
|
156
|
-
|
|
186
|
+
except Exception as e:
|
|
187
|
+
# If the name already exists, suggest a new name and prompt once
|
|
188
|
+
message = str(e)
|
|
189
|
+
if "already exists" in message or "409" in message:
|
|
190
|
+
alt_name = f"{model_name}-1"
|
|
191
|
+
i = 1
|
|
157
192
|
while True:
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
model_info = rl_api.get_model(model_name)
|
|
163
|
-
if model_info.status == "ready":
|
|
164
|
-
hud_console.success(
|
|
165
|
-
f"vLLM server ready at http://rl.hud.so/v1/models/{model_name}/vllm"
|
|
166
|
-
)
|
|
193
|
+
candidate = f"{model_name}-{str(uuid.uuid4())[:4]}"
|
|
194
|
+
if candidate not in existing_names:
|
|
195
|
+
alt_name = candidate
|
|
167
196
|
break
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
197
|
+
i += 1
|
|
198
|
+
hud_console.warning(
|
|
199
|
+
f"Model '{model_name}' exists. Suggesting '{alt_name}' instead."
|
|
200
|
+
)
|
|
201
|
+
try:
|
|
202
|
+
from rich.prompt import Prompt as _Prompt
|
|
203
|
+
|
|
204
|
+
chosen = _Prompt.ask("Use different name", default=alt_name)
|
|
205
|
+
chosen = chosen.replace("/", "-").lower()
|
|
206
|
+
rl_api.create_model(chosen, model_type)
|
|
207
|
+
hud_console.success(f"Created model: {chosen}")
|
|
208
|
+
model_name = chosen
|
|
209
|
+
ensure_vllm_deployed(model_name, gpu_type="A100")
|
|
210
|
+
except Exception as e2:
|
|
211
|
+
hud_console.error(f"Failed to create model: {e2}")
|
|
212
|
+
raise
|
|
213
|
+
else:
|
|
214
|
+
hud_console.error(f"Failed to create model: {e}")
|
|
215
|
+
raise
|
|
174
216
|
|
|
175
217
|
else:
|
|
176
218
|
# Existing model selected
|
|
@@ -194,36 +236,7 @@ def run_remote_training(
|
|
|
194
236
|
return
|
|
195
237
|
|
|
196
238
|
# Ensure vLLM is deployed
|
|
197
|
-
|
|
198
|
-
hud_console.info(f"Deploying vLLM server for {model_name}...")
|
|
199
|
-
rl_api.deploy_vllm(model_name, gpu_type="A100")
|
|
200
|
-
hud_console.success("vLLM deployment started")
|
|
201
|
-
|
|
202
|
-
# Wait for deployment
|
|
203
|
-
hud_console.info("Waiting for vLLM server to be ready...")
|
|
204
|
-
max_wait = 600 # 10 minutes
|
|
205
|
-
start_time = time.time()
|
|
206
|
-
|
|
207
|
-
with hud_console.progress() as progress:
|
|
208
|
-
progress.update(
|
|
209
|
-
"Checking deployment status (see live status on https://app.hud.so/models)"
|
|
210
|
-
)
|
|
211
|
-
|
|
212
|
-
while True:
|
|
213
|
-
if time.time() - start_time > max_wait:
|
|
214
|
-
hud_console.error("Timeout waiting for vLLM deployment")
|
|
215
|
-
raise ValueError("vLLM deployment timeout")
|
|
216
|
-
|
|
217
|
-
model_info = rl_api.get_model(model_name)
|
|
218
|
-
if model_info.vllm_url:
|
|
219
|
-
hud_console.success(
|
|
220
|
-
f"vLLM server ready at http://rl.hud.so/v1/models/{model_name}/vllm"
|
|
221
|
-
)
|
|
222
|
-
break
|
|
223
|
-
|
|
224
|
-
time.sleep(5)
|
|
225
|
-
else:
|
|
226
|
-
hud_console.success("vLLM server already running")
|
|
239
|
+
ensure_vllm_deployed(model_name, gpu_type="A100")
|
|
227
240
|
except KeyboardInterrupt:
|
|
228
241
|
hud_console.dim_info("Training cancelled", "")
|
|
229
242
|
return
|
hud/clients/utils/mcp_use_retry.py
CHANGED
|
@@ -10,13 +10,13 @@ import asyncio
|
|
|
10
10
|
import logging
|
|
11
11
|
from typing import TYPE_CHECKING, Any, TypeVar
|
|
12
12
|
|
|
13
|
-
if TYPE_CHECKING:
|
|
14
|
-
from collections.abc import Callable
|
|
15
|
-
|
|
16
13
|
import requests
|
|
17
14
|
from requests.adapters import HTTPAdapter
|
|
18
15
|
from urllib3.util.retry import Retry
|
|
19
16
|
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import Callable
|
|
19
|
+
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
|
|
22
22
|
T = TypeVar("T")
|
hud/rl/buffer.py
CHANGED
|
@@ -219,12 +219,93 @@ class ReplayBuffer(Buffer[Trace]):
|
|
|
219
219
|
else:
|
|
220
220
|
raise ValueError(f"Invalid select strategy: {self.select_strategy}")
|
|
221
221
|
|
|
222
|
+
def _extract_group_key(self, trace: Trace) -> tuple[str, str]:
|
|
223
|
+
"""Return a stable grouping key for a trace.
|
|
224
|
+
|
|
225
|
+
Preference order:
|
|
226
|
+
1) task.id when present (kind='id')
|
|
227
|
+
2) task.prompt exact string (kind='prompt') when id is None
|
|
228
|
+
3) 'NA' for missing/errored entries (kind='NA')
|
|
229
|
+
"""
|
|
230
|
+
if getattr(trace, "isError", False):
|
|
231
|
+
return ("NA", "NA")
|
|
232
|
+
|
|
233
|
+
task = getattr(trace, "task", None)
|
|
234
|
+
if task is None:
|
|
235
|
+
return ("NA", "NA")
|
|
236
|
+
|
|
237
|
+
tid = getattr(task, "id", None)
|
|
238
|
+
if tid is not None:
|
|
239
|
+
return ("id", str(tid))
|
|
240
|
+
|
|
241
|
+
prompt = getattr(task, "prompt", None)
|
|
242
|
+
if prompt:
|
|
243
|
+
return ("prompt", str(prompt))
|
|
244
|
+
|
|
245
|
+
return ("NA", "NA")
|
|
246
|
+
|
|
247
|
+
def _validate_and_split_groups(
|
|
248
|
+
self, recent_traces: list[Trace]
|
|
249
|
+
) -> tuple[list[list[Trace]], list[tuple[str, str]]]:
|
|
250
|
+
"""Validate and split recent traces into homogeneous groups by id or prompt.
|
|
251
|
+
|
|
252
|
+
- Uses id when present; otherwise falls back to prompt equality.
|
|
253
|
+
- Any NA/error traces are excluded and the group is filled by duplicating
|
|
254
|
+
existing valid members in that group.
|
|
255
|
+
- Always returns len == groups_per_batch groups of size == group_size.
|
|
256
|
+
"""
|
|
257
|
+
from collections import Counter
|
|
258
|
+
|
|
259
|
+
groups_per_batch = self.batch_size // self.group_size
|
|
260
|
+
|
|
261
|
+
window_keys = [self._extract_group_key(t) for t in recent_traces]
|
|
262
|
+
window_counter = Counter(k for k in window_keys if k[0] != "NA")
|
|
263
|
+
|
|
264
|
+
validated_groups: list[list[Trace]] = []
|
|
265
|
+
selected_keys: list[tuple[str, str]] = []
|
|
266
|
+
|
|
267
|
+
for g_idx in range(groups_per_batch):
|
|
268
|
+
start = g_idx * self.group_size
|
|
269
|
+
end = start + self.group_size
|
|
270
|
+
chunk = recent_traces[start:end]
|
|
271
|
+
|
|
272
|
+
key_counts = Counter()
|
|
273
|
+
per_item_keys: list[tuple[str, str]] = []
|
|
274
|
+
for tr in chunk:
|
|
275
|
+
k = self._extract_group_key(tr)
|
|
276
|
+
per_item_keys.append(k)
|
|
277
|
+
if k[0] != "NA":
|
|
278
|
+
key_counts[k] += 1
|
|
279
|
+
|
|
280
|
+
if key_counts:
|
|
281
|
+
best_key = key_counts.most_common(1)[0][0]
|
|
282
|
+
elif window_counter:
|
|
283
|
+
best_key = window_counter.most_common(1)[0][0]
|
|
284
|
+
else:
|
|
285
|
+
best_key = ("NA", "NA")
|
|
286
|
+
|
|
287
|
+
homogeneous = [tr for tr, k in zip(chunk, per_item_keys, strict=False) if k == best_key]
|
|
288
|
+
|
|
289
|
+
while len(homogeneous) < self.group_size:
|
|
290
|
+
if homogeneous:
|
|
291
|
+
homogeneous.append(homogeneous[-1])
|
|
292
|
+
else:
|
|
293
|
+
idx = next((i for i, wk in enumerate(window_keys) if wk[0] != "NA"), None)
|
|
294
|
+
if idx is not None:
|
|
295
|
+
homogeneous.append(recent_traces[idx])
|
|
296
|
+
elif chunk:
|
|
297
|
+
homogeneous.append(chunk[0])
|
|
298
|
+
else:
|
|
299
|
+
homogeneous.append(recent_traces[0])
|
|
300
|
+
|
|
301
|
+
validated_groups.append(homogeneous)
|
|
302
|
+
selected_keys.append(best_key)
|
|
303
|
+
|
|
304
|
+
return validated_groups, selected_keys
|
|
305
|
+
|
|
222
306
|
def _sample_high_variance_traces(self) -> list[Trace]:
|
|
223
307
|
from collections import Counter, defaultdict, deque
|
|
224
308
|
|
|
225
|
-
# Expect recent window to already be grouped by task id
|
|
226
|
-
|
|
227
|
-
# Build recent window and earlier lookup (short form)
|
|
228
309
|
buf_list = list(self.buffer)
|
|
229
310
|
if len(buf_list) < self.batch_size:
|
|
230
311
|
hud_console.warning(
|
|
@@ -234,81 +315,32 @@ class ReplayBuffer(Buffer[Trace]):
|
|
|
234
315
|
take = min(len(buf_list) or 1, self.batch_size - len(buf_list))
|
|
235
316
|
buf_list.extend(buf_list[:take])
|
|
236
317
|
recent_traces = buf_list[-self.batch_size :]
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
)
|
|
318
|
+
|
|
319
|
+
recent_keys = [self._extract_group_key(t) for t in recent_traces]
|
|
320
|
+
hud_console.info(f"[group-sampler] recent-window histogram: {Counter(recent_keys)}")
|
|
240
321
|
|
|
241
322
|
hud_console.info(
|
|
242
323
|
f"[group-sampler] Building earlier traces lookup, buffer size: {len(buf_list)}"
|
|
243
324
|
)
|
|
244
|
-
|
|
325
|
+
earlier_traces_by_key: dict[tuple[str, str], deque[Trace]] = defaultdict(deque)
|
|
245
326
|
for tr in buf_list[: -self.batch_size]:
|
|
246
|
-
|
|
327
|
+
k = self._extract_group_key(tr)
|
|
328
|
+
if k[0] != "NA":
|
|
329
|
+
earlier_traces_by_key[k].append(tr)
|
|
330
|
+
|
|
331
|
+
groups, group_keys = self._validate_and_split_groups(recent_traces)
|
|
247
332
|
|
|
248
|
-
# Chunk from the most-recent end
|
|
249
333
|
final_traces: list[Trace] = []
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
end = start + self.group_size
|
|
255
|
-
group = recent_traces[start:end]
|
|
256
|
-
|
|
257
|
-
# Assert homogeneity: every trace in a group must share the same task id
|
|
258
|
-
cnt = Counter(getattr(t.task, "id", "NA") for t in group)
|
|
259
|
-
if len(cnt) != 1:
|
|
260
|
-
raise RuntimeError(f"Group {g_idx} is not homogeneous: {dict(cnt)}")
|
|
261
|
-
target_tid = next(iter(cnt.keys()))
|
|
262
|
-
|
|
263
|
-
# Build homogeneous group of target_tid, filling from earlier traces to increase spread
|
|
264
|
-
homogeneous: list[Trace] = [
|
|
265
|
-
t for t in group if getattr(t.task, "id", "NA") == target_tid
|
|
266
|
-
]
|
|
267
|
-
needed = self.group_size - len(homogeneous)
|
|
268
|
-
|
|
269
|
-
# Greedy fill: choose earlier traces (same task-id) farthest from current mean reward
|
|
270
|
-
def current_mean(homogeneous: list[Trace]) -> float:
|
|
271
|
-
if not homogeneous:
|
|
334
|
+
for g_idx, (homogeneous, target_key) in enumerate(zip(groups, group_keys, strict=False)):
|
|
335
|
+
|
|
336
|
+
def current_mean(h: list[Trace]) -> float:
|
|
337
|
+
if not h:
|
|
272
338
|
return 0.0
|
|
273
|
-
vals = [float(getattr(t, "reward", 0.0) or 0.0) for t in
|
|
339
|
+
vals = [float(getattr(t, "reward", 0.0) or 0.0) for t in h]
|
|
274
340
|
return sum(vals) / len(vals)
|
|
275
341
|
|
|
276
|
-
|
|
277
|
-
pool = earlier_traces_by_task.get(target_tid, deque())
|
|
278
|
-
if pool:
|
|
279
|
-
mu = current_mean(homogeneous)
|
|
280
|
-
# pick element farthest from current mean
|
|
281
|
-
best_i = None
|
|
282
|
-
best_dist = -1.0
|
|
283
|
-
for i, tr in enumerate(list(pool)):
|
|
284
|
-
r = float(getattr(tr, "reward", 0.0) or 0.0)
|
|
285
|
-
dist = abs(r - mu)
|
|
286
|
-
if dist > best_dist:
|
|
287
|
-
best_dist = dist
|
|
288
|
-
best_i = i
|
|
289
|
-
# pop selected
|
|
290
|
-
chosen = list(pool)[best_i] # type: ignore[index]
|
|
291
|
-
# remove from deque efficiently by rotating
|
|
292
|
-
left = list(pool)
|
|
293
|
-
if best_i is not None:
|
|
294
|
-
left.pop(best_i) # O(n) but pool is small in practice
|
|
295
|
-
earlier_traces_by_task[target_tid] = deque(left)
|
|
296
|
-
homogeneous.append(chosen)
|
|
297
|
-
else:
|
|
298
|
-
# duplicate extreme within current homogeneous set
|
|
299
|
-
if not homogeneous:
|
|
300
|
-
raise RuntimeError(f"Group {g_idx} has no traces for target {target_tid}")
|
|
301
|
-
mu = current_mean(homogeneous)
|
|
302
|
-
extreme = max(
|
|
303
|
-
homogeneous, key=lambda t: abs(float(getattr(t, "reward", 0.0) or 0.0) - mu)
|
|
304
|
-
)
|
|
305
|
-
homogeneous.append(extreme)
|
|
306
|
-
needed -= 1
|
|
307
|
-
|
|
308
|
-
# Replacement step: swap in earlier traces to increase reward spread
|
|
309
|
-
pool = earlier_traces_by_task.get(target_tid, deque())
|
|
342
|
+
pool = earlier_traces_by_key.get(target_key, deque())
|
|
310
343
|
if pool:
|
|
311
|
-
# Log pool stats
|
|
312
344
|
pool_vals = [float(getattr(tr, "reward", 0.0) or 0.0) for tr in list(pool)]
|
|
313
345
|
if pool_vals:
|
|
314
346
|
pool_mean = sum(pool_vals) / len(pool_vals)
|
|
@@ -316,16 +348,15 @@ class ReplayBuffer(Buffer[Trace]):
|
|
|
316
348
|
pool_vals
|
|
317
349
|
)
|
|
318
350
|
hud_console.info(
|
|
319
|
-
f"[group-sampler] Group {g_idx}: earlier-pool size={len(pool_vals)}
|
|
351
|
+
f"[group-sampler] Group {g_idx}: earlier-pool size={len(pool_vals)} "
|
|
352
|
+
f"mean={pool_mean:.4f} std={(pool_var**0.5):.4f}"
|
|
320
353
|
)
|
|
321
354
|
|
|
322
|
-
# Decide how many to replace (up to 1/4 of group, at least 1)
|
|
323
355
|
replace_k = max(1, self.group_size // 4)
|
|
324
356
|
replace_k = min(replace_k, len(pool), self.group_size)
|
|
325
357
|
|
|
326
358
|
if replace_k > 0:
|
|
327
359
|
mu = current_mean(homogeneous)
|
|
328
|
-
# Select replacement candidates from pool farthest from current mean
|
|
329
360
|
pool_list = list(pool)
|
|
330
361
|
pool_indices = list(range(len(pool_list)))
|
|
331
362
|
pool_indices.sort(
|
|
@@ -337,12 +368,11 @@ class ReplayBuffer(Buffer[Trace]):
|
|
|
337
368
|
chosen_pool_idx = set(pool_indices[:replace_k])
|
|
338
369
|
replacements = [pool_list[i] for i in pool_indices[:replace_k]]
|
|
339
370
|
|
|
340
|
-
# Remove chosen from pool deque
|
|
341
371
|
remaining = [tr for i, tr in enumerate(pool_list) if i not in chosen_pool_idx]
|
|
342
|
-
|
|
372
|
+
earlier_traces_by_key[target_key] = deque(remaining)
|
|
343
373
|
|
|
344
|
-
# Select current group positions closest to mean to replace
|
|
345
374
|
group_indices = list(range(len(homogeneous)))
|
|
375
|
+
mu = current_mean(homogeneous)
|
|
346
376
|
group_indices.sort(
|
|
347
377
|
key=lambda i: abs(
|
|
348
378
|
(float(getattr(homogeneous[i], "reward", 0.0) or 0.0)) - mu
|
|
@@ -353,18 +383,19 @@ class ReplayBuffer(Buffer[Trace]):
|
|
|
353
383
|
for pos, new_tr in zip(target_positions, replacements, strict=False):
|
|
354
384
|
homogeneous[pos] = new_tr
|
|
355
385
|
|
|
356
|
-
|
|
357
|
-
if any(getattr(t.task, "id", "NA") != target_tid for t in homogeneous):
|
|
386
|
+
if any(self._extract_group_key(t) != target_key for t in homogeneous):
|
|
358
387
|
raise RuntimeError(f"Group {g_idx} is not homogeneous after sampling")
|
|
359
388
|
final_traces.extend(homogeneous)
|
|
360
389
|
|
|
361
390
|
for i in range(0, len(final_traces), self.group_size):
|
|
362
391
|
block = final_traces[i : i + self.group_size]
|
|
363
|
-
|
|
392
|
+
keys = {self._extract_group_key(t) for t in block}
|
|
393
|
+
if len(keys) != 1:
|
|
364
394
|
raise RuntimeError(f"Homogeneity validation failed for block starting at index {i}")
|
|
365
395
|
|
|
366
396
|
hud_console.info(
|
|
367
|
-
f"[group-sampler] final histogram:
|
|
397
|
+
f"[group-sampler] final histogram: "
|
|
398
|
+
f"{Counter(self._extract_group_key(t) for t in final_traces)}"
|
|
368
399
|
)
|
|
369
400
|
return final_traces
|
|
370
401
|
|
hud/samples/__init__.py
ADDED
hud/samples/browser.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Sample browser task factory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from pydantic import Field
|
|
8
|
+
|
|
9
|
+
from hud.settings import settings
|
|
10
|
+
from hud.types import MCPToolCall, Task
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BrowserTask(Task):
|
|
14
|
+
"""Task subclass with browser defaults for BrowserTask(prompt=...)."""
|
|
15
|
+
|
|
16
|
+
prompt: str = "Open Google and be ready to search."
|
|
17
|
+
mcp_config: dict[str, Any] = Field(
|
|
18
|
+
default_factory=lambda: {
|
|
19
|
+
"browser": {
|
|
20
|
+
"url": "https://mcp.hud.so/v3/mcp",
|
|
21
|
+
"headers": {
|
|
22
|
+
"Authorization": f"Bearer {settings.api_key}",
|
|
23
|
+
"Mcp-Image": "hudevals/hud-remote-browser:0.1.1",
|
|
24
|
+
},
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
)
|
|
28
|
+
setup_tool: MCPToolCall | list[MCPToolCall] | None = Field(
|
|
29
|
+
default_factory=lambda: MCPToolCall(
|
|
30
|
+
name="setup",
|
|
31
|
+
arguments={"name": "navigate_to_url", "arguments": {"url": "https://www.google.com"}},
|
|
32
|
+
)
|
|
33
|
+
)
|
hud/types.py
CHANGED
|
@@ -12,6 +12,7 @@ from mcp.types import CallToolRequestParams, CallToolResult
|
|
|
12
12
|
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
13
13
|
|
|
14
14
|
from hud.settings import settings
|
|
15
|
+
from hud.utils.tool_shorthand import normalize_to_tool_call_dict
|
|
15
16
|
|
|
16
17
|
logger = logging.getLogger(__name__)
|
|
17
18
|
|
|
@@ -59,8 +60,18 @@ class Task(BaseModel):
|
|
|
59
60
|
|
|
60
61
|
@field_validator("setup_tool", "evaluate_tool", mode="before")
|
|
61
62
|
@classmethod
|
|
62
|
-
def convert_dict_to_tool_call(cls, v: Any) -> Any:
|
|
63
|
-
"""Convert dict to MCPToolCall instance
|
|
63
|
+
def convert_dict_to_tool_call(cls, v: Any, info: Any) -> Any:
|
|
64
|
+
"""Convert dict (with shorthands) to MCPToolCall instance.
|
|
65
|
+
|
|
66
|
+
Supports nested forms by walking to the deepest tool name and its arguments.
|
|
67
|
+
Examples:
|
|
68
|
+
- {"name": "navigate", "arguments": {...}} -> name=navigate
|
|
69
|
+
- {"navigate": {...}} -> name=navigate
|
|
70
|
+
- {"setup": {"navigate": {...}}} -> name=navigate
|
|
71
|
+
- {"name": "setup", "arguments": {"name": "navigate", "arguments": {...}}}
|
|
72
|
+
-> name=navigate
|
|
73
|
+
- Lists are normalized element-wise
|
|
74
|
+
"""
|
|
64
75
|
if v is None:
|
|
65
76
|
return None
|
|
66
77
|
|
|
@@ -73,10 +84,12 @@ class Task(BaseModel):
|
|
|
73
84
|
|
|
74
85
|
raise HudConfigError(f"Invalid JSON string: {e}") from e
|
|
75
86
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
if isinstance(
|
|
79
|
-
return
|
|
87
|
+
normalized = normalize_to_tool_call_dict(v)
|
|
88
|
+
|
|
89
|
+
if isinstance(normalized, dict):
|
|
90
|
+
return MCPToolCall(**normalized)
|
|
91
|
+
if isinstance(normalized, list):
|
|
92
|
+
return [MCPToolCall(**item) if isinstance(item, dict) else item for item in normalized]
|
|
80
93
|
return v
|
|
81
94
|
|
|
82
95
|
@field_validator("mcp_config", mode="before")
|
hud/utils/mcp.py
CHANGED
|
@@ -66,8 +66,13 @@ def setup_hud_telemetry(
|
|
|
66
66
|
auto_trace_cm = None
|
|
67
67
|
|
|
68
68
|
if not run_id and auto_trace:
|
|
69
|
+
# Start an auto trace and capture its ID for headers/metadata
|
|
69
70
|
auto_trace_cm = trace("My Trace")
|
|
70
|
-
|
|
71
|
+
_trace_obj = auto_trace_cm.__enter__()
|
|
72
|
+
try:
|
|
73
|
+
run_id = getattr(_trace_obj, "id", None) or str(_trace_obj)
|
|
74
|
+
except Exception: # pragma: no cover - fallback shouldn't fail lint
|
|
75
|
+
run_id = None
|
|
71
76
|
|
|
72
77
|
# Patch HUD servers with run-id (works whether auto or user trace)
|
|
73
78
|
if run_id:
|
hud/utils/tests/test_version.py
CHANGED
hud/utils/tool_shorthand.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _is_call_like(obj: Any) -> bool:
|
|
7
|
+
if not isinstance(obj, dict):
|
|
8
|
+
return False
|
|
9
|
+
if "name" in obj and "arguments" in obj:
|
|
10
|
+
return True
|
|
11
|
+
if len(obj) == 1:
|
|
12
|
+
_, v = next(iter(obj.items()))
|
|
13
|
+
return isinstance(v, dict)
|
|
14
|
+
return False
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _to_call_dict(obj: Any) -> Any:
|
|
18
|
+
"""Recursively convert shorthand/wrapped dicts into name/arguments templates.
|
|
19
|
+
|
|
20
|
+
Rules:
|
|
21
|
+
- If obj is a dict with {name, arguments}: return {name, arguments: recurse(arguments)}
|
|
22
|
+
- Else if obj is a single-key dict {k: v}: return {name: k, arguments: recurse(v)}
|
|
23
|
+
- Else: return obj unchanged (leaf arguments/value)
|
|
24
|
+
"""
|
|
25
|
+
if isinstance(obj, dict):
|
|
26
|
+
if "name" in obj and "arguments" in obj:
|
|
27
|
+
args = obj.get("arguments")
|
|
28
|
+
# Only recurse into arguments if it looks like another call
|
|
29
|
+
if _is_call_like(args):
|
|
30
|
+
return {"name": obj.get("name"), "arguments": _to_call_dict(args)}
|
|
31
|
+
return {"name": obj.get("name"), "arguments": args}
|
|
32
|
+
if len(obj) == 1:
|
|
33
|
+
k, v = next(iter(obj.items()))
|
|
34
|
+
if isinstance(v, dict):
|
|
35
|
+
return {"name": k, "arguments": _to_call_dict(v)}
|
|
36
|
+
return obj
|
|
37
|
+
return obj
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def normalize_to_tool_call_dict(value: Any) -> Any:
|
|
41
|
+
"""
|
|
42
|
+
Convert shorthand or nested forms into a direct tool call dict:
|
|
43
|
+
{"name": final_name, "arguments": final_arguments}
|
|
44
|
+
Lists are normalized element-wise.
|
|
45
|
+
"""
|
|
46
|
+
if value is None:
|
|
47
|
+
return value
|
|
48
|
+
|
|
49
|
+
def _normalize_one(item: Any) -> Any:
|
|
50
|
+
call = _to_call_dict(item)
|
|
51
|
+
return call
|
|
52
|
+
|
|
53
|
+
if isinstance(value, list):
|
|
54
|
+
return [_normalize_one(x) for x in value]
|
|
55
|
+
|
|
56
|
+
if isinstance(value, dict):
|
|
57
|
+
return _normalize_one(value)
|
|
58
|
+
|
|
59
|
+
return value
|
hud/version.py
CHANGED
{hud_python-0.4.31.dist-info → hud_python-0.4.33.dist-info}/RECORD
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
hud/__init__.py,sha256=JMDFUE1pP0J1Xl_miBdt7ERvoffZmTzSFe8yxz512A8,552
|
|
2
2
|
hud/__main__.py,sha256=YR8Dq8OhINOsVfQ55PmRXXg4fEK84Rt_-rMtJ5rvhWo,145
|
|
3
3
|
hud/settings.py,sha256=sMS31iW1m-5VpWk-Blhi5-obLcUA0fwxWE1GgJz-vqU,2708
|
|
4
|
-
hud/types.py,sha256=
|
|
5
|
-
hud/version.py,sha256=
|
|
4
|
+
hud/types.py,sha256=RtNM2fPU1NAujTmZLOydQIU-ybk3gVRCoJ2TM2hJOlw,10752
|
|
5
|
+
hud/version.py,sha256=7nCICMgtZOjBoirBGd5_5Ea-s2F7XAgLvEX_110KGAU,105
|
|
6
6
|
hud/agents/__init__.py,sha256=UoIkljWdbq4bM0LD-mSaw6w826EqdEjOk7r6glNYwYQ,286
|
|
7
7
|
hud/agents/base.py,sha256=_u1zR3gXzZ1RlTCUYdMcvgHqdJBC4-AB1lZt0yBx8lg,35406
|
|
8
8
|
hud/agents/claude.py,sha256=wHiw8iAnjnRmZyKRKcOhagCDQMhz9Z6rlSBWqH1X--M,15781
|
|
@@ -30,10 +30,10 @@ hud/cli/get.py,sha256=sksKrdzBGZa7ZuSoQkc0haj-CvOGVSSikoVXeaUd3N4,6274
|
|
|
30
30
|
hud/cli/init.py,sha256=McZwpxZMXD-It_PXINCUy-SwUaPiQ7jdpSU5-F-caO8,19671
|
|
31
31
|
hud/cli/list_func.py,sha256=EVi2Vc3Lb3glBNJxFx4MPnZknZ4xmuJz1OFg_dc8a_E,7177
|
|
32
32
|
hud/cli/pull.py,sha256=Vd1l1-IwskyACzhtC8Df1SYINUZEYmFxrLl0s9cNN6c,12151
|
|
33
|
-
hud/cli/push.py,sha256=
|
|
33
|
+
hud/cli/push.py,sha256=dmjF-hGlMfq73tquDxsTuM9t50zrkE9PFJqW5vRmYSw,18380
|
|
34
34
|
hud/cli/remove.py,sha256=8vGQyXDqgtjz85_vtusoIG8zurH4RHz6z8UMevQRYM4,6861
|
|
35
35
|
hud/cli/flows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
|
-
hud/cli/flows/tasks.py,sha256=
|
|
36
|
+
hud/cli/flows/tasks.py,sha256=CSdUA4vWMyLHke7pQgxsHzuPAj3CxmQuu66KjSvVai4,8822
|
|
37
37
|
hud/cli/rl/__init__.py,sha256=BeqXdmzPwVBptz4j796XJRxSC5B_9tQta5aKd0jDMvo,5000
|
|
38
38
|
hud/cli/rl/config.py,sha256=iNhCxotM33OEiP9gqPvn8A_AxrBVe6fcFCQTvc13xzA,2884
|
|
39
39
|
hud/cli/rl/display.py,sha256=hqJVGmO9csYinladhZwjF-GMvppYWngxDHajTyIJ_gM,5214
|
|
@@ -41,7 +41,7 @@ hud/cli/rl/gpu.py,sha256=peXS-NdUF5RyuSs0aZoCzGLboneBUpCy8f9f99WMrG0,2009
|
|
|
41
41
|
hud/cli/rl/gpu_utils.py,sha256=H5ckPwgj5EVP3yJ5eVihR5R7Y6Gp6pt8ZUfWCCwcLG4,11072
|
|
42
42
|
hud/cli/rl/local_runner.py,sha256=GssmDgCxGfFsi31aFj22vwCiwa9ELllEwQjbActxSXY,21514
|
|
43
43
|
hud/cli/rl/presets.py,sha256=DzOO82xL5QyzdVtlX-Do1CODMvDz9ILMPapjU92jcZg,3051
|
|
44
|
-
hud/cli/rl/remote_runner.py,sha256=
|
|
44
|
+
hud/cli/rl/remote_runner.py,sha256=JvLOf3X-a6svz_aoOKuuaWzoei6Nrp1ShHeg2KxYk7U,13725
|
|
45
45
|
hud/cli/rl/rl_api.py,sha256=INJobvSa50ccR037u_GPsDa_9WboWyNwqEaoh9hcXj0,4306
|
|
46
46
|
hud/cli/rl/vllm.py,sha256=Gq_M6KsQArGz7FNIdemuM5mk16mu3xe8abpO2GCCuOE,6093
|
|
47
47
|
hud/cli/tests/__init__.py,sha256=ZrGVkmH7DHXGqOvjOSNGZeMYaFIRB2K8c6hwr8FPJ-8,68
|
|
@@ -83,7 +83,7 @@ hud/clients/tests/test_fastmcp.py,sha256=4q3TzDjuieTZa89taiNJIrzbUncNkYOG4Maubyp
|
|
|
83
83
|
hud/clients/tests/test_mcp_use_retry.py,sha256=9FxLAz4L5Vv3OTtj4wdhRY23wDYALUpE12TYWl7fbJA,13299
|
|
84
84
|
hud/clients/tests/test_protocol.py,sha256=aK4CS4g3j1D5jPo83ykzZuHUvcZFAulYtIq9T9Hb_fQ,6640
|
|
85
85
|
hud/clients/utils/__init__.py,sha256=-zZjcKIWGj2tXbVDOW45UgoGghhLJzFQVZ6miKenuA4,595
|
|
86
|
-
hud/clients/utils/mcp_use_retry.py,sha256=
|
|
86
|
+
hud/clients/utils/mcp_use_retry.py,sha256=knsgOTR3YFXshmPFfPQE6K6C5GpR1ZBJe2J7ozEMikA,6675
|
|
87
87
|
hud/clients/utils/retry.py,sha256=mMs2T_mAlb8AYhSqMR4AmCw7838gqCC4mdG3zjMAYM4,5744
|
|
88
88
|
hud/clients/utils/retry_transport.py,sha256=Rsq25eiKKt_pM1bas78QEZvO0illK97X_3opmaS3A3w,6809
|
|
89
89
|
hud/datasets/__init__.py,sha256=-g05iDy76CU4JiRHjKBBhgh3STtiIjmWhUfPqgf5hJE,697
|
|
@@ -109,7 +109,7 @@ hud/otel/tests/test_processors.py,sha256=np0R4ssd9j6LJSJykJ5bNjl0POwNYNhgb7BqOZH
|
|
|
109
109
|
hud/rl/README.md,sha256=uFRpNFaEY8paq9k1C4miF7AGnbqHTGAsPmpcf9JIEeA,1189
|
|
110
110
|
hud/rl/__init__.py,sha256=yYL7U1WV6L3mr3Hig48-4lhnryTaWj4nCXm4lG5vrYI,25
|
|
111
111
|
hud/rl/actor.py,sha256=0YChXyxCz1wVBQ9lKb7vSl64_HQ24-DmYqCCxuORzJc,6747
|
|
112
|
-
hud/rl/buffer.py,sha256=
|
|
112
|
+
hud/rl/buffer.py,sha256=FWGivdJ0YEYZZPK0bUyvjiKparaUgiBE9GzQLZj8kcA,15372
|
|
113
113
|
hud/rl/chat_template.jinja,sha256=XTdzI8oFGEcSA-exKxyHaprwRDmX5Am1KEb0VxvUc6U,4965
|
|
114
114
|
hud/rl/config.py,sha256=PAKYPCsKl8yg_j3gJSE5SJUgLM7j0lFy0K_Vt4-otDM,5384
|
|
115
115
|
hud/rl/distributed.py,sha256=8avhrb0lHYkhW22Z7MfkqSnlczWj5jMrUMEtkcoCf74,2473
|
|
@@ -121,6 +121,8 @@ hud/rl/vllm_adapter.py,sha256=O2_TdTGIyNr9zRGhCw18XWjOKYzEM3049wvlyL2x0sc,4751
|
|
|
121
121
|
hud/rl/tests/__init__.py,sha256=PXmD3Gs6xOAwaYKb4HnwZERDjX05N1QF-aU6ya0dBtE,27
|
|
122
122
|
hud/rl/tests/test_learner.py,sha256=qfSHFFROteRb98TjBuAKjFmZjCGfuWXPysVvTAWJ7wQ,6025
|
|
123
123
|
hud/rl/utils/start_vllm_server.sh,sha256=ThPokrLK_Qm_uh916fHXXBfMlw1TC97P57-AEI5MuOc,910
|
|
124
|
+
hud/samples/__init__.py,sha256=wgcN1IOLHhR4C1fFKqyvA7Yl9lJhJFf34zfKs-UMSus,128
|
|
125
|
+
hud/samples/browser.py,sha256=7LkzGx2G5dA8RogZwORnxxpVsxMV2gF18D_hGJIEow8,973
|
|
124
126
|
hud/server/__init__.py,sha256=8LUwgsXO8xiViWP7uImDwcOsWLu01r5F4r8U8qH3rSY,91
|
|
125
127
|
hud/server/context.py,sha256=6bCdSzv1FGyItu9472HbbYef279H7QuMGJDR8EtYg5Y,3210
|
|
126
128
|
hud/server/low_level.py,sha256=XYs2pOJ9kN4OcJ6ahDmXM5mWkzq5wJLpKFInUYrWEok,4701
|
|
@@ -187,21 +189,22 @@ hud/utils/agent_factories.py,sha256=cvfXByqG6gOYHtm1VGeJjCpxoLxM4aJez8rH-AerP_A,
|
|
|
187
189
|
hud/utils/async_utils.py,sha256=5cKrJcnaHV2eJNxeyx0r7fPcdPTDBK7kM9-nLaF51X4,2409
|
|
188
190
|
hud/utils/group_eval.py,sha256=oaoBqlQN6g5gRQmuY_JmqM5bpuf2sFIgu4uDZ7X-3a0,8360
|
|
189
191
|
hud/utils/hud_console.py,sha256=ywTrzyNhWFoQN2PpzpDDKp_32b-ACDvfKQuWxDoF8iE,21898
|
|
190
|
-
hud/utils/mcp.py,sha256=
|
|
192
|
+
hud/utils/mcp.py,sha256=pMadd7A0DH6Y_aWywKU8jVYu2pRHGPEndV2ZQFrrj60,2888
|
|
191
193
|
hud/utils/pretty_errors.py,sha256=WGeL4CTHtlA6KgPuV_JSX5l6H4-xbuTp6Y6tw1bkiFg,2430
|
|
192
194
|
hud/utils/progress.py,sha256=suikwFM8sdSfkV10nAOEaInDhG4XKgOSvFePg4jSj1A,5927
|
|
193
195
|
hud/utils/tasks.py,sha256=JwFIq0cpPMpMYnICUmx_G4CF6uy9MtiCmmmN7eA6FsA,4682
|
|
194
196
|
hud/utils/telemetry.py,sha256=hrVIx2rUjSGyy9IVxTZ_3Jii83PiHjyFRd5ls2whimM,1863
|
|
197
|
+
hud/utils/tool_shorthand.py,sha256=nWo-Z7D4w8qF1lWKP7TkXMHZiU3vj4jAwfcBXkwrpnE,1833
|
|
195
198
|
hud/utils/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
196
199
|
hud/utils/tests/test_async_utils.py,sha256=RkdSnYErRV3Jn7dfg6CPlcE1RSUL__2B627oIqAyy1s,5945
|
|
197
200
|
hud/utils/tests/test_init.py,sha256=2QLQSGgyP9wJhOvPCusm_zjJad0qApOZi1BXpxcdHXQ,383
|
|
198
201
|
hud/utils/tests/test_mcp.py,sha256=0pUa16mL-bqbZDXp5NHBnt1gO5o10BOg7zTMHZ1DNPM,4023
|
|
199
202
|
hud/utils/tests/test_progress.py,sha256=QSF7Kpi03Ff_l3mAeqW9qs1nhK50j9vBiSobZq7T4f4,7394
|
|
200
203
|
hud/utils/tests/test_telemetry.py,sha256=5jl7bEx8C8b-FfFUko5pf4UY-mPOR-9HaeL98dGtVHM,2781
|
|
201
|
-
hud/utils/tests/test_version.py,sha256=
|
|
204
|
+
hud/utils/tests/test_version.py,sha256=Wdb1xAhmZ4EgoOIqiOcIr3iRZIxEMUCPCgee6cAlR3s,160
|
|
202
205
|
hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
203
|
-
hud_python-0.4.
|
|
204
|
-
hud_python-0.4.
|
|
205
|
-
hud_python-0.4.
|
|
206
|
-
hud_python-0.4.
|
|
207
|
-
hud_python-0.4.
|
|
206
|
+
hud_python-0.4.33.dist-info/METADATA,sha256=EQgm-qxFqkYHk78gbjyHW0KTUu03JKnBidSwzfDG4ZY,20861
|
|
207
|
+
hud_python-0.4.33.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
208
|
+
hud_python-0.4.33.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
|
|
209
|
+
hud_python-0.4.33.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
|
|
210
|
+
hud_python-0.4.33.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|