hud-python 0.4.42__py3-none-any.whl → 0.4.43__py3-none-any.whl
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/openai_chat_generic.py +1 -1
- hud/cli/__init__.py +6 -0
- hud/cli/dev.py +24 -2
- hud/cli/eval.py +10 -11
- hud/cli/flows/tasks.py +4 -5
- hud/cli/rl/__init__.py +6 -0
- hud/cli/rl/config.py +2 -2
- hud/cli/rl/gpu_utils.py +5 -3
- hud/cli/rl/remote_runner.py +18 -9
- hud/cli/rl/rl_api.py +2 -2
- hud/cli/utils/environment.py +1 -5
- hud/rl/config.py +14 -9
- hud/rl/train.py +9 -6
- hud/utils/group_eval.py +2 -2
- hud/utils/tasks.py +1 -1
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.42.dist-info → hud_python-0.4.43.dist-info}/METADATA +1 -1
- {hud_python-0.4.42.dist-info → hud_python-0.4.43.dist-info}/RECORD +22 -22
- {hud_python-0.4.42.dist-info → hud_python-0.4.43.dist-info}/WHEEL +0 -0
- {hud_python-0.4.42.dist-info → hud_python-0.4.43.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.42.dist-info → hud_python-0.4.43.dist-info}/licenses/LICENSE +0 -0
hud/cli/__init__.py
CHANGED
|
@@ -1178,6 +1178,11 @@ def rl(
|
|
|
1178
1178
|
"--vllm-gpu",
|
|
1179
1179
|
help="Specific GPU for vLLM server",
|
|
1180
1180
|
),
|
|
1181
|
+
vllm_gpu_count: int = typer.Option(
|
|
1182
|
+
1,
|
|
1183
|
+
"--vllm-gpu-count",
|
|
1184
|
+
help="Number of GPUs for vLLM server",
|
|
1185
|
+
),
|
|
1181
1186
|
skip_vllm_startup: bool = typer.Option(
|
|
1182
1187
|
False,
|
|
1183
1188
|
"--skip-vllm-startup",
|
|
@@ -1199,6 +1204,7 @@ def rl(
|
|
|
1199
1204
|
no_ddp=no_ddp,
|
|
1200
1205
|
ddp_gpus=ddp_gpus,
|
|
1201
1206
|
vllm_gpu=vllm_gpu,
|
|
1207
|
+
vllm_gpu_count=vllm_gpu_count,
|
|
1202
1208
|
yes=yes,
|
|
1203
1209
|
skip_vllm_startup=skip_vllm_startup,
|
|
1204
1210
|
)
|
hud/cli/dev.py
CHANGED
|
@@ -73,6 +73,24 @@ def create_proxy_server(
|
|
|
73
73
|
"PYTHONUNBUFFERED=1", # Ensure Python output is not buffered
|
|
74
74
|
]
|
|
75
75
|
|
|
76
|
+
# Check for .env file in the project directory and add env vars
|
|
77
|
+
env_file = project_path / ".env"
|
|
78
|
+
loaded_env_vars = {}
|
|
79
|
+
if env_file.exists():
|
|
80
|
+
try:
|
|
81
|
+
from hud.cli.utils.config import parse_env_file
|
|
82
|
+
|
|
83
|
+
env_contents = env_file.read_text(encoding="utf-8")
|
|
84
|
+
loaded_env_vars = parse_env_file(env_contents)
|
|
85
|
+
for key, value in loaded_env_vars.items():
|
|
86
|
+
docker_cmd.extend(["-e", f"{key}={value}"])
|
|
87
|
+
if verbose and loaded_env_vars:
|
|
88
|
+
hud_console.info(
|
|
89
|
+
f"Loaded {len(loaded_env_vars)} environment variable(s) from .env file"
|
|
90
|
+
)
|
|
91
|
+
except Exception as e:
|
|
92
|
+
hud_console.warning(f"Failed to load .env file: {e}")
|
|
93
|
+
|
|
76
94
|
# Add user-provided Docker arguments
|
|
77
95
|
if docker_args:
|
|
78
96
|
docker_cmd.extend(docker_args)
|
|
@@ -112,8 +130,12 @@ def create_proxy_server(
|
|
|
112
130
|
hud_console.info("The container's CMD determines reload behavior")
|
|
113
131
|
hud_console.command_example(f"docker logs -f {container_name}", "View container logs")
|
|
114
132
|
|
|
115
|
-
# Show the full Docker command if there are environment variables
|
|
116
|
-
|
|
133
|
+
# Show the full Docker command if there are environment variables (from .env or args)
|
|
134
|
+
has_env_from_args = docker_args and any(
|
|
135
|
+
arg == "-e" or arg.startswith("--env") for arg in docker_args
|
|
136
|
+
)
|
|
137
|
+
has_env_from_file = bool(loaded_env_vars)
|
|
138
|
+
if has_env_from_args or has_env_from_file:
|
|
117
139
|
hud_console.info("")
|
|
118
140
|
hud_console.info("Docker command with environment variables:")
|
|
119
141
|
hud_console.info(" ".join(docker_cmd))
|
hud/cli/eval.py
CHANGED
|
@@ -298,16 +298,15 @@ async def run_single_task(
|
|
|
298
298
|
agent_config["allowed_tools"] = allowed_tools
|
|
299
299
|
|
|
300
300
|
# Run with grouping
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
)
|
|
301
|
+
stats = await run_tasks_grouped(
|
|
302
|
+
tasks=[task],
|
|
303
|
+
agent_class=agent_class,
|
|
304
|
+
agent_config=agent_config,
|
|
305
|
+
group_size=group_size,
|
|
306
|
+
max_parallel_episodes=48, # Same as RL default
|
|
307
|
+
max_steps=max_steps,
|
|
308
|
+
verbose=verbose,
|
|
309
|
+
)
|
|
311
310
|
|
|
312
311
|
# Display results
|
|
313
312
|
display_group_statistics(stats, show_details=True)
|
|
@@ -499,7 +498,7 @@ async def run_full_dataset(
|
|
|
499
498
|
)
|
|
500
499
|
|
|
501
500
|
# Display results
|
|
502
|
-
display_group_statistics(stats, show_details=len(stats) <=
|
|
501
|
+
display_group_statistics(stats, show_details=len(stats) <= 50)
|
|
503
502
|
|
|
504
503
|
# Return stats for consistency with other modes
|
|
505
504
|
return stats
|
hud/cli/flows/tasks.py
CHANGED
|
@@ -212,17 +212,14 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
|
|
|
212
212
|
# Check if tasks already have remote URLs
|
|
213
213
|
already_remote = _validate_tasks(tasks)
|
|
214
214
|
|
|
215
|
-
# If tasks already reference a remote MCP URL, do not require a local environment
|
|
216
|
-
# or attempt any image updates. Use the dataset as-is.
|
|
217
|
-
if already_remote:
|
|
218
|
-
return str(tasks_path)
|
|
219
|
-
|
|
220
215
|
# Extract existing images from tasks
|
|
221
216
|
existing_images = _extract_existing_images(tasks)
|
|
222
217
|
|
|
223
218
|
# Locate environment
|
|
224
219
|
env_dir = find_environment_dir(tasks_path)
|
|
225
220
|
if not env_dir:
|
|
221
|
+
if already_remote:
|
|
222
|
+
return str(tasks_path)
|
|
226
223
|
hud_console.error("Could not locate an environment directory (Dockerfile + pyproject.toml)")
|
|
227
224
|
hud_console.hint("Ensure you're in or near your environment folder before running 'hud rl'")
|
|
228
225
|
raise typer.Exit(1)
|
|
@@ -373,6 +370,8 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
|
|
|
373
370
|
item["system_prompt"] = t.system_prompt
|
|
374
371
|
if t.metadata:
|
|
375
372
|
item["metadata"] = t.metadata
|
|
373
|
+
if t.id is not None:
|
|
374
|
+
item["id"] = t.id
|
|
376
375
|
|
|
377
376
|
tasks_payload.append(item)
|
|
378
377
|
|
hud/cli/rl/__init__.py
CHANGED
|
@@ -78,6 +78,11 @@ def rl_command(
|
|
|
78
78
|
"-y",
|
|
79
79
|
help="Auto-accept all prompts and use defaults (lazy mode)",
|
|
80
80
|
),
|
|
81
|
+
vllm_gpu_count: int = typer.Option(
|
|
82
|
+
None,
|
|
83
|
+
"--vllm-gpu-count",
|
|
84
|
+
help="Number of GPUs for vLLM server",
|
|
85
|
+
),
|
|
81
86
|
skip_vllm_startup: bool = typer.Option(
|
|
82
87
|
False,
|
|
83
88
|
"--skip-vllm-startup",
|
|
@@ -145,6 +150,7 @@ def rl_command(
|
|
|
145
150
|
model=model,
|
|
146
151
|
config_file=config_file,
|
|
147
152
|
output_dir=output_dir,
|
|
153
|
+
vllm_gpu_count=vllm_gpu_count,
|
|
148
154
|
yes=yes,
|
|
149
155
|
)
|
|
150
156
|
return
|
hud/cli/rl/config.py
CHANGED
|
@@ -84,7 +84,7 @@ def save_config(config: Config, path: Path) -> None:
|
|
|
84
84
|
"""Save configuration to a JSON file."""
|
|
85
85
|
config_dict = config.to_dict()
|
|
86
86
|
|
|
87
|
-
with open(path, "w") as f:
|
|
87
|
+
with open(path, "w", encoding="utf-8") as f:
|
|
88
88
|
json.dump(config_dict, f, indent=2)
|
|
89
89
|
f.write("\n") # Add newline at end of file
|
|
90
90
|
|
|
@@ -94,7 +94,7 @@ def save_config(config: Config, path: Path) -> None:
|
|
|
94
94
|
|
|
95
95
|
def load_config(path: Path) -> Config:
|
|
96
96
|
"""Load configuration from a JSON file."""
|
|
97
|
-
with open(path) as f:
|
|
97
|
+
with open(path, encoding="utf-8") as f:
|
|
98
98
|
data = json.load(f)
|
|
99
99
|
|
|
100
100
|
# Use Config.from_dict which handles missing fields gracefully
|
hud/cli/rl/gpu_utils.py
CHANGED
|
@@ -245,10 +245,12 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
|
|
|
245
245
|
# Apply scaling rule
|
|
246
246
|
if num_gpus == 1:
|
|
247
247
|
# Special case: 2 groups for single GPU
|
|
248
|
+
groups_per_gpu = 2
|
|
248
249
|
config.training.batch_size = 2 * group_size
|
|
249
250
|
else:
|
|
250
|
-
|
|
251
|
-
|
|
251
|
+
groups_per_gpu = config.training.batch_size // group_size
|
|
252
|
+
# Multi-GPU: each GPU processes groups_per_gpu groups
|
|
253
|
+
config.training.batch_size = num_gpus * group_size * groups_per_gpu
|
|
252
254
|
|
|
253
255
|
# Update max_parallel_episodes to match
|
|
254
256
|
config.actor.max_parallel_episodes = config.training.batch_size
|
|
@@ -263,7 +265,7 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
|
|
|
263
265
|
f"\n[cyan]📊 Adjusted batch_size to {config.training.batch_size} ({config.training.batch_size // group_size} groups)[/cyan]" # noqa: E501
|
|
264
266
|
)
|
|
265
267
|
console.print(
|
|
266
|
-
f"[cyan] Each of the {num_gpus} GPU(s) will process {
|
|
268
|
+
f"[cyan] Each of the {num_gpus} GPU(s) will process {groups_per_gpu} group(s) in parallel[/cyan]" # noqa: E501
|
|
267
269
|
)
|
|
268
270
|
|
|
269
271
|
return config
|
hud/cli/rl/remote_runner.py
CHANGED
|
@@ -32,7 +32,9 @@ GPU_PRICING = {
|
|
|
32
32
|
}
|
|
33
33
|
|
|
34
34
|
|
|
35
|
-
def ensure_vllm_deployed(
|
|
35
|
+
def ensure_vllm_deployed(
|
|
36
|
+
model_name: str, gpu_type: str = "A100", gpu_count: int = 1, timeout: int = 600
|
|
37
|
+
) -> None:
|
|
36
38
|
"""Deploy vLLM for a model if needed and wait until it's ready.
|
|
37
39
|
|
|
38
40
|
Args:
|
|
@@ -47,7 +49,7 @@ def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int =
|
|
|
47
49
|
return
|
|
48
50
|
|
|
49
51
|
hud_console.info(f"Deploying vLLM server for {model_name}...")
|
|
50
|
-
rl_api.deploy_vllm(model_name, gpu_type=gpu_type)
|
|
52
|
+
rl_api.deploy_vllm(model_name, gpu_type=gpu_type, gpu_count=gpu_count)
|
|
51
53
|
hud_console.success("vLLM deployment started")
|
|
52
54
|
|
|
53
55
|
hud_console.info("Waiting for vLLM server to be ready...")
|
|
@@ -72,6 +74,7 @@ def run_remote_training(
|
|
|
72
74
|
model: str | None,
|
|
73
75
|
config_file: Path | None,
|
|
74
76
|
output_dir: str,
|
|
77
|
+
vllm_gpu_count: int = 1,
|
|
75
78
|
yes: bool = False,
|
|
76
79
|
) -> None:
|
|
77
80
|
"""Run RL training remotely via the API server following the new interactive flow."""
|
|
@@ -183,14 +186,18 @@ def run_remote_training(
|
|
|
183
186
|
|
|
184
187
|
# Ask for model type
|
|
185
188
|
if yes:
|
|
186
|
-
|
|
189
|
+
if config_file:
|
|
190
|
+
config = load_config(config_file)
|
|
191
|
+
model_type = config.model.base_model
|
|
192
|
+
else:
|
|
193
|
+
model_type = "Qwen/Qwen2.5-VL-3B-Instruct"
|
|
187
194
|
hud_console.info(f"Auto-selecting base model: {model_type} (--yes mode)")
|
|
188
195
|
else:
|
|
189
196
|
model_type = hud_console.select(
|
|
190
197
|
"Select base model type:",
|
|
191
198
|
choices=[
|
|
192
199
|
{"name": "Qwen2.5-VL-3B-Instruct", "value": "Qwen/Qwen2.5-VL-3B-Instruct"},
|
|
193
|
-
|
|
200
|
+
{"name": "Qwen2.5-3B-Instruct", "value": "Qwen/Qwen2.5-3B-Instruct"},
|
|
194
201
|
],
|
|
195
202
|
default=0,
|
|
196
203
|
)
|
|
@@ -218,7 +225,7 @@ def run_remote_training(
|
|
|
218
225
|
try:
|
|
219
226
|
rl_api.create_model(model_name, model_type)
|
|
220
227
|
hud_console.success(f"Created model: {model_name}")
|
|
221
|
-
ensure_vllm_deployed(model_name, gpu_type="A100")
|
|
228
|
+
ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
|
|
222
229
|
|
|
223
230
|
except Exception as e:
|
|
224
231
|
# If the name already exists, suggest a new name and prompt once
|
|
@@ -247,7 +254,7 @@ def run_remote_training(
|
|
|
247
254
|
rl_api.create_model(chosen, model_type)
|
|
248
255
|
hud_console.success(f"Created model: {chosen}")
|
|
249
256
|
model_name = chosen
|
|
250
|
-
ensure_vllm_deployed(model_name, gpu_type="A100")
|
|
257
|
+
ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
|
|
251
258
|
except Exception as e2:
|
|
252
259
|
hud_console.error(f"Failed to create model: {e2}")
|
|
253
260
|
raise
|
|
@@ -281,7 +288,7 @@ def run_remote_training(
|
|
|
281
288
|
return
|
|
282
289
|
|
|
283
290
|
# Ensure vLLM is deployed
|
|
284
|
-
ensure_vllm_deployed(model_name, gpu_type="A100")
|
|
291
|
+
ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
|
|
285
292
|
except KeyboardInterrupt:
|
|
286
293
|
hud_console.dim_info("Training cancelled", "")
|
|
287
294
|
return
|
|
@@ -323,7 +330,7 @@ def run_remote_training(
|
|
|
323
330
|
)
|
|
324
331
|
|
|
325
332
|
if yes:
|
|
326
|
-
num_gpus = 2
|
|
333
|
+
num_gpus = 2 # Default to 2 GPUs in yes mode
|
|
327
334
|
hud_console.info(f"Auto-selecting {num_gpus} GPU(s) (--yes mode)")
|
|
328
335
|
else:
|
|
329
336
|
num_gpus = hud_console.select(
|
|
@@ -425,10 +432,12 @@ def run_remote_training(
|
|
|
425
432
|
# Load provided config
|
|
426
433
|
hud_console.info(f"Loading configuration from: {config_file}")
|
|
427
434
|
config = load_config(config_file)
|
|
428
|
-
config_dict = config.to_dict()
|
|
429
435
|
gpu_choice = config.training.gpu_type
|
|
430
436
|
num_gpus = config.training.num_gpus
|
|
431
437
|
|
|
438
|
+
config = adjust_config_for_ddp(config, int(num_gpus))
|
|
439
|
+
config_dict = config.to_dict()
|
|
440
|
+
|
|
432
441
|
# Launch training
|
|
433
442
|
try:
|
|
434
443
|
# Little celebration before launching
|
hud/cli/rl/rl_api.py
CHANGED
|
@@ -61,12 +61,12 @@ def list_models() -> list[RLModelInfo]:
|
|
|
61
61
|
]
|
|
62
62
|
|
|
63
63
|
|
|
64
|
-
def deploy_vllm(model_name: str, gpu_type: str = "A100") -> dict[str, Any]:
|
|
64
|
+
def deploy_vllm(model_name: str, gpu_type: str = "A100", gpu_count: int = 1) -> dict[str, Any]:
|
|
65
65
|
"""Deploy a vLLM server for a model."""
|
|
66
66
|
return make_request_sync(
|
|
67
67
|
method="POST",
|
|
68
68
|
url=f"{settings.hud_rl_url}/models/{model_name}/deploy",
|
|
69
|
-
json={"gpu_type": gpu_type},
|
|
69
|
+
json={"gpu_type": gpu_type, "gpu_count": gpu_count},
|
|
70
70
|
api_key=settings.api_key,
|
|
71
71
|
)
|
|
72
72
|
|
hud/cli/utils/environment.py
CHANGED
|
@@ -127,8 +127,4 @@ def is_environment_directory(path: str | Path) -> bool:
|
|
|
127
127
|
return False
|
|
128
128
|
|
|
129
129
|
# Must have pyproject.toml
|
|
130
|
-
|
|
131
|
-
hud_console.error("pyproject.toml not found")
|
|
132
|
-
return False
|
|
133
|
-
|
|
134
|
-
return True
|
|
130
|
+
return (dir_path / "pyproject.toml").exists()
|
hud/rl/config.py
CHANGED
|
@@ -13,6 +13,7 @@ SUPPORTED_MODELS = [
|
|
|
13
13
|
"Qwen/Qwen2.5-VL-32B-Instruct",
|
|
14
14
|
"Qwen/Qwen2.5-VL-72B-Instruct",
|
|
15
15
|
"Qwen/Qwen2.5-7B-Instruct",
|
|
16
|
+
"Qwen/Qwen2.5-3B-Instruct",
|
|
16
17
|
]
|
|
17
18
|
|
|
18
19
|
|
|
@@ -39,9 +40,9 @@ class ModelConfig:
|
|
|
39
40
|
"""Model and LoRA configuration."""
|
|
40
41
|
|
|
41
42
|
base_model: str = "Qwen/Qwen2.5-VL-3B-Instruct"
|
|
42
|
-
lora_r: int =
|
|
43
|
-
lora_alpha: int =
|
|
44
|
-
lora_dropout: float = 0.
|
|
43
|
+
lora_r: int = 16
|
|
44
|
+
lora_alpha: int = 32
|
|
45
|
+
lora_dropout: float = 0.1
|
|
45
46
|
target_modules: tuple[str, ...] = (
|
|
46
47
|
"q_proj",
|
|
47
48
|
"k_proj",
|
|
@@ -61,6 +62,7 @@ class ModelConfig:
|
|
|
61
62
|
@dataclass
|
|
62
63
|
class TrainingConfig:
|
|
63
64
|
"""Training hyperparameters."""
|
|
65
|
+
|
|
64
66
|
# GPU parameters
|
|
65
67
|
gpu_type: str = "A100"
|
|
66
68
|
num_gpus: int = 2
|
|
@@ -71,9 +73,9 @@ class TrainingConfig:
|
|
|
71
73
|
save_every_batches: int = 1
|
|
72
74
|
|
|
73
75
|
# Batching parameters
|
|
74
|
-
epochs: int =
|
|
75
|
-
batch_size: int =
|
|
76
|
-
group_size: int =
|
|
76
|
+
epochs: int = 1
|
|
77
|
+
batch_size: int = 16
|
|
78
|
+
group_size: int = 8
|
|
77
79
|
mini_batch_size: int = 1
|
|
78
80
|
update_after_group: bool = True # Whether to update the policy after each task group
|
|
79
81
|
accumulate_over_minibatches: bool = False # Whether to accumulate over minibatches
|
|
@@ -84,7 +86,7 @@ class TrainingConfig:
|
|
|
84
86
|
leave_one_out: bool = True
|
|
85
87
|
|
|
86
88
|
# Replay buffer parameters
|
|
87
|
-
buffer_steps: int =
|
|
89
|
+
buffer_steps: int = 8
|
|
88
90
|
select_strategy: Literal["recent", "variance", "random"] = "variance"
|
|
89
91
|
|
|
90
92
|
# Aggregation parameters
|
|
@@ -92,8 +94,8 @@ class TrainingConfig:
|
|
|
92
94
|
token_agg: Literal["mean", "sum"] = "mean" # noqa: S105
|
|
93
95
|
|
|
94
96
|
# Regularization parameters
|
|
95
|
-
kl_beta: float = 0.
|
|
96
|
-
entropy_beta: float = 0.
|
|
97
|
+
kl_beta: float = 0.001
|
|
98
|
+
entropy_beta: float = 0.001
|
|
97
99
|
top_eps: float = 0.2
|
|
98
100
|
bottom_eps: float = 0.1
|
|
99
101
|
|
|
@@ -143,6 +145,7 @@ class Config:
|
|
|
143
145
|
job_id: str | None = None # Use existing job ID if provided
|
|
144
146
|
stats_interval: int = 1
|
|
145
147
|
verbose: bool = False
|
|
148
|
+
very_verbose: bool = False
|
|
146
149
|
|
|
147
150
|
# Paths
|
|
148
151
|
out_dir: str = "./checkpoints"
|
|
@@ -166,6 +169,7 @@ class Config:
|
|
|
166
169
|
job_id=d.get("job_id"),
|
|
167
170
|
stats_interval=d.get("stats_interval", 1),
|
|
168
171
|
verbose=d.get("verbose", False),
|
|
172
|
+
very_verbose=d.get("very_verbose", False),
|
|
169
173
|
out_dir=d.get("out_dir", "./checkpoints"),
|
|
170
174
|
adapter_prefix=d.get("adapter_prefix", "cua-grpo-step"),
|
|
171
175
|
seed=d.get("seed", 1234),
|
|
@@ -181,6 +185,7 @@ class Config:
|
|
|
181
185
|
"job_id": self.job_id,
|
|
182
186
|
"stats_interval": self.stats_interval,
|
|
183
187
|
"verbose": self.verbose,
|
|
188
|
+
"very_verbose": self.very_verbose,
|
|
184
189
|
"out_dir": self.out_dir,
|
|
185
190
|
"adapter_prefix": self.adapter_prefix,
|
|
186
191
|
"seed": self.seed,
|
hud/rl/train.py
CHANGED
|
@@ -56,6 +56,10 @@ async def train(config: Config, tasks: list[Task]) -> None:
|
|
|
56
56
|
logging.basicConfig(level=logging.INFO)
|
|
57
57
|
# Remove httpx logger
|
|
58
58
|
logging.getLogger("httpx").setLevel(logging.WARNING)
|
|
59
|
+
if config.very_verbose:
|
|
60
|
+
logging.basicConfig(level=logging.DEBUG)
|
|
61
|
+
# Remove httpx logger
|
|
62
|
+
logging.getLogger("httpx").setLevel(logging.INFO)
|
|
59
63
|
|
|
60
64
|
if is_main_process():
|
|
61
65
|
hud_console.header("Starting GRPO Training")
|
|
@@ -103,10 +107,9 @@ async def train(config: Config, tasks: list[Task]) -> None:
|
|
|
103
107
|
if is_main_process():
|
|
104
108
|
hud_console.info(f"Creating job with config.job_id: {config.job_id}")
|
|
105
109
|
job_obj = hud.create_job(
|
|
106
|
-
job_id=config.job_id,
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
}
|
|
110
|
+
job_id=config.job_id,
|
|
111
|
+
name=config.job_name,
|
|
112
|
+
metadata={"config": config.to_dict(), "agent_class": config.model.base_model},
|
|
110
113
|
)
|
|
111
114
|
hud_console.info(f"Created job with job_obj.id: {job_obj.id}")
|
|
112
115
|
job_obj.update_status_sync("running")
|
|
@@ -299,7 +302,7 @@ async def main() -> None:
|
|
|
299
302
|
|
|
300
303
|
# Load config
|
|
301
304
|
if args.config:
|
|
302
|
-
with open(args.config) as f: # noqa: ASYNC230
|
|
305
|
+
with open(args.config, encoding="utf-8") as f: # noqa: ASYNC230
|
|
303
306
|
config_dict = json.load(f)
|
|
304
307
|
config = Config.from_dict(config_dict)
|
|
305
308
|
else:
|
|
@@ -337,7 +340,7 @@ async def main() -> None:
|
|
|
337
340
|
# Load tasks
|
|
338
341
|
if args.tasks_json:
|
|
339
342
|
# Tasks provided as JSON list via command line
|
|
340
|
-
tasks = load_tasks(args.
|
|
343
|
+
tasks = load_tasks(args.tasks_json)
|
|
341
344
|
elif args.tasks:
|
|
342
345
|
# Tasks provided as file path or HuggingFace dataset
|
|
343
346
|
tasks = load_tasks(args.tasks)
|
hud/utils/group_eval.py
CHANGED
|
@@ -189,7 +189,7 @@ def display_group_statistics(stats: list[dict[str, Any]], show_details: bool = T
|
|
|
189
189
|
hud_console.info(f"Overall mean reward: {overall_mean:.3f} ± {overall_std:.3f}")
|
|
190
190
|
|
|
191
191
|
# Detailed table
|
|
192
|
-
if show_details and len(stats) <=
|
|
192
|
+
if show_details and len(stats) <= 50: # Only show for reasonable dataset sizes
|
|
193
193
|
table = Table(title="\nPer-Task Performance Distribution")
|
|
194
194
|
table.add_column("Task", style="cyan", no_wrap=True)
|
|
195
195
|
table.add_column("Mean±Std", justify="right", style="green")
|
|
@@ -216,7 +216,7 @@ def display_group_statistics(stats: list[dict[str, Any]], show_details: bool = T
|
|
|
216
216
|
# High variance tasks
|
|
217
217
|
high_variance_tasks = [s for s in stats if s["std_reward"] > 0.3 and s["group_size"] > 1]
|
|
218
218
|
if high_variance_tasks:
|
|
219
|
-
hud_console.warning(f"\n
|
|
219
|
+
hud_console.warning(f"\n{len(high_variance_tasks)} tasks show high variance (std > 0.3)")
|
|
220
220
|
for task in high_variance_tasks[:3]:
|
|
221
221
|
hud_console.info(
|
|
222
222
|
f" • {task['task_id']}: μ={task['mean_reward']:.3f}, σ={task['std_reward']:.3f}" # noqa: RUF001
|
hud/utils/tasks.py
CHANGED
|
@@ -40,7 +40,7 @@ def load_tasks(tasks_input: str | list[dict], *, raw: bool = False) -> list[Task
|
|
|
40
40
|
if Path(tasks_input).exists():
|
|
41
41
|
file_path = Path(tasks_input)
|
|
42
42
|
|
|
43
|
-
with open(file_path) as f:
|
|
43
|
+
with open(file_path, encoding="utf-8") as f:
|
|
44
44
|
# Handle JSON files (array of tasks)
|
|
45
45
|
if file_path.suffix.lower() == ".json":
|
|
46
46
|
data = json.load(f)
|
hud/utils/tests/test_version.py
CHANGED
hud/version.py
CHANGED
|
@@ -2,7 +2,7 @@ hud/__init__.py,sha256=JMDFUE1pP0J1Xl_miBdt7ERvoffZmTzSFe8yxz512A8,552
|
|
|
2
2
|
hud/__main__.py,sha256=YR8Dq8OhINOsVfQ55PmRXXg4fEK84Rt_-rMtJ5rvhWo,145
|
|
3
3
|
hud/settings.py,sha256=disObWa-DgXzoDcCDp3y1dTPaNsbR0IvoMJL9Eg4zyo,3947
|
|
4
4
|
hud/types.py,sha256=pmPj_8emfMIfEY_fRS8NgIJ56kCsolWSqQjyCzXDaGY,11072
|
|
5
|
-
hud/version.py,sha256=
|
|
5
|
+
hud/version.py,sha256=AekBbsq3gM3fHm0EOZE1KbYDAcZYlNagJ4ps3KU-byo,105
|
|
6
6
|
hud/agents/__init__.py,sha256=UoIkljWdbq4bM0LD-mSaw6w826EqdEjOk7r6glNYwYQ,286
|
|
7
7
|
hud/agents/base.py,sha256=_u1zR3gXzZ1RlTCUYdMcvgHqdJBC4-AB1lZt0yBx8lg,35406
|
|
8
8
|
hud/agents/claude.py,sha256=TGhm5gE2ltINDAdEsDxKuT9iGMQ5G87R6kmabU3KPt8,16101
|
|
@@ -10,7 +10,7 @@ hud/agents/grounded_openai.py,sha256=U-FHjB2Nh1_o0gmlxY5F17lWJ3oHsNRIB2a7z-IKB64
|
|
|
10
10
|
hud/agents/langchain.py,sha256=1EgCy8jfjunsWxlPC5XfvfLS6_XZVrIF1ZjtHcrvhYw,9584
|
|
11
11
|
hud/agents/lite_llm.py,sha256=_3wbUiYCp7q8Vyu9rhaoJDvmb_bsyUsLYWP3iQJ2bHo,2239
|
|
12
12
|
hud/agents/openai.py,sha256=O1xV1h1l-W8lmnmXqTYr5CwnmnaniMqOxAZbl2CTTng,14576
|
|
13
|
-
hud/agents/openai_chat_generic.py,sha256=
|
|
13
|
+
hud/agents/openai_chat_generic.py,sha256=_vAID9dZ_UxL0elYwafskRcsdrSsLsxJ4zPrP58oBiw,12151
|
|
14
14
|
hud/agents/misc/__init__.py,sha256=BYi4Ytp9b_vycpZFXnr5Oyw6ncKLNNGml8Jrb7bWUb4,136
|
|
15
15
|
hud/agents/misc/response_agent.py,sha256=uMuRDkz5QgaMQliNzBRepond5sb7KyqIiKm3LstjVnw,3753
|
|
16
16
|
hud/agents/tests/__init__.py,sha256=W-O-_4i34d9TTyEHV-O_q1Ai1gLhzwDaaPo02_TWQIY,34
|
|
@@ -19,14 +19,14 @@ hud/agents/tests/test_claude.py,sha256=0nZnfsbGoECvsLPdmaRnc9jVmrehVvc3kxeyiCQI2
|
|
|
19
19
|
hud/agents/tests/test_client.py,sha256=uikgh6yhjPPX2RBU4XJQMz1mNox9uXjuwsP8t93id18,13337
|
|
20
20
|
hud/agents/tests/test_grounded_openai_agent.py,sha256=VK8lUvHIjWicMX00VKPE-FZyjiJqTEhb80MuRRa9fVc,5437
|
|
21
21
|
hud/agents/tests/test_openai.py,sha256=Npbdr0acgLExGLbrleXze-k3w9LHfmqzQjPk9TnjN68,7620
|
|
22
|
-
hud/cli/__init__.py,sha256=
|
|
22
|
+
hud/cli/__init__.py,sha256=lwyaA7z7H4BOt9ksySpT0AnRERoYEiVgUdwV_5s9wIg,45768
|
|
23
23
|
hud/cli/__main__.py,sha256=fDH7XITyuDITwSDIVwRso06aouADO0CzTHKqp5TOwJE,143
|
|
24
24
|
hud/cli/analyze.py,sha256=4u5oYfJMquOjT9PzzRTYVcTZDxDi0ilNP_g532_hpOU,14716
|
|
25
25
|
hud/cli/build.py,sha256=h-4SAoe3j8Pth3mPYf26vh7q1Do5JADlvKKwkZrf2AU,19551
|
|
26
26
|
hud/cli/clone.py,sha256=AwVDIuhr8mHb1oT2Af2HrD25SiTdwATpE6zd93vzLgA,6099
|
|
27
27
|
hud/cli/debug.py,sha256=jtFW8J5F_3rhq1Hf1_SkJ7aLS3wjnyIs_LsC8k5cnzc,14200
|
|
28
|
-
hud/cli/dev.py,sha256=
|
|
29
|
-
hud/cli/eval.py,sha256=
|
|
28
|
+
hud/cli/dev.py,sha256=2zUeVz5S__WrV-DLSDqOlQawcJS7eYPKiDRVUaJ8mAk,31579
|
|
29
|
+
hud/cli/eval.py,sha256=zoRC9ExxrsOEj3myTUz_72LVSnFF557lS1aJfhQ9kHg,25681
|
|
30
30
|
hud/cli/get.py,sha256=sksKrdzBGZa7ZuSoQkc0haj-CvOGVSSikoVXeaUd3N4,6274
|
|
31
31
|
hud/cli/init.py,sha256=YkWxkIDCnhnxGGpbm7IvYMcfDqWuO1X9wxDxE4k-9ew,9721
|
|
32
32
|
hud/cli/list_func.py,sha256=EVi2Vc3Lb3glBNJxFx4MPnZknZ4xmuJz1OFg_dc8a_E,7177
|
|
@@ -34,17 +34,17 @@ hud/cli/pull.py,sha256=XGEZ8n60tbzLQP_8d9h7XYmzyCW0e2-Rkr3_tLG7jvw,12449
|
|
|
34
34
|
hud/cli/push.py,sha256=DsXFrMtWBZ-HUxt6VoLihpklk8JJIe2gy-GA4AMg6Kw,18805
|
|
35
35
|
hud/cli/remove.py,sha256=8vGQyXDqgtjz85_vtusoIG8zurH4RHz6z8UMevQRYM4,6861
|
|
36
36
|
hud/cli/flows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
-
hud/cli/flows/tasks.py,sha256=
|
|
38
|
-
hud/cli/rl/__init__.py,sha256=
|
|
37
|
+
hud/cli/flows/tasks.py,sha256=z3qdn6MBLJRK7dvfp3IFkdCzuImKRps7jbcedmCTjDA,14485
|
|
38
|
+
hud/cli/rl/__init__.py,sha256=pGx4WGaL-yGdogJNzgEE7BtjFtT4I9CTI_UdCm49h98,5376
|
|
39
39
|
hud/cli/rl/celebrate.py,sha256=trGEJn3xebexlHwFVKPJKhRujVVV8sy7TQTJvRd2p9A,5947
|
|
40
|
-
hud/cli/rl/config.py,sha256=
|
|
40
|
+
hud/cli/rl/config.py,sha256=A-4WWwAS68GRKx1cP_DJ-NZD_96cFNnGwx0P3pQT1ps,3271
|
|
41
41
|
hud/cli/rl/display.py,sha256=hqJVGmO9csYinladhZwjF-GMvppYWngxDHajTyIJ_gM,5214
|
|
42
42
|
hud/cli/rl/gpu.py,sha256=peXS-NdUF5RyuSs0aZoCzGLboneBUpCy8f9f99WMrG0,2009
|
|
43
|
-
hud/cli/rl/gpu_utils.py,sha256=
|
|
43
|
+
hud/cli/rl/gpu_utils.py,sha256=VSdEWJDH-P9LjRZscQXPju5vB3FomP4Iy2znPcpUZc4,11199
|
|
44
44
|
hud/cli/rl/local_runner.py,sha256=NFsNmRZ4nenPnb45ZtdsILeICKEq11wmpLwq9E-a8ZE,22614
|
|
45
45
|
hud/cli/rl/presets.py,sha256=DzOO82xL5QyzdVtlX-Do1CODMvDz9ILMPapjU92jcZg,3051
|
|
46
|
-
hud/cli/rl/remote_runner.py,sha256=
|
|
47
|
-
hud/cli/rl/rl_api.py,sha256=
|
|
46
|
+
hud/cli/rl/remote_runner.py,sha256=fKmOVKSBUWfakunfe9-HAllpUJDxfRNZwL00fPw-QTI,17837
|
|
47
|
+
hud/cli/rl/rl_api.py,sha256=fvRMWQXhTSLM2zQaWWDas_u47RreH8erLgtXRKnQGeA,4350
|
|
48
48
|
hud/cli/rl/viewer.py,sha256=ExQs1IX3T8x_9aBzc4JojZ779jmFvFTh7EjOYIHzYsU,4441
|
|
49
49
|
hud/cli/rl/vllm.py,sha256=Gq_M6KsQArGz7FNIdemuM5mk16mu3xe8abpO2GCCuOE,6093
|
|
50
50
|
hud/cli/rl/wait_utils.py,sha256=FyIvqYWLOydANTetukoE5Rp2AOQi67qkiAlIJp4HpL8,2577
|
|
@@ -69,7 +69,7 @@ hud/cli/utils/config.py,sha256=AnsN6FEa8V3jg3EWaqUJN38-UuYC6tVZxPfBb_5LFBs,2652
|
|
|
69
69
|
hud/cli/utils/cursor.py,sha256=fy850p0rVp5k_1wwOCI7rK1SggbselJrywFInSQ2gio,3009
|
|
70
70
|
hud/cli/utils/docker.py,sha256=oGVzPfp0Rn89o9d6tgSEziKy9GXFrYaWn_mjBmGRHe4,6326
|
|
71
71
|
hud/cli/utils/env_check.py,sha256=TqsmwgTfMDzfP0Ii50YxDkOP4_T5nqks9JMTxIq60-s,7095
|
|
72
|
-
hud/cli/utils/environment.py,sha256=
|
|
72
|
+
hud/cli/utils/environment.py,sha256=cxsNwCfwX2PtCHht9xH_Yo5jpcqANf7h0wa3gfiy5tY,4278
|
|
73
73
|
hud/cli/utils/interactive.py,sha256=sHhTjaImxlwlZ5_DTXb23Jwrjy5oJ7diB-8duhHbImU,16647
|
|
74
74
|
hud/cli/utils/local_runner.py,sha256=jnPFoJu3sCq65LSUapKCkakdlEuz__96oJU_FfOYtEg,6542
|
|
75
75
|
hud/cli/utils/logging.py,sha256=DyOWuzZUg6HeKCqfs6ufb703XS3bW4G2pzaXVAvDqvA,9018
|
|
@@ -120,10 +120,10 @@ hud/rl/__init__.py,sha256=yYL7U1WV6L3mr3Hig48-4lhnryTaWj4nCXm4lG5vrYI,25
|
|
|
120
120
|
hud/rl/actor.py,sha256=H6gwRGRY1YpkOyiaJ9yai8yQwcI-Gx0dFxd18jpLx_Q,6950
|
|
121
121
|
hud/rl/buffer.py,sha256=z47HOjOBJx3umUzzUfdtq_N4ZoJ8FMBPkX8YQKBtd3A,15457
|
|
122
122
|
hud/rl/chat_template.jinja,sha256=XTdzI8oFGEcSA-exKxyHaprwRDmX5Am1KEb0VxvUc6U,4965
|
|
123
|
-
hud/rl/config.py,sha256=
|
|
123
|
+
hud/rl/config.py,sha256=akQ2a53NX3Dh1UWgMyw7mTxq33eiQbZcBpmKTzd79Xk,5624
|
|
124
124
|
hud/rl/distributed.py,sha256=8avhrb0lHYkhW22Z7MfkqSnlczWj5jMrUMEtkcoCf74,2473
|
|
125
125
|
hud/rl/learner.py,sha256=FKIgIIghsNiDr_g090xokOO_BxNmTSj1O-TSJzIq_Uw,24703
|
|
126
|
-
hud/rl/train.py,sha256=
|
|
126
|
+
hud/rl/train.py,sha256=hmobsaGp5UwK8u9oJGFdxCfI7rrxM-XbeVy-TkzQwxU,13804
|
|
127
127
|
hud/rl/types.py,sha256=lrLKo7iaqodYth2EyeuOQfLiuzXfYM2eJjPmpObrD7c,3965
|
|
128
128
|
hud/rl/utils.py,sha256=IsgVUUibxnUzb32a4mu1sYrgJC1CwoG9E-Dd5y5VDOA,19115
|
|
129
129
|
hud/rl/vllm_adapter.py,sha256=2wnTfoXPI4C9EzhVxk0GU-ArLjX7hgXS0BndMwN8Ppg,4751
|
|
@@ -204,12 +204,12 @@ hud/tools/tests/test_utils.py,sha256=qaujM1uyTMaKqWIeEgxty5GOFyfSUtrYCEHhmIazoy4
|
|
|
204
204
|
hud/utils/__init__.py,sha256=nk9Re6ls2RudAWnAHDWYbLG28AwNF4qMFYf5xQIJhQA,181
|
|
205
205
|
hud/utils/agent_factories.py,sha256=cvfXByqG6gOYHtm1VGeJjCpxoLxM4aJez8rH-AerP_A,3186
|
|
206
206
|
hud/utils/async_utils.py,sha256=5cKrJcnaHV2eJNxeyx0r7fPcdPTDBK7kM9-nLaF51X4,2409
|
|
207
|
-
hud/utils/group_eval.py,sha256=
|
|
207
|
+
hud/utils/group_eval.py,sha256=6yXEH8ZRKkR4bBy9-QWGmjlm2IbCnTUZppEFbjTvndY,8352
|
|
208
208
|
hud/utils/hud_console.py,sha256=ywTrzyNhWFoQN2PpzpDDKp_32b-ACDvfKQuWxDoF8iE,21898
|
|
209
209
|
hud/utils/mcp.py,sha256=pMadd7A0DH6Y_aWywKU8jVYu2pRHGPEndV2ZQFrrj60,2888
|
|
210
210
|
hud/utils/pretty_errors.py,sha256=WGeL4CTHtlA6KgPuV_JSX5l6H4-xbuTp6Y6tw1bkiFg,2430
|
|
211
211
|
hud/utils/progress.py,sha256=suikwFM8sdSfkV10nAOEaInDhG4XKgOSvFePg4jSj1A,5927
|
|
212
|
-
hud/utils/tasks.py,sha256=
|
|
212
|
+
hud/utils/tasks.py,sha256=7i36ck84gz1GZxhn9jryMBvKgMmcvLVu1YH5n3Y23-c,4985
|
|
213
213
|
hud/utils/telemetry.py,sha256=hrVIx2rUjSGyy9IVxTZ_3Jii83PiHjyFRd5ls2whimM,1863
|
|
214
214
|
hud/utils/tool_shorthand.py,sha256=_haLgK3yazLR2Y0jlEHUUQjw9uZCxi9yTipAwdOAJ70,2148
|
|
215
215
|
hud/utils/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -218,10 +218,10 @@ hud/utils/tests/test_init.py,sha256=2QLQSGgyP9wJhOvPCusm_zjJad0qApOZi1BXpxcdHXQ,
|
|
|
218
218
|
hud/utils/tests/test_mcp.py,sha256=0pUa16mL-bqbZDXp5NHBnt1gO5o10BOg7zTMHZ1DNPM,4023
|
|
219
219
|
hud/utils/tests/test_progress.py,sha256=QSF7Kpi03Ff_l3mAeqW9qs1nhK50j9vBiSobZq7T4f4,7394
|
|
220
220
|
hud/utils/tests/test_telemetry.py,sha256=5jl7bEx8C8b-FfFUko5pf4UY-mPOR-9HaeL98dGtVHM,2781
|
|
221
|
-
hud/utils/tests/test_version.py,sha256=
|
|
221
|
+
hud/utils/tests/test_version.py,sha256=5Kh7gFTdzSIvPh8KSkpP9Rq-4a4rJchabweQncIcYHQ,160
|
|
222
222
|
hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
223
|
-
hud_python-0.4.
|
|
224
|
-
hud_python-0.4.
|
|
225
|
-
hud_python-0.4.
|
|
226
|
-
hud_python-0.4.
|
|
227
|
-
hud_python-0.4.
|
|
223
|
+
hud_python-0.4.43.dist-info/METADATA,sha256=qTNrSt6NhfZR1_KzmtIGgZXbvAUZBlsh1xp_1JZMZaU,22275
|
|
224
|
+
hud_python-0.4.43.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
225
|
+
hud_python-0.4.43.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
|
|
226
|
+
hud_python-0.4.43.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
|
|
227
|
+
hud_python-0.4.43.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|