hud-python 0.4.41__py3-none-any.whl → 0.4.43__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

@@ -205,7 +205,7 @@ class GenericOpenAIChatAgent(MCPAgent):
205
205
  try:
206
206
  response = await self._invoke_chat_completion(
207
207
  messages=messages,
208
- tools=tools, # type: ignore
208
+ tools=tools, # type: ignore
209
209
  extra=extra,
210
210
  )
211
211
  except Exception as e:
hud/cli/__init__.py CHANGED
@@ -1178,6 +1178,11 @@ def rl(
1178
1178
  "--vllm-gpu",
1179
1179
  help="Specific GPU for vLLM server",
1180
1180
  ),
1181
+ vllm_gpu_count: int = typer.Option(
1182
+ 1,
1183
+ "--vllm-gpu-count",
1184
+ help="Number of GPUs for vLLM server",
1185
+ ),
1181
1186
  skip_vllm_startup: bool = typer.Option(
1182
1187
  False,
1183
1188
  "--skip-vllm-startup",
@@ -1199,6 +1204,7 @@ def rl(
1199
1204
  no_ddp=no_ddp,
1200
1205
  ddp_gpus=ddp_gpus,
1201
1206
  vllm_gpu=vllm_gpu,
1207
+ vllm_gpu_count=vllm_gpu_count,
1202
1208
  yes=yes,
1203
1209
  skip_vllm_startup=skip_vllm_startup,
1204
1210
  )
hud/cli/dev.py CHANGED
@@ -73,6 +73,24 @@ def create_proxy_server(
73
73
  "PYTHONUNBUFFERED=1", # Ensure Python output is not buffered
74
74
  ]
75
75
 
76
+ # Check for .env file in the project directory and add env vars
77
+ env_file = project_path / ".env"
78
+ loaded_env_vars = {}
79
+ if env_file.exists():
80
+ try:
81
+ from hud.cli.utils.config import parse_env_file
82
+
83
+ env_contents = env_file.read_text(encoding="utf-8")
84
+ loaded_env_vars = parse_env_file(env_contents)
85
+ for key, value in loaded_env_vars.items():
86
+ docker_cmd.extend(["-e", f"{key}={value}"])
87
+ if verbose and loaded_env_vars:
88
+ hud_console.info(
89
+ f"Loaded {len(loaded_env_vars)} environment variable(s) from .env file"
90
+ )
91
+ except Exception as e:
92
+ hud_console.warning(f"Failed to load .env file: {e}")
93
+
76
94
  # Add user-provided Docker arguments
77
95
  if docker_args:
78
96
  docker_cmd.extend(docker_args)
@@ -112,8 +130,12 @@ def create_proxy_server(
112
130
  hud_console.info("The container's CMD determines reload behavior")
113
131
  hud_console.command_example(f"docker logs -f {container_name}", "View container logs")
114
132
 
115
- # Show the full Docker command if there are environment variables
116
- if docker_args and any(arg == "-e" or arg.startswith("--env") for arg in docker_args):
133
+ # Show the full Docker command if there are environment variables (from .env or args)
134
+ has_env_from_args = docker_args and any(
135
+ arg == "-e" or arg.startswith("--env") for arg in docker_args
136
+ )
137
+ has_env_from_file = bool(loaded_env_vars)
138
+ if has_env_from_args or has_env_from_file:
117
139
  hud_console.info("")
118
140
  hud_console.info("Docker command with environment variables:")
119
141
  hud_console.info(" ".join(docker_cmd))
hud/cli/eval.py CHANGED
@@ -298,16 +298,15 @@ async def run_single_task(
298
298
  agent_config["allowed_tools"] = allowed_tools
299
299
 
300
300
  # Run with grouping
301
- with hud.trace(name=f"{task_prompt} (group_size={group_size})"):
302
- stats = await run_tasks_grouped(
303
- tasks=[task],
304
- agent_class=agent_class,
305
- agent_config=agent_config,
306
- group_size=group_size,
307
- max_parallel_episodes=48, # Same as RL default
308
- max_steps=max_steps,
309
- verbose=verbose,
310
- )
301
+ stats = await run_tasks_grouped(
302
+ tasks=[task],
303
+ agent_class=agent_class,
304
+ agent_config=agent_config,
305
+ group_size=group_size,
306
+ max_parallel_episodes=48, # Same as RL default
307
+ max_steps=max_steps,
308
+ verbose=verbose,
309
+ )
311
310
 
312
311
  # Display results
313
312
  display_group_statistics(stats, show_details=True)
@@ -499,7 +498,7 @@ async def run_full_dataset(
499
498
  )
500
499
 
501
500
  # Display results
502
- display_group_statistics(stats, show_details=len(stats) <= 20)
501
+ display_group_statistics(stats, show_details=len(stats) <= 50)
503
502
 
504
503
  # Return stats for consistency with other modes
505
504
  return stats
hud/cli/flows/tasks.py CHANGED
@@ -212,17 +212,14 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
212
212
  # Check if tasks already have remote URLs
213
213
  already_remote = _validate_tasks(tasks)
214
214
 
215
- # If tasks already reference a remote MCP URL, do not require a local environment
216
- # or attempt any image updates. Use the dataset as-is.
217
- if already_remote:
218
- return str(tasks_path)
219
-
220
215
  # Extract existing images from tasks
221
216
  existing_images = _extract_existing_images(tasks)
222
217
 
223
218
  # Locate environment
224
219
  env_dir = find_environment_dir(tasks_path)
225
220
  if not env_dir:
221
+ if already_remote:
222
+ return str(tasks_path)
226
223
  hud_console.error("Could not locate an environment directory (Dockerfile + pyproject.toml)")
227
224
  hud_console.hint("Ensure you're in or near your environment folder before running 'hud rl'")
228
225
  raise typer.Exit(1)
@@ -373,6 +370,8 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
373
370
  item["system_prompt"] = t.system_prompt
374
371
  if t.metadata:
375
372
  item["metadata"] = t.metadata
373
+ if t.id is not None:
374
+ item["id"] = t.id
376
375
 
377
376
  tasks_payload.append(item)
378
377
 
hud/cli/rl/__init__.py CHANGED
@@ -78,6 +78,11 @@ def rl_command(
78
78
  "-y",
79
79
  help="Auto-accept all prompts and use defaults (lazy mode)",
80
80
  ),
81
+ vllm_gpu_count: int = typer.Option(
82
+ None,
83
+ "--vllm-gpu-count",
84
+ help="Number of GPUs for vLLM server",
85
+ ),
81
86
  skip_vllm_startup: bool = typer.Option(
82
87
  False,
83
88
  "--skip-vllm-startup",
@@ -145,6 +150,7 @@ def rl_command(
145
150
  model=model,
146
151
  config_file=config_file,
147
152
  output_dir=output_dir,
153
+ vllm_gpu_count=vllm_gpu_count,
148
154
  yes=yes,
149
155
  )
150
156
  return
hud/cli/rl/config.py CHANGED
@@ -84,7 +84,7 @@ def save_config(config: Config, path: Path) -> None:
84
84
  """Save configuration to a JSON file."""
85
85
  config_dict = config.to_dict()
86
86
 
87
- with open(path, "w") as f:
87
+ with open(path, "w", encoding="utf-8") as f:
88
88
  json.dump(config_dict, f, indent=2)
89
89
  f.write("\n") # Add newline at end of file
90
90
 
@@ -94,7 +94,7 @@ def save_config(config: Config, path: Path) -> None:
94
94
 
95
95
  def load_config(path: Path) -> Config:
96
96
  """Load configuration from a JSON file."""
97
- with open(path) as f:
97
+ with open(path, encoding="utf-8") as f:
98
98
  data = json.load(f)
99
99
 
100
100
  # Use Config.from_dict which handles missing fields gracefully
hud/cli/rl/gpu_utils.py CHANGED
@@ -245,14 +245,18 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
245
245
  # Apply scaling rule
246
246
  if num_gpus == 1:
247
247
  # Special case: 2 groups for single GPU
248
+ groups_per_gpu = 2
248
249
  config.training.batch_size = 2 * group_size
249
250
  else:
250
- # Multi-GPU: each GPU processes 1 group
251
- config.training.batch_size = num_gpus * group_size
251
+ groups_per_gpu = config.training.batch_size // group_size
252
+ # Multi-GPU: each GPU processes groups_per_gpu groups
253
+ config.training.batch_size = num_gpus * group_size * groups_per_gpu
252
254
 
253
255
  # Update max_parallel_episodes to match
254
256
  config.actor.max_parallel_episodes = config.training.batch_size
255
257
 
258
+ config.training.num_gpus = num_gpus
259
+
256
260
  # Log the adjustment
257
261
  from rich.console import Console
258
262
 
@@ -261,7 +265,7 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
261
265
  f"\n[cyan]📊 Adjusted batch_size to {config.training.batch_size} ({config.training.batch_size // group_size} groups)[/cyan]" # noqa: E501
262
266
  )
263
267
  console.print(
264
- f"[cyan] Each of the {num_gpus} GPU(s) will process {config.training.batch_size // group_size // num_gpus} group(s) in parallel[/cyan]" # noqa: E501
268
+ f"[cyan] Each of the {num_gpus} GPU(s) will process {groups_per_gpu} group(s) in parallel[/cyan]" # noqa: E501
265
269
  )
266
270
 
267
271
  return config
@@ -32,7 +32,9 @@ GPU_PRICING = {
32
32
  }
33
33
 
34
34
 
35
- def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int = 600) -> None:
35
+ def ensure_vllm_deployed(
36
+ model_name: str, gpu_type: str = "A100", gpu_count: int = 1, timeout: int = 600
37
+ ) -> None:
36
38
  """Deploy vLLM for a model if needed and wait until it's ready.
37
39
 
38
40
  Args:
@@ -47,7 +49,7 @@ def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int =
47
49
  return
48
50
 
49
51
  hud_console.info(f"Deploying vLLM server for {model_name}...")
50
- rl_api.deploy_vllm(model_name, gpu_type=gpu_type)
52
+ rl_api.deploy_vllm(model_name, gpu_type=gpu_type, gpu_count=gpu_count)
51
53
  hud_console.success("vLLM deployment started")
52
54
 
53
55
  hud_console.info("Waiting for vLLM server to be ready...")
@@ -72,6 +74,7 @@ def run_remote_training(
72
74
  model: str | None,
73
75
  config_file: Path | None,
74
76
  output_dir: str,
77
+ vllm_gpu_count: int = 1,
75
78
  yes: bool = False,
76
79
  ) -> None:
77
80
  """Run RL training remotely via the API server following the new interactive flow."""
@@ -183,14 +186,18 @@ def run_remote_training(
183
186
 
184
187
  # Ask for model type
185
188
  if yes:
186
- model_type = "Qwen/Qwen2.5-VL-3B-Instruct" # Default model in yes mode
189
+ if config_file:
190
+ config = load_config(config_file)
191
+ model_type = config.model.base_model
192
+ else:
193
+ model_type = "Qwen/Qwen2.5-VL-3B-Instruct"
187
194
  hud_console.info(f"Auto-selecting base model: {model_type} (--yes mode)")
188
195
  else:
189
196
  model_type = hud_console.select(
190
197
  "Select base model type:",
191
198
  choices=[
192
199
  {"name": "Qwen2.5-VL-3B-Instruct", "value": "Qwen/Qwen2.5-VL-3B-Instruct"},
193
- # {"name": "Qwen2.5-VL-7B-Instruct", "value": "Qwen/Qwen2.5-VL-7B-Instruct"}, # noqa: E501
200
+ {"name": "Qwen2.5-3B-Instruct", "value": "Qwen/Qwen2.5-3B-Instruct"},
194
201
  ],
195
202
  default=0,
196
203
  )
@@ -218,7 +225,7 @@ def run_remote_training(
218
225
  try:
219
226
  rl_api.create_model(model_name, model_type)
220
227
  hud_console.success(f"Created model: {model_name}")
221
- ensure_vllm_deployed(model_name, gpu_type="A100")
228
+ ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
222
229
 
223
230
  except Exception as e:
224
231
  # If the name already exists, suggest a new name and prompt once
@@ -247,7 +254,7 @@ def run_remote_training(
247
254
  rl_api.create_model(chosen, model_type)
248
255
  hud_console.success(f"Created model: {chosen}")
249
256
  model_name = chosen
250
- ensure_vllm_deployed(model_name, gpu_type="A100")
257
+ ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
251
258
  except Exception as e2:
252
259
  hud_console.error(f"Failed to create model: {e2}")
253
260
  raise
@@ -281,7 +288,7 @@ def run_remote_training(
281
288
  return
282
289
 
283
290
  # Ensure vLLM is deployed
284
- ensure_vllm_deployed(model_name, gpu_type="A100")
291
+ ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
285
292
  except KeyboardInterrupt:
286
293
  hud_console.dim_info("Training cancelled", "")
287
294
  return
@@ -310,7 +317,7 @@ def run_remote_training(
310
317
  # console.print(gpu_table)
311
318
 
312
319
  if yes:
313
- gpu_choice = "A100" # Default GPU in yes mode
320
+ gpu_choice = "A100"
314
321
  hud_console.info(f"Auto-selecting GPU: {gpu_choice} 80GB (--yes mode)")
315
322
  else:
316
323
  gpu_choice = hud_console.select(
@@ -323,7 +330,7 @@ def run_remote_training(
323
330
  )
324
331
 
325
332
  if yes:
326
- num_gpus = 1 # Default to 1 GPU in yes mode
333
+ num_gpus = 2 # Default to 2 GPUs in yes mode
327
334
  hud_console.info(f"Auto-selecting {num_gpus} GPU(s) (--yes mode)")
328
335
  else:
329
336
  num_gpus = hud_console.select(
@@ -350,6 +357,8 @@ def run_remote_training(
350
357
 
351
358
  config = adjust_config_for_ddp(config, int(num_gpus))
352
359
 
360
+ config.training.gpu_type = gpu_choice
361
+
353
362
  # Use a short label for tasks (avoid full absolute paths)
354
363
  try:
355
364
  if tasks_file and Path(tasks_file).exists():
@@ -423,9 +432,11 @@ def run_remote_training(
423
432
  # Load provided config
424
433
  hud_console.info(f"Loading configuration from: {config_file}")
425
434
  config = load_config(config_file)
435
+ gpu_choice = config.training.gpu_type
436
+ num_gpus = config.training.num_gpus
437
+
438
+ config = adjust_config_for_ddp(config, int(num_gpus))
426
439
  config_dict = config.to_dict()
427
- gpu_choice = "A100" # Default
428
- num_gpus = 1 # Default for non-interactive mode
429
440
 
430
441
  # Launch training
431
442
  try:
hud/cli/rl/rl_api.py CHANGED
@@ -61,12 +61,12 @@ def list_models() -> list[RLModelInfo]:
61
61
  ]
62
62
 
63
63
 
64
- def deploy_vllm(model_name: str, gpu_type: str = "A100") -> dict[str, Any]:
64
+ def deploy_vllm(model_name: str, gpu_type: str = "A100", gpu_count: int = 1) -> dict[str, Any]:
65
65
  """Deploy a vLLM server for a model."""
66
66
  return make_request_sync(
67
67
  method="POST",
68
68
  url=f"{settings.hud_rl_url}/models/{model_name}/deploy",
69
- json={"gpu_type": gpu_type},
69
+ json={"gpu_type": gpu_type, "gpu_count": gpu_count},
70
70
  api_key=settings.api_key,
71
71
  )
72
72
 
@@ -127,8 +127,4 @@ def is_environment_directory(path: str | Path) -> bool:
127
127
  return False
128
128
 
129
129
  # Must have pyproject.toml
130
- if not (dir_path / "pyproject.toml").exists():
131
- hud_console.error("pyproject.toml not found")
132
- return False
133
-
134
- return True
130
+ return (dir_path / "pyproject.toml").exists()
hud/rl/actor.py CHANGED
@@ -109,7 +109,7 @@ class Actor:
109
109
 
110
110
  # Run the task
111
111
  try:
112
- with hud.trace(f"Training | {task.id}", job_id=job_id):
112
+ with hud.trace(f"Training | {task.prompt}", job_id=job_id):
113
113
  result = await agent.run(task, max_steps=self.actor_config.max_steps_per_episode)
114
114
 
115
115
  except Exception:
hud/rl/config.py CHANGED
@@ -13,6 +13,7 @@ SUPPORTED_MODELS = [
13
13
  "Qwen/Qwen2.5-VL-32B-Instruct",
14
14
  "Qwen/Qwen2.5-VL-72B-Instruct",
15
15
  "Qwen/Qwen2.5-7B-Instruct",
16
+ "Qwen/Qwen2.5-3B-Instruct",
16
17
  ]
17
18
 
18
19
 
@@ -39,9 +40,9 @@ class ModelConfig:
39
40
  """Model and LoRA configuration."""
40
41
 
41
42
  base_model: str = "Qwen/Qwen2.5-VL-3B-Instruct"
42
- lora_r: int = 8
43
- lora_alpha: int = 16
44
- lora_dropout: float = 0.05
43
+ lora_r: int = 16
44
+ lora_alpha: int = 32
45
+ lora_dropout: float = 0.1
45
46
  target_modules: tuple[str, ...] = (
46
47
  "q_proj",
47
48
  "k_proj",
@@ -62,15 +63,19 @@ class ModelConfig:
62
63
  class TrainingConfig:
63
64
  """Training hyperparameters."""
64
65
 
66
+ # GPU parameters
67
+ gpu_type: str = "A100"
68
+ num_gpus: int = 2
69
+
65
70
  # Training parameters
66
71
  training_steps: int = 100
67
72
  shuffle_dataset: bool = False
68
73
  save_every_batches: int = 1
69
74
 
70
75
  # Batching parameters
71
- epochs: int = 2
72
- batch_size: int = 24
73
- group_size: int = 4
76
+ epochs: int = 1
77
+ batch_size: int = 16
78
+ group_size: int = 8
74
79
  mini_batch_size: int = 1
75
80
  update_after_group: bool = True # Whether to update the policy after each task group
76
81
  accumulate_over_minibatches: bool = False # Whether to accumulate over minibatches
@@ -81,7 +86,7 @@ class TrainingConfig:
81
86
  leave_one_out: bool = True
82
87
 
83
88
  # Replay buffer parameters
84
- buffer_steps: int = 4
89
+ buffer_steps: int = 8
85
90
  select_strategy: Literal["recent", "variance", "random"] = "variance"
86
91
 
87
92
  # Aggregation parameters
@@ -89,8 +94,8 @@ class TrainingConfig:
89
94
  token_agg: Literal["mean", "sum"] = "mean" # noqa: S105
90
95
 
91
96
  # Regularization parameters
92
- kl_beta: float = 0.0
93
- entropy_beta: float = 0.0
97
+ kl_beta: float = 0.001
98
+ entropy_beta: float = 0.001
94
99
  top_eps: float = 0.2
95
100
  bottom_eps: float = 0.1
96
101
 
@@ -140,6 +145,7 @@ class Config:
140
145
  job_id: str | None = None # Use existing job ID if provided
141
146
  stats_interval: int = 1
142
147
  verbose: bool = False
148
+ very_verbose: bool = False
143
149
 
144
150
  # Paths
145
151
  out_dir: str = "./checkpoints"
@@ -163,6 +169,7 @@ class Config:
163
169
  job_id=d.get("job_id"),
164
170
  stats_interval=d.get("stats_interval", 1),
165
171
  verbose=d.get("verbose", False),
172
+ very_verbose=d.get("very_verbose", False),
166
173
  out_dir=d.get("out_dir", "./checkpoints"),
167
174
  adapter_prefix=d.get("adapter_prefix", "cua-grpo-step"),
168
175
  seed=d.get("seed", 1234),
@@ -178,6 +185,7 @@ class Config:
178
185
  "job_id": self.job_id,
179
186
  "stats_interval": self.stats_interval,
180
187
  "verbose": self.verbose,
188
+ "very_verbose": self.very_verbose,
181
189
  "out_dir": self.out_dir,
182
190
  "adapter_prefix": self.adapter_prefix,
183
191
  "seed": self.seed,
hud/rl/train.py CHANGED
@@ -56,6 +56,10 @@ async def train(config: Config, tasks: list[Task]) -> None:
56
56
  logging.basicConfig(level=logging.INFO)
57
57
  # Remove httpx logger
58
58
  logging.getLogger("httpx").setLevel(logging.WARNING)
59
+ if config.very_verbose:
60
+ logging.basicConfig(level=logging.DEBUG)
61
+ # Remove httpx logger
62
+ logging.getLogger("httpx").setLevel(logging.INFO)
59
63
 
60
64
  if is_main_process():
61
65
  hud_console.header("Starting GRPO Training")
@@ -103,7 +107,9 @@ async def train(config: Config, tasks: list[Task]) -> None:
103
107
  if is_main_process():
104
108
  hud_console.info(f"Creating job with config.job_id: {config.job_id}")
105
109
  job_obj = hud.create_job(
106
- job_id=config.job_id, name=config.job_name, metadata={"config": config.to_dict()}
110
+ job_id=config.job_id,
111
+ name=config.job_name,
112
+ metadata={"config": config.to_dict(), "agent_class": config.model.base_model},
107
113
  )
108
114
  hud_console.info(f"Created job with job_obj.id: {job_obj.id}")
109
115
  job_obj.update_status_sync("running")
@@ -296,7 +302,7 @@ async def main() -> None:
296
302
 
297
303
  # Load config
298
304
  if args.config:
299
- with open(args.config) as f: # noqa: ASYNC230
305
+ with open(args.config, encoding="utf-8") as f: # noqa: ASYNC230
300
306
  config_dict = json.load(f)
301
307
  config = Config.from_dict(config_dict)
302
308
  else:
@@ -334,7 +340,7 @@ async def main() -> None:
334
340
  # Load tasks
335
341
  if args.tasks_json:
336
342
  # Tasks provided as JSON list via command line
337
- tasks = load_tasks(args.tasks_jso)
343
+ tasks = load_tasks(args.tasks_json)
338
344
  elif args.tasks:
339
345
  # Tasks provided as file path or HuggingFace dataset
340
346
  tasks = load_tasks(args.tasks)
hud/utils/group_eval.py CHANGED
@@ -189,7 +189,7 @@ def display_group_statistics(stats: list[dict[str, Any]], show_details: bool = T
189
189
  hud_console.info(f"Overall mean reward: {overall_mean:.3f} ± {overall_std:.3f}")
190
190
 
191
191
  # Detailed table
192
- if show_details and len(stats) <= 20: # Only show for reasonable dataset sizes
192
+ if show_details and len(stats) <= 50: # Only show for reasonable dataset sizes
193
193
  table = Table(title="\nPer-Task Performance Distribution")
194
194
  table.add_column("Task", style="cyan", no_wrap=True)
195
195
  table.add_column("Mean±Std", justify="right", style="green")
@@ -216,7 +216,7 @@ def display_group_statistics(stats: list[dict[str, Any]], show_details: bool = T
216
216
  # High variance tasks
217
217
  high_variance_tasks = [s for s in stats if s["std_reward"] > 0.3 and s["group_size"] > 1]
218
218
  if high_variance_tasks:
219
- hud_console.warning(f"\n⚠️ {len(high_variance_tasks)} tasks show high variance (std > 0.3)")
219
+ hud_console.warning(f"\n{len(high_variance_tasks)} tasks show high variance (std > 0.3)")
220
220
  for task in high_variance_tasks[:3]:
221
221
  hud_console.info(
222
222
  f" • {task['task_id']}: μ={task['mean_reward']:.3f}, σ={task['std_reward']:.3f}" # noqa: RUF001
hud/utils/tasks.py CHANGED
@@ -40,7 +40,7 @@ def load_tasks(tasks_input: str | list[dict], *, raw: bool = False) -> list[Task
40
40
  if Path(tasks_input).exists():
41
41
  file_path = Path(tasks_input)
42
42
 
43
- with open(file_path) as f:
43
+ with open(file_path, encoding="utf-8") as f:
44
44
  # Handle JSON files (array of tasks)
45
45
  if file_path.suffix.lower() == ".json":
46
46
  data = json.load(f)
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.4.41"
8
+ assert hud.__version__ == "0.4.43"
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.4.41"
7
+ __version__ = "0.4.43"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.41
3
+ Version: 0.4.43
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -2,7 +2,7 @@ hud/__init__.py,sha256=JMDFUE1pP0J1Xl_miBdt7ERvoffZmTzSFe8yxz512A8,552
2
2
  hud/__main__.py,sha256=YR8Dq8OhINOsVfQ55PmRXXg4fEK84Rt_-rMtJ5rvhWo,145
3
3
  hud/settings.py,sha256=disObWa-DgXzoDcCDp3y1dTPaNsbR0IvoMJL9Eg4zyo,3947
4
4
  hud/types.py,sha256=pmPj_8emfMIfEY_fRS8NgIJ56kCsolWSqQjyCzXDaGY,11072
5
- hud/version.py,sha256=C1Fy_xyAmtsGqUTTCdTCms68E9dj7ZaxR50aXhxj6cc,105
5
+ hud/version.py,sha256=AekBbsq3gM3fHm0EOZE1KbYDAcZYlNagJ4ps3KU-byo,105
6
6
  hud/agents/__init__.py,sha256=UoIkljWdbq4bM0LD-mSaw6w826EqdEjOk7r6glNYwYQ,286
7
7
  hud/agents/base.py,sha256=_u1zR3gXzZ1RlTCUYdMcvgHqdJBC4-AB1lZt0yBx8lg,35406
8
8
  hud/agents/claude.py,sha256=TGhm5gE2ltINDAdEsDxKuT9iGMQ5G87R6kmabU3KPt8,16101
@@ -10,7 +10,7 @@ hud/agents/grounded_openai.py,sha256=U-FHjB2Nh1_o0gmlxY5F17lWJ3oHsNRIB2a7z-IKB64
10
10
  hud/agents/langchain.py,sha256=1EgCy8jfjunsWxlPC5XfvfLS6_XZVrIF1ZjtHcrvhYw,9584
11
11
  hud/agents/lite_llm.py,sha256=_3wbUiYCp7q8Vyu9rhaoJDvmb_bsyUsLYWP3iQJ2bHo,2239
12
12
  hud/agents/openai.py,sha256=O1xV1h1l-W8lmnmXqTYr5CwnmnaniMqOxAZbl2CTTng,14576
13
- hud/agents/openai_chat_generic.py,sha256=RUfXDZSUbOXQ2leR4_8PGpqvUzz5PJOWeR3PTticKUY,12150
13
+ hud/agents/openai_chat_generic.py,sha256=_vAID9dZ_UxL0elYwafskRcsdrSsLsxJ4zPrP58oBiw,12151
14
14
  hud/agents/misc/__init__.py,sha256=BYi4Ytp9b_vycpZFXnr5Oyw6ncKLNNGml8Jrb7bWUb4,136
15
15
  hud/agents/misc/response_agent.py,sha256=uMuRDkz5QgaMQliNzBRepond5sb7KyqIiKm3LstjVnw,3753
16
16
  hud/agents/tests/__init__.py,sha256=W-O-_4i34d9TTyEHV-O_q1Ai1gLhzwDaaPo02_TWQIY,34
@@ -19,14 +19,14 @@ hud/agents/tests/test_claude.py,sha256=0nZnfsbGoECvsLPdmaRnc9jVmrehVvc3kxeyiCQI2
19
19
  hud/agents/tests/test_client.py,sha256=uikgh6yhjPPX2RBU4XJQMz1mNox9uXjuwsP8t93id18,13337
20
20
  hud/agents/tests/test_grounded_openai_agent.py,sha256=VK8lUvHIjWicMX00VKPE-FZyjiJqTEhb80MuRRa9fVc,5437
21
21
  hud/agents/tests/test_openai.py,sha256=Npbdr0acgLExGLbrleXze-k3w9LHfmqzQjPk9TnjN68,7620
22
- hud/cli/__init__.py,sha256=v4602N3FWOXD2raK1h3APJXFshsCrJV8P3JFcK--IvM,45596
22
+ hud/cli/__init__.py,sha256=lwyaA7z7H4BOt9ksySpT0AnRERoYEiVgUdwV_5s9wIg,45768
23
23
  hud/cli/__main__.py,sha256=fDH7XITyuDITwSDIVwRso06aouADO0CzTHKqp5TOwJE,143
24
24
  hud/cli/analyze.py,sha256=4u5oYfJMquOjT9PzzRTYVcTZDxDi0ilNP_g532_hpOU,14716
25
25
  hud/cli/build.py,sha256=h-4SAoe3j8Pth3mPYf26vh7q1Do5JADlvKKwkZrf2AU,19551
26
26
  hud/cli/clone.py,sha256=AwVDIuhr8mHb1oT2Af2HrD25SiTdwATpE6zd93vzLgA,6099
27
27
  hud/cli/debug.py,sha256=jtFW8J5F_3rhq1Hf1_SkJ7aLS3wjnyIs_LsC8k5cnzc,14200
28
- hud/cli/dev.py,sha256=J0Q_ndHbQcXe64gMjXfqiccWYWpdiYWvTKbJhCAvlgI,30666
29
- hud/cli/eval.py,sha256=d1RouB3rxP3axca2sRblNWZMNvHGP1EugST5fCJ-7tc,25790
28
+ hud/cli/dev.py,sha256=2zUeVz5S__WrV-DLSDqOlQawcJS7eYPKiDRVUaJ8mAk,31579
29
+ hud/cli/eval.py,sha256=zoRC9ExxrsOEj3myTUz_72LVSnFF557lS1aJfhQ9kHg,25681
30
30
  hud/cli/get.py,sha256=sksKrdzBGZa7ZuSoQkc0haj-CvOGVSSikoVXeaUd3N4,6274
31
31
  hud/cli/init.py,sha256=YkWxkIDCnhnxGGpbm7IvYMcfDqWuO1X9wxDxE4k-9ew,9721
32
32
  hud/cli/list_func.py,sha256=EVi2Vc3Lb3glBNJxFx4MPnZknZ4xmuJz1OFg_dc8a_E,7177
@@ -34,17 +34,17 @@ hud/cli/pull.py,sha256=XGEZ8n60tbzLQP_8d9h7XYmzyCW0e2-Rkr3_tLG7jvw,12449
34
34
  hud/cli/push.py,sha256=DsXFrMtWBZ-HUxt6VoLihpklk8JJIe2gy-GA4AMg6Kw,18805
35
35
  hud/cli/remove.py,sha256=8vGQyXDqgtjz85_vtusoIG8zurH4RHz6z8UMevQRYM4,6861
36
36
  hud/cli/flows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
- hud/cli/flows/tasks.py,sha256=d-RsIV0rvzBuYN6WR7AOId19wzEy0z7SSW9c5RnEaqQ,14564
38
- hud/cli/rl/__init__.py,sha256=q0GIYRARpNkY8o1Sza5CjQq6cks_1W_SjGprybaCxq4,5193
37
+ hud/cli/flows/tasks.py,sha256=z3qdn6MBLJRK7dvfp3IFkdCzuImKRps7jbcedmCTjDA,14485
38
+ hud/cli/rl/__init__.py,sha256=pGx4WGaL-yGdogJNzgEE7BtjFtT4I9CTI_UdCm49h98,5376
39
39
  hud/cli/rl/celebrate.py,sha256=trGEJn3xebexlHwFVKPJKhRujVVV8sy7TQTJvRd2p9A,5947
40
- hud/cli/rl/config.py,sha256=VZ8fiOI22Aw6YTRk7gj1ozpF-TU7NK8QWQgWFwMbNs0,3235
40
+ hud/cli/rl/config.py,sha256=A-4WWwAS68GRKx1cP_DJ-NZD_96cFNnGwx0P3pQT1ps,3271
41
41
  hud/cli/rl/display.py,sha256=hqJVGmO9csYinladhZwjF-GMvppYWngxDHajTyIJ_gM,5214
42
42
  hud/cli/rl/gpu.py,sha256=peXS-NdUF5RyuSs0aZoCzGLboneBUpCy8f9f99WMrG0,2009
43
- hud/cli/rl/gpu_utils.py,sha256=H5ckPwgj5EVP3yJ5eVihR5R7Y6Gp6pt8ZUfWCCwcLG4,11072
43
+ hud/cli/rl/gpu_utils.py,sha256=VSdEWJDH-P9LjRZscQXPju5vB3FomP4Iy2znPcpUZc4,11199
44
44
  hud/cli/rl/local_runner.py,sha256=NFsNmRZ4nenPnb45ZtdsILeICKEq11wmpLwq9E-a8ZE,22614
45
45
  hud/cli/rl/presets.py,sha256=DzOO82xL5QyzdVtlX-Do1CODMvDz9ILMPapjU92jcZg,3051
46
- hud/cli/rl/remote_runner.py,sha256=FkFr6IOHLdmlNOZTqjaNBOCYyGm7Tf2BAT2YCWOoIGM,17486
47
- hud/cli/rl/rl_api.py,sha256=INJobvSa50ccR037u_GPsDa_9WboWyNwqEaoh9hcXj0,4306
46
+ hud/cli/rl/remote_runner.py,sha256=fKmOVKSBUWfakunfe9-HAllpUJDxfRNZwL00fPw-QTI,17837
47
+ hud/cli/rl/rl_api.py,sha256=fvRMWQXhTSLM2zQaWWDas_u47RreH8erLgtXRKnQGeA,4350
48
48
  hud/cli/rl/viewer.py,sha256=ExQs1IX3T8x_9aBzc4JojZ779jmFvFTh7EjOYIHzYsU,4441
49
49
  hud/cli/rl/vllm.py,sha256=Gq_M6KsQArGz7FNIdemuM5mk16mu3xe8abpO2GCCuOE,6093
50
50
  hud/cli/rl/wait_utils.py,sha256=FyIvqYWLOydANTetukoE5Rp2AOQi67qkiAlIJp4HpL8,2577
@@ -69,7 +69,7 @@ hud/cli/utils/config.py,sha256=AnsN6FEa8V3jg3EWaqUJN38-UuYC6tVZxPfBb_5LFBs,2652
69
69
  hud/cli/utils/cursor.py,sha256=fy850p0rVp5k_1wwOCI7rK1SggbselJrywFInSQ2gio,3009
70
70
  hud/cli/utils/docker.py,sha256=oGVzPfp0Rn89o9d6tgSEziKy9GXFrYaWn_mjBmGRHe4,6326
71
71
  hud/cli/utils/env_check.py,sha256=TqsmwgTfMDzfP0Ii50YxDkOP4_T5nqks9JMTxIq60-s,7095
72
- hud/cli/utils/environment.py,sha256=EfATQyAz8Jybj4N9QNaaADUrpiZ2JMp2elQYnAG9gU8,4371
72
+ hud/cli/utils/environment.py,sha256=cxsNwCfwX2PtCHht9xH_Yo5jpcqANf7h0wa3gfiy5tY,4278
73
73
  hud/cli/utils/interactive.py,sha256=sHhTjaImxlwlZ5_DTXb23Jwrjy5oJ7diB-8duhHbImU,16647
74
74
  hud/cli/utils/local_runner.py,sha256=jnPFoJu3sCq65LSUapKCkakdlEuz__96oJU_FfOYtEg,6542
75
75
  hud/cli/utils/logging.py,sha256=DyOWuzZUg6HeKCqfs6ufb703XS3bW4G2pzaXVAvDqvA,9018
@@ -117,13 +117,13 @@ hud/otel/tests/__init__.py,sha256=VNJKBMaxTtbn7trW-1Ph50zCvCok_wTSGcI1HD6GOLA,43
117
117
  hud/otel/tests/test_processors.py,sha256=np0R4ssd9j6LJSJykJ5bNjl0POwNYNhgb7BqOZHwcMY,6778
118
118
  hud/rl/README.md,sha256=uFRpNFaEY8paq9k1C4miF7AGnbqHTGAsPmpcf9JIEeA,1189
119
119
  hud/rl/__init__.py,sha256=yYL7U1WV6L3mr3Hig48-4lhnryTaWj4nCXm4lG5vrYI,25
120
- hud/rl/actor.py,sha256=n2f2BI9IOK__x7Seirq6EQI0yyicMBYd5BjPsc4T9rQ,6946
120
+ hud/rl/actor.py,sha256=H6gwRGRY1YpkOyiaJ9yai8yQwcI-Gx0dFxd18jpLx_Q,6950
121
121
  hud/rl/buffer.py,sha256=z47HOjOBJx3umUzzUfdtq_N4ZoJ8FMBPkX8YQKBtd3A,15457
122
122
  hud/rl/chat_template.jinja,sha256=XTdzI8oFGEcSA-exKxyHaprwRDmX5Am1KEb0VxvUc6U,4965
123
- hud/rl/config.py,sha256=PAKYPCsKl8yg_j3gJSE5SJUgLM7j0lFy0K_Vt4-otDM,5384
123
+ hud/rl/config.py,sha256=akQ2a53NX3Dh1UWgMyw7mTxq33eiQbZcBpmKTzd79Xk,5624
124
124
  hud/rl/distributed.py,sha256=8avhrb0lHYkhW22Z7MfkqSnlczWj5jMrUMEtkcoCf74,2473
125
125
  hud/rl/learner.py,sha256=FKIgIIghsNiDr_g090xokOO_BxNmTSj1O-TSJzIq_Uw,24703
126
- hud/rl/train.py,sha256=ZigkUKj-I1nsYmFByZprqaoDZ88LVDH-6auYneEPOsA,13555
126
+ hud/rl/train.py,sha256=hmobsaGp5UwK8u9oJGFdxCfI7rrxM-XbeVy-TkzQwxU,13804
127
127
  hud/rl/types.py,sha256=lrLKo7iaqodYth2EyeuOQfLiuzXfYM2eJjPmpObrD7c,3965
128
128
  hud/rl/utils.py,sha256=IsgVUUibxnUzb32a4mu1sYrgJC1CwoG9E-Dd5y5VDOA,19115
129
129
  hud/rl/vllm_adapter.py,sha256=2wnTfoXPI4C9EzhVxk0GU-ArLjX7hgXS0BndMwN8Ppg,4751
@@ -204,12 +204,12 @@ hud/tools/tests/test_utils.py,sha256=qaujM1uyTMaKqWIeEgxty5GOFyfSUtrYCEHhmIazoy4
204
204
  hud/utils/__init__.py,sha256=nk9Re6ls2RudAWnAHDWYbLG28AwNF4qMFYf5xQIJhQA,181
205
205
  hud/utils/agent_factories.py,sha256=cvfXByqG6gOYHtm1VGeJjCpxoLxM4aJez8rH-AerP_A,3186
206
206
  hud/utils/async_utils.py,sha256=5cKrJcnaHV2eJNxeyx0r7fPcdPTDBK7kM9-nLaF51X4,2409
207
- hud/utils/group_eval.py,sha256=oaoBqlQN6g5gRQmuY_JmqM5bpuf2sFIgu4uDZ7X-3a0,8360
207
+ hud/utils/group_eval.py,sha256=6yXEH8ZRKkR4bBy9-QWGmjlm2IbCnTUZppEFbjTvndY,8352
208
208
  hud/utils/hud_console.py,sha256=ywTrzyNhWFoQN2PpzpDDKp_32b-ACDvfKQuWxDoF8iE,21898
209
209
  hud/utils/mcp.py,sha256=pMadd7A0DH6Y_aWywKU8jVYu2pRHGPEndV2ZQFrrj60,2888
210
210
  hud/utils/pretty_errors.py,sha256=WGeL4CTHtlA6KgPuV_JSX5l6H4-xbuTp6Y6tw1bkiFg,2430
211
211
  hud/utils/progress.py,sha256=suikwFM8sdSfkV10nAOEaInDhG4XKgOSvFePg4jSj1A,5927
212
- hud/utils/tasks.py,sha256=4v8FvVhXXefbWFldf564XPCQIActYI3tmA510-SA4LE,4967
212
+ hud/utils/tasks.py,sha256=7i36ck84gz1GZxhn9jryMBvKgMmcvLVu1YH5n3Y23-c,4985
213
213
  hud/utils/telemetry.py,sha256=hrVIx2rUjSGyy9IVxTZ_3Jii83PiHjyFRd5ls2whimM,1863
214
214
  hud/utils/tool_shorthand.py,sha256=_haLgK3yazLR2Y0jlEHUUQjw9uZCxi9yTipAwdOAJ70,2148
215
215
  hud/utils/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -218,10 +218,10 @@ hud/utils/tests/test_init.py,sha256=2QLQSGgyP9wJhOvPCusm_zjJad0qApOZi1BXpxcdHXQ,
218
218
  hud/utils/tests/test_mcp.py,sha256=0pUa16mL-bqbZDXp5NHBnt1gO5o10BOg7zTMHZ1DNPM,4023
219
219
  hud/utils/tests/test_progress.py,sha256=QSF7Kpi03Ff_l3mAeqW9qs1nhK50j9vBiSobZq7T4f4,7394
220
220
  hud/utils/tests/test_telemetry.py,sha256=5jl7bEx8C8b-FfFUko5pf4UY-mPOR-9HaeL98dGtVHM,2781
221
- hud/utils/tests/test_version.py,sha256=Y5eL4qx41L0b6Ih_xxAcO_cnvcUmfISmHlut9W0haOA,160
221
+ hud/utils/tests/test_version.py,sha256=5Kh7gFTdzSIvPh8KSkpP9Rq-4a4rJchabweQncIcYHQ,160
222
222
  hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
223
- hud_python-0.4.41.dist-info/METADATA,sha256=Hx89Ngi8RmMOFtvMNF5y2m_1Y_lXI3C5dtTH_g4Fco0,22275
224
- hud_python-0.4.41.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
225
- hud_python-0.4.41.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
226
- hud_python-0.4.41.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
227
- hud_python-0.4.41.dist-info/RECORD,,
223
+ hud_python-0.4.43.dist-info/METADATA,sha256=qTNrSt6NhfZR1_KzmtIGgZXbvAUZBlsh1xp_1JZMZaU,22275
224
+ hud_python-0.4.43.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
225
+ hud_python-0.4.43.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
226
+ hud_python-0.4.43.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
227
+ hud_python-0.4.43.dist-info/RECORD,,