PyPI - hud-python - Versions diffs - 0.4.42__tar.gz → 0.4.44__tar.gz - Mend

hud-python 0.4.42__tar.gz → 0.4.44__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (246)

{hud_python-0.4.42 → hud_python-0.4.44}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.4.42
+Version: 0.4.44
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues

{hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/openai_chat_generic.py RENAMED Viewed

@@ -205,7 +205,7 @@ class GenericOpenAIChatAgent(MCPAgent):
         try:
             response = await self._invoke_chat_completion(
                 messages=messages,
-                tools=tools, # type: ignore
+                tools=tools,  # type: ignore
                 extra=extra,
             )
         except Exception as e:

{hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/__init__.py RENAMED Viewed

@@ -1178,6 +1178,11 @@ def rl(
         "--vllm-gpu",
         help="Specific GPU for vLLM server",
     ),
+    vllm_gpu_count: int = typer.Option(
+        1,
+        "--vllm-gpu-count",
+        help="Number of GPUs for vLLM server",
+    ),
     skip_vllm_startup: bool = typer.Option(
         False,
         "--skip-vllm-startup",
@@ -1199,6 +1204,7 @@ def rl(
         no_ddp=no_ddp,
         ddp_gpus=ddp_gpus,
         vllm_gpu=vllm_gpu,
+        vllm_gpu_count=vllm_gpu_count,
         yes=yes,
         skip_vllm_startup=skip_vllm_startup,
     )

{hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/dev.py RENAMED Viewed

@@ -73,6 +73,24 @@ def create_proxy_server(
         "PYTHONUNBUFFERED=1",  # Ensure Python output is not buffered
     ]
+    # Check for .env file in the project directory and add env vars
+    env_file = project_path / ".env"
+    loaded_env_vars = {}
+    if env_file.exists():
+        try:
+            from hud.cli.utils.config import parse_env_file
+            env_contents = env_file.read_text(encoding="utf-8")
+            loaded_env_vars = parse_env_file(env_contents)
+            for key, value in loaded_env_vars.items():
+                docker_cmd.extend(["-e", f"{key}={value}"])
+            if verbose and loaded_env_vars:
+                hud_console.info(
+                    f"Loaded {len(loaded_env_vars)} environment variable(s) from .env file"
+                )
+        except Exception as e:
+            hud_console.warning(f"Failed to load .env file: {e}")
     # Add user-provided Docker arguments
     if docker_args:
         docker_cmd.extend(docker_args)
@@ -112,8 +130,12 @@ def create_proxy_server(
             hud_console.info("The container's CMD determines reload behavior")
         hud_console.command_example(f"docker logs -f {container_name}", "View container logs")
-        # Show the full Docker command if there are environment variables
-        if docker_args and any(arg == "-e" or arg.startswith("--env") for arg in docker_args):
+        # Show the full Docker command if there are environment variables (from .env or args)
+        has_env_from_args = docker_args and any(
+            arg == "-e" or arg.startswith("--env") for arg in docker_args
+        )
+        has_env_from_file = bool(loaded_env_vars)
+        if has_env_from_args or has_env_from_file:
             hud_console.info("")
             hud_console.info("Docker command with environment variables:")
             hud_console.info(" ".join(docker_cmd))

{hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/eval.py RENAMED Viewed

@@ -298,16 +298,15 @@ async def run_single_task(
                 agent_config["allowed_tools"] = allowed_tools
         # Run with grouping
-        with hud.trace(name=f"{task_prompt} (group_size={group_size})"):
-            stats = await run_tasks_grouped(
-                tasks=[task],
-                agent_class=agent_class,
-                agent_config=agent_config,
-                group_size=group_size,
-                max_parallel_episodes=48,  # Same as RL default
-                max_steps=max_steps,
-                verbose=verbose,
-            )
+        stats = await run_tasks_grouped(
+            tasks=[task],
+            agent_class=agent_class,
+            agent_config=agent_config,
+            group_size=group_size,
+            max_parallel_episodes=48,  # Same as RL default
+            max_steps=max_steps,
+            verbose=verbose,
+        )
         # Display results
         display_group_statistics(stats, show_details=True)
@@ -499,7 +498,7 @@ async def run_full_dataset(
             )
         # Display results
-        display_group_statistics(stats, show_details=len(stats) <= 20)
+        display_group_statistics(stats, show_details=len(stats) <= 50)
         # Return stats for consistency with other modes
         return stats

{hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/flows/tasks.py RENAMED Viewed

@@ -212,17 +212,14 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
     # Check if tasks already have remote URLs
     already_remote = _validate_tasks(tasks)
-    # If tasks already reference a remote MCP URL, do not require a local environment
-    # or attempt any image updates. Use the dataset as-is.
-    if already_remote:
-        return str(tasks_path)
     # Extract existing images from tasks
     existing_images = _extract_existing_images(tasks)
     # Locate environment
     env_dir = find_environment_dir(tasks_path)
     if not env_dir:
+        if already_remote:
+            return str(tasks_path)
         hud_console.error("Could not locate an environment directory (Dockerfile + pyproject.toml)")
         hud_console.hint("Ensure you're in or near your environment folder before running 'hud rl'")
         raise typer.Exit(1)
@@ -373,6 +370,8 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
             item["system_prompt"] = t.system_prompt
         if t.metadata:
             item["metadata"] = t.metadata
+        if t.id is not None:
+            item["id"] = t.id
         tasks_payload.append(item)

{hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/__init__.py RENAMED Viewed

@@ -78,6 +78,11 @@ def rl_command(
         "-y",
         help="Auto-accept all prompts and use defaults (lazy mode)",
     ),
+    vllm_gpu_count: int = typer.Option(
+        None,
+        "--vllm-gpu-count",
+        help="Number of GPUs for vLLM server",
+    ),
     skip_vllm_startup: bool = typer.Option(
         False,
         "--skip-vllm-startup",
@@ -145,6 +150,7 @@ def rl_command(
                 model=model,
                 config_file=config_file,
                 output_dir=output_dir,
+                vllm_gpu_count=vllm_gpu_count,
                 yes=yes,
             )
             return

{hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/config.py RENAMED Viewed

@@ -84,7 +84,7 @@ def save_config(config: Config, path: Path) -> None:
     """Save configuration to a JSON file."""
     config_dict = config.to_dict()
-    with open(path, "w") as f:
+    with open(path, "w", encoding="utf-8") as f:
         json.dump(config_dict, f, indent=2)
         f.write("\n")  # Add newline at end of file
@@ -94,7 +94,7 @@ def save_config(config: Config, path: Path) -> None:
 def load_config(path: Path) -> Config:
     """Load configuration from a JSON file."""
-    with open(path) as f:
+    with open(path, encoding="utf-8") as f:
         data = json.load(f)
     # Use Config.from_dict which handles missing fields gracefully

{hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/gpu_utils.py RENAMED Viewed

@@ -245,10 +245,12 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
     # Apply scaling rule
     if num_gpus == 1:
         # Special case: 2 groups for single GPU
+        groups_per_gpu = 2
         config.training.batch_size = 2 * group_size
     else:
-        # Multi-GPU: each GPU processes 1 group
-        config.training.batch_size = num_gpus * group_size
+        groups_per_gpu = config.training.batch_size // group_size
+        # Multi-GPU: each GPU processes groups_per_gpu groups
+        config.training.batch_size = num_gpus * group_size * groups_per_gpu
     # Update max_parallel_episodes to match
     config.actor.max_parallel_episodes = config.training.batch_size
@@ -263,7 +265,7 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
         f"\n[cyan]📊 Adjusted batch_size to {config.training.batch_size} ({config.training.batch_size // group_size} groups)[/cyan]"  # noqa: E501
     )
     console.print(
-        f"[cyan]   Each of the {num_gpus} GPU(s) will process {config.training.batch_size // group_size // num_gpus} group(s) in parallel[/cyan]"  # noqa: E501
+        f"[cyan]   Each of the {num_gpus} GPU(s) will process {groups_per_gpu} group(s) in parallel[/cyan]"  # noqa: E501
     )
     return config

{hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/remote_runner.py RENAMED Viewed

@@ -32,7 +32,9 @@ GPU_PRICING = {
 }
-def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int = 600) -> None:
+def ensure_vllm_deployed(
+    model_name: str, gpu_type: str = "A100", gpu_count: int = 1, timeout: int = 600
+) -> None:
     """Deploy vLLM for a model if needed and wait until it's ready.
     Args:
@@ -47,7 +49,7 @@ def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int =
         return
     hud_console.info(f"Deploying vLLM server for {model_name}...")
-    rl_api.deploy_vllm(model_name, gpu_type=gpu_type)
+    rl_api.deploy_vllm(model_name, gpu_type=gpu_type, gpu_count=gpu_count)
     hud_console.success("vLLM deployment started")
     hud_console.info("Waiting for vLLM server to be ready...")
@@ -72,6 +74,7 @@ def run_remote_training(
     model: str | None,
     config_file: Path | None,
     output_dir: str,
+    vllm_gpu_count: int = 1,
     yes: bool = False,
 ) -> None:
     """Run RL training remotely via the API server following the new interactive flow."""
@@ -183,14 +186,18 @@ def run_remote_training(
             # Ask for model type
             if yes:
-                model_type = "Qwen/Qwen2.5-VL-3B-Instruct"  # Default model in yes mode
+                if config_file:
+                    config = load_config(config_file)
+                    model_type = config.model.base_model
+                else:
+                    model_type = "Qwen/Qwen2.5-VL-3B-Instruct"
                 hud_console.info(f"Auto-selecting base model: {model_type} (--yes mode)")
             else:
                 model_type = hud_console.select(
                     "Select base model type:",
                     choices=[
                         {"name": "Qwen2.5-VL-3B-Instruct", "value": "Qwen/Qwen2.5-VL-3B-Instruct"},
-                        # {"name": "Qwen2.5-VL-7B-Instruct", "value": "Qwen/Qwen2.5-VL-7B-Instruct"}, # noqa: E501
+                        {"name": "Qwen2.5-3B-Instruct", "value": "Qwen/Qwen2.5-3B-Instruct"},
                     ],
                     default=0,
                 )
@@ -218,7 +225,7 @@ def run_remote_training(
             try:
                 rl_api.create_model(model_name, model_type)
                 hud_console.success(f"Created model: {model_name}")
-                ensure_vllm_deployed(model_name, gpu_type="A100")
+                ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
             except Exception as e:
                 # If the name already exists, suggest a new name and prompt once
@@ -247,7 +254,7 @@ def run_remote_training(
                         rl_api.create_model(chosen, model_type)
                         hud_console.success(f"Created model: {chosen}")
                         model_name = chosen
-                        ensure_vllm_deployed(model_name, gpu_type="A100")
+                        ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
                     except Exception as e2:
                         hud_console.error(f"Failed to create model: {e2}")
                         raise
@@ -281,7 +288,7 @@ def run_remote_training(
                     return
             # Ensure vLLM is deployed
-            ensure_vllm_deployed(model_name, gpu_type="A100")
+            ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
     except KeyboardInterrupt:
         hud_console.dim_info("Training cancelled", "")
         return
@@ -323,7 +330,7 @@ def run_remote_training(
             )
         if yes:
-            num_gpus = 2 # Default to 2 GPUs in yes mode
+            num_gpus = 2  # Default to 2 GPUs in yes mode
             hud_console.info(f"Auto-selecting {num_gpus} GPU(s) (--yes mode)")
         else:
             num_gpus = hud_console.select(
@@ -425,10 +432,12 @@ def run_remote_training(
         # Load provided config
         hud_console.info(f"Loading configuration from: {config_file}")
         config = load_config(config_file)
-        config_dict = config.to_dict()
         gpu_choice = config.training.gpu_type
         num_gpus = config.training.num_gpus
+        config = adjust_config_for_ddp(config, int(num_gpus))
+        config_dict = config.to_dict()
     # Launch training
     try:
         # Little celebration before launching

{hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/rl_api.py RENAMED Viewed

@@ -61,12 +61,12 @@ def list_models() -> list[RLModelInfo]:
     ]
-def deploy_vllm(model_name: str, gpu_type: str = "A100") -> dict[str, Any]:
+def deploy_vllm(model_name: str, gpu_type: str = "A100", gpu_count: int = 1) -> dict[str, Any]:
     """Deploy a vLLM server for a model."""
     return make_request_sync(
         method="POST",
         url=f"{settings.hud_rl_url}/models/{model_name}/deploy",
-        json={"gpu_type": gpu_type},
+        json={"gpu_type": gpu_type, "gpu_count": gpu_count},
         api_key=settings.api_key,
     )

{hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/environment.py RENAMED Viewed

@@ -127,8 +127,4 @@ def is_environment_directory(path: str | Path) -> bool:
         return False
     # Must have pyproject.toml
-    if not (dir_path / "pyproject.toml").exists():
-        hud_console.error("pyproject.toml not found")
-        return False
-    return True
+    return (dir_path / "pyproject.toml").exists()

{hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/config.py RENAMED Viewed

@@ -13,6 +13,7 @@ SUPPORTED_MODELS = [
     "Qwen/Qwen2.5-VL-32B-Instruct",
     "Qwen/Qwen2.5-VL-72B-Instruct",
     "Qwen/Qwen2.5-7B-Instruct",
+    "Qwen/Qwen2.5-3B-Instruct",
 ]
@@ -39,9 +40,9 @@ class ModelConfig:
     """Model and LoRA configuration."""
     base_model: str = "Qwen/Qwen2.5-VL-3B-Instruct"
-    lora_r: int = 8
-    lora_alpha: int = 16
-    lora_dropout: float = 0.05
+    lora_r: int = 16
+    lora_alpha: int = 32
+    lora_dropout: float = 0.1
     target_modules: tuple[str, ...] = (
         "q_proj",
         "k_proj",
@@ -61,6 +62,7 @@ class ModelConfig:
 @dataclass
 class TrainingConfig:
     """Training hyperparameters."""
     # GPU parameters
     gpu_type: str = "A100"
     num_gpus: int = 2
@@ -71,9 +73,9 @@ class TrainingConfig:
     save_every_batches: int = 1
     # Batching parameters
-    epochs: int = 2
-    batch_size: int = 24
-    group_size: int = 4
+    epochs: int = 1
+    batch_size: int = 16
+    group_size: int = 8
     mini_batch_size: int = 1
     update_after_group: bool = True  # Whether to update the policy after each task group
     accumulate_over_minibatches: bool = False  # Whether to accumulate over minibatches
@@ -84,7 +86,7 @@ class TrainingConfig:
     leave_one_out: bool = True
     # Replay buffer parameters
-    buffer_steps: int = 4
+    buffer_steps: int = 8
     select_strategy: Literal["recent", "variance", "random"] = "variance"
     # Aggregation parameters
@@ -92,8 +94,8 @@ class TrainingConfig:
     token_agg: Literal["mean", "sum"] = "mean"  # noqa: S105
     # Regularization parameters
-    kl_beta: float = 0.0
-    entropy_beta: float = 0.0
+    kl_beta: float = 0.001
+    entropy_beta: float = 0.001
     top_eps: float = 0.2
     bottom_eps: float = 0.1
@@ -143,6 +145,7 @@ class Config:
     job_id: str | None = None  # Use existing job ID if provided
     stats_interval: int = 1
     verbose: bool = False
+    very_verbose: bool = False
     # Paths
     out_dir: str = "./checkpoints"
@@ -166,6 +169,7 @@ class Config:
             job_id=d.get("job_id"),
             stats_interval=d.get("stats_interval", 1),
             verbose=d.get("verbose", False),
+            very_verbose=d.get("very_verbose", False),
             out_dir=d.get("out_dir", "./checkpoints"),
             adapter_prefix=d.get("adapter_prefix", "cua-grpo-step"),
             seed=d.get("seed", 1234),
@@ -181,6 +185,7 @@ class Config:
             "job_id": self.job_id,
             "stats_interval": self.stats_interval,
             "verbose": self.verbose,
+            "very_verbose": self.very_verbose,
             "out_dir": self.out_dir,
             "adapter_prefix": self.adapter_prefix,
             "seed": self.seed,

{hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/distributed.py RENAMED Viewed

@@ -66,7 +66,13 @@ def all_reduce_mean(tensor: torch.Tensor) -> torch.Tensor:
 def broadcast_object(obj: Any, src: int = 0) -> Any:
-    """Broadcast a Python object from src rank to all ranks."""
+    """Broadcast a Python object from src rank to all ranks.
+    Args:
+        obj: Object to broadcast (used on src rank)
+        src: Source rank
+        device: Device for temporary tensor buffer during pickling transfer
+    """
     if not dist.is_initialized():
         return obj
@@ -75,6 +81,33 @@ def broadcast_object(obj: Any, src: int = 0) -> Any:
     return obj_list[0]
+def scatter_object(
+    obj_list: list[Any] | None,
+    src: int = 0,
+) -> Any:
+    """Scatter a list of Python objects from src so each rank receives one object.
+    Usage:
+        - On src rank: pass the full list (length == world_size)
+        - On non-src ranks: pass None
+    Returns:
+        The object intended for this rank.
+    """
+    if not dist.is_initialized():
+        # Single-process: return first element if provided, else None
+        if obj_list is None or len(obj_list) == 0:
+            return None
+        return obj_list[0]
+    out: list[Any] = [None]
+    if dist.get_rank() == src:
+        dist.scatter_object_list(out, obj_list, src=src)
+    else:
+        dist.scatter_object_list(out, None, src=src)
+    return out[0]
 def gather_tensors(tensor: torch.Tensor) -> list[torch.Tensor] | None:
     """Gather tensors from all ranks to rank 0.

{hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/learner.py RENAMED Viewed

@@ -240,6 +240,8 @@ class GRPOLearner:
                 if sample.inputs:
                     sample = sample.to_device(self.device)
                     sample.old_logprobs, _ = self.compute_logprobs(self.policy, sample.inputs)
+                    # Free GPU memory for this sample immediately
+                    sample.to_device(torch.device("cpu"))
             policy_module = self.policy.module if hasattr(self.policy, "module") else self.policy
             with policy_module.disable_adapter():
@@ -247,7 +249,10 @@ class GRPOLearner:
                     if is_main_process():
                         progress.update(f"Processing batch of traces... {i}/{len(batch)}")
                     if sample.inputs:
+                        # Move back to GPU for reference computation, then free
+                        sample = sample.to_device(self.device)
                         sample.ref_logprobs, _ = self.compute_logprobs(self.policy, sample.inputs)
+                        sample.to_device(torch.device("cpu"))
         hud_console.info_log("Creating mini-batches...")
         group_size = self.config.training.group_size
@@ -488,15 +493,21 @@ class GRPOLearner:
             out = model(**model_inputs)
             logits = out.logits / self.config.actor.temperature
-            log_probs = F.log_softmax(logits, dim=-1)
+            # Compute token log-probs via negative cross-entropy to avoid materializing full log_probs
             targets = inputs["input_ids"][:, 1:]
-            token_log_probs = log_probs[:, :-1].gather(-1, targets.unsqueeze(-1)).squeeze(-1)
+            logits_slice = logits[:, :-1, :]
+            loss_flat = F.cross_entropy(
+                logits_slice.reshape(-1, logits_slice.size(-1)),
+                targets.reshape(-1),
+                reduction="none",
+            )
+            token_log_probs = (-loss_flat).reshape_as(targets)
             # Compute entropy only for assistant tokens to save memory
             assistant_mask = inputs["assistant_mask"]
             entropy = torch.zeros_like(token_log_probs)
-            if assistant_mask.any():
+            if assistant_mask.any() and getattr(self.config.training, "entropy_beta", 0.0) != 0.0:
                 entropy[assistant_mask] = entropy_from_logits(logits[:, :-1][assistant_mask])
             return token_log_probs, entropy
@@ -506,8 +517,20 @@ class GRPOLearner:
             # Return dummy values that match expected shapes
             seq_len = inputs["input_ids"].shape[1] - 1 if "input_ids" in inputs else 0
             batch_size = inputs["input_ids"].shape[0] if "input_ids" in inputs else 1
-            dummy_logprobs = torch.zeros(batch_size, seq_len, device=self.device)
-            dummy_entropy = torch.zeros(batch_size, seq_len, device=self.device)
+            # Create dummy tensors that still participate in autograd so backward doesn't fail
+            try:
+                param_sum = torch.sum(
+                    next(self.policy.parameters())
+                )  # touch params to build a graph
+                base = param_sum * 0.0
+            except StopIteration:
+                base = torch.tensor(0.0, device=self.device)
+            dummy_logprobs = (
+                base + torch.zeros(batch_size, seq_len, device=self.device)
+            ).requires_grad_(True)
+            dummy_entropy = (
+                base + torch.zeros(batch_size, seq_len, device=self.device)
+            ).requires_grad_(True)
             return dummy_logprobs, dummy_entropy
     def save(self, path: str) -> None:

hud-python 0.4.42__tar.gz → 0.4.44__tar.gz

Potentially problematic release.

hud-python 0.4.42__tar.gz → 0.4.44__tar.gz