hud-python 0.4.42__tar.gz → 0.4.43__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (246)
  1. {hud_python-0.4.42 → hud_python-0.4.43}/PKG-INFO +1 -1
  2. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/openai_chat_generic.py +1 -1
  3. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/__init__.py +6 -0
  4. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/dev.py +24 -2
  5. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/eval.py +10 -11
  6. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/flows/tasks.py +4 -5
  7. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/__init__.py +6 -0
  8. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/config.py +2 -2
  9. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/gpu_utils.py +5 -3
  10. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/remote_runner.py +18 -9
  11. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/rl_api.py +2 -2
  12. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/environment.py +1 -5
  13. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/config.py +14 -9
  14. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/train.py +9 -6
  15. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/group_eval.py +2 -2
  16. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tasks.py +1 -1
  17. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_version.py +1 -1
  18. {hud_python-0.4.42 → hud_python-0.4.43}/hud/version.py +1 -1
  19. {hud_python-0.4.42 → hud_python-0.4.43}/pyproject.toml +1 -1
  20. {hud_python-0.4.42 → hud_python-0.4.43}/.gitignore +0 -0
  21. {hud_python-0.4.42 → hud_python-0.4.43}/LICENSE +0 -0
  22. {hud_python-0.4.42 → hud_python-0.4.43}/README.md +0 -0
  23. {hud_python-0.4.42 → hud_python-0.4.43}/environments/README.md +0 -0
  24. {hud_python-0.4.42 → hud_python-0.4.43}/environments/blank/README.md +0 -0
  25. {hud_python-0.4.42 → hud_python-0.4.43}/environments/blank/controller/README.md +0 -0
  26. {hud_python-0.4.42 → hud_python-0.4.43}/environments/blank/environment/README.md +0 -0
  27. {hud_python-0.4.42 → hud_python-0.4.43}/environments/blank/pyproject.toml +0 -0
  28. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/README.md +0 -0
  29. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/environment/2048/README.md +0 -0
  30. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
  31. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/environment/README.md +0 -0
  32. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/environment/todo/README.md +0 -0
  33. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
  34. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/pyproject.toml +0 -0
  35. {hud_python-0.4.42 → hud_python-0.4.43}/environments/deepresearch/pyproject.toml +0 -0
  36. {hud_python-0.4.42 → hud_python-0.4.43}/environments/remote_browser/README.md +0 -0
  37. {hud_python-0.4.42 → hud_python-0.4.43}/environments/remote_browser/pyproject.toml +0 -0
  38. {hud_python-0.4.42 → hud_python-0.4.43}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  39. {hud_python-0.4.42 → hud_python-0.4.43}/environments/text_2048/README.md +0 -0
  40. {hud_python-0.4.42 → hud_python-0.4.43}/environments/text_2048/pyproject.toml +0 -0
  41. {hud_python-0.4.42 → hud_python-0.4.43}/examples/README.md +0 -0
  42. {hud_python-0.4.42 → hud_python-0.4.43}/hud/__init__.py +0 -0
  43. {hud_python-0.4.42 → hud_python-0.4.43}/hud/__main__.py +0 -0
  44. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/__init__.py +0 -0
  45. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/base.py +0 -0
  46. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/claude.py +0 -0
  47. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/grounded_openai.py +0 -0
  48. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/langchain.py +0 -0
  49. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/lite_llm.py +0 -0
  50. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/misc/__init__.py +0 -0
  51. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/misc/response_agent.py +0 -0
  52. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/openai.py +0 -0
  53. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/tests/__init__.py +0 -0
  54. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/tests/test_base.py +0 -0
  55. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/tests/test_claude.py +0 -0
  56. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/tests/test_client.py +0 -0
  57. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  58. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/tests/test_openai.py +0 -0
  59. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/__main__.py +0 -0
  60. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/analyze.py +0 -0
  61. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/build.py +0 -0
  62. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/clone.py +0 -0
  63. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/debug.py +0 -0
  64. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/flows/__init__.py +0 -0
  65. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/get.py +0 -0
  66. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/init.py +0 -0
  67. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/list_func.py +0 -0
  68. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/pull.py +0 -0
  69. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/push.py +0 -0
  70. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/remove.py +0 -0
  71. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/celebrate.py +0 -0
  72. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/display.py +0 -0
  73. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/gpu.py +0 -0
  74. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/local_runner.py +0 -0
  75. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/presets.py +0 -0
  76. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/viewer.py +0 -0
  77. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/vllm.py +0 -0
  78. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/wait_utils.py +0 -0
  79. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/__init__.py +0 -0
  80. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_analyze.py +0 -0
  81. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_analyze_metadata.py +0 -0
  82. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_build.py +0 -0
  83. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_cli_init.py +0 -0
  84. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_cli_main.py +0 -0
  85. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_clone.py +0 -0
  86. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_cursor.py +0 -0
  87. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_debug.py +0 -0
  88. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_list_func.py +0 -0
  89. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_main_module.py +0 -0
  90. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_mcp_server.py +0 -0
  91. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_pull.py +0 -0
  92. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_push.py +0 -0
  93. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_registry.py +0 -0
  94. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_utils.py +0 -0
  95. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/__init__.py +0 -0
  96. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/config.py +0 -0
  97. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/cursor.py +0 -0
  98. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/docker.py +0 -0
  99. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/env_check.py +0 -0
  100. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/interactive.py +0 -0
  101. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/local_runner.py +0 -0
  102. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/logging.py +0 -0
  103. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/metadata.py +0 -0
  104. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/package_runner.py +0 -0
  105. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/registry.py +0 -0
  106. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/remote_runner.py +0 -0
  107. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/runner.py +0 -0
  108. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/server.py +0 -0
  109. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/source_hash.py +0 -0
  110. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/tasks.py +0 -0
  111. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/README.md +0 -0
  112. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/__init__.py +0 -0
  113. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/base.py +0 -0
  114. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/fastmcp.py +0 -0
  115. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/mcp_use.py +0 -0
  116. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/tests/__init__.py +0 -0
  117. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/tests/test_client_integration.py +0 -0
  118. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/tests/test_fastmcp.py +0 -0
  119. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  120. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/tests/test_protocol.py +0 -0
  121. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/utils/__init__.py +0 -0
  122. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/utils/mcp_use_retry.py +0 -0
  123. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/utils/retry.py +0 -0
  124. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/utils/retry_transport.py +0 -0
  125. {hud_python-0.4.42 → hud_python-0.4.43}/hud/datasets/__init__.py +0 -0
  126. {hud_python-0.4.42 → hud_python-0.4.43}/hud/datasets/parallel.py +0 -0
  127. {hud_python-0.4.42 → hud_python-0.4.43}/hud/datasets/runner.py +0 -0
  128. {hud_python-0.4.42 → hud_python-0.4.43}/hud/datasets/utils.py +0 -0
  129. {hud_python-0.4.42 → hud_python-0.4.43}/hud/misc/__init__.py +0 -0
  130. {hud_python-0.4.42 → hud_python-0.4.43}/hud/misc/claude_plays_pokemon.py +0 -0
  131. {hud_python-0.4.42 → hud_python-0.4.43}/hud/native/__init__.py +0 -0
  132. {hud_python-0.4.42 → hud_python-0.4.43}/hud/native/comparator.py +0 -0
  133. {hud_python-0.4.42 → hud_python-0.4.43}/hud/native/tests/__init__.py +0 -0
  134. {hud_python-0.4.42 → hud_python-0.4.43}/hud/native/tests/test_comparator.py +0 -0
  135. {hud_python-0.4.42 → hud_python-0.4.43}/hud/native/tests/test_native_init.py +0 -0
  136. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/__init__.py +0 -0
  137. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/collector.py +0 -0
  138. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/config.py +0 -0
  139. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/context.py +0 -0
  140. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/exporters.py +0 -0
  141. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/instrumentation.py +0 -0
  142. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/processors.py +0 -0
  143. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/tests/__init__.py +0 -0
  144. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/tests/test_processors.py +0 -0
  145. {hud_python-0.4.42 → hud_python-0.4.43}/hud/py.typed +0 -0
  146. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/README.md +0 -0
  147. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/__init__.py +0 -0
  148. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/actor.py +0 -0
  149. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/buffer.py +0 -0
  150. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/chat_template.jinja +0 -0
  151. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/distributed.py +0 -0
  152. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/learner.py +0 -0
  153. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/tests/__init__.py +0 -0
  154. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/tests/test_learner.py +0 -0
  155. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/types.py +0 -0
  156. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/utils/start_vllm_server.sh +0 -0
  157. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/utils.py +0 -0
  158. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/vllm_adapter.py +0 -0
  159. {hud_python-0.4.42 → hud_python-0.4.43}/hud/samples/__init__.py +0 -0
  160. {hud_python-0.4.42 → hud_python-0.4.43}/hud/samples/browser.py +0 -0
  161. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/__init__.py +0 -0
  162. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/context.py +0 -0
  163. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/helper/__init__.py +0 -0
  164. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/low_level.py +0 -0
  165. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/server.py +0 -0
  166. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/__init__.py +0 -0
  167. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_add_tool.py +0 -0
  168. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_context.py +0 -0
  169. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  170. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_mcp_server_integration.py +0 -0
  171. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_mcp_server_more.py +0 -0
  172. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_run_wrapper.py +0 -0
  173. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_server_extra.py +0 -0
  174. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_sigterm_runner.py +0 -0
  175. {hud_python-0.4.42 → hud_python-0.4.43}/hud/settings.py +0 -0
  176. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/__init__.py +0 -0
  177. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/exceptions.py +0 -0
  178. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/hints.py +0 -0
  179. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/requests.py +0 -0
  180. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/tests/__init__.py +0 -0
  181. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/tests/test_exceptions.py +0 -0
  182. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/tests/test_requests.py +0 -0
  183. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/__init__.py +0 -0
  184. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/instrument.py +0 -0
  185. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/job.py +0 -0
  186. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/replay.py +0 -0
  187. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/tests/__init__.py +0 -0
  188. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/tests/test_replay.py +0 -0
  189. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/tests/test_trace.py +0 -0
  190. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/trace.py +0 -0
  191. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/__init__.py +0 -0
  192. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/base.py +0 -0
  193. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/bash.py +0 -0
  194. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/computer/__init__.py +0 -0
  195. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/computer/anthropic.py +0 -0
  196. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/computer/hud.py +0 -0
  197. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/computer/openai.py +0 -0
  198. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/computer/settings.py +0 -0
  199. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/edit.py +0 -0
  200. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/__init__.py +0 -0
  201. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/base.py +0 -0
  202. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/pyautogui.py +0 -0
  203. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/tests/__init__.py +0 -0
  204. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/tests/test_base_executor.py +0 -0
  205. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  206. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/xdo.py +0 -0
  207. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/grounding/__init__.py +0 -0
  208. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/grounding/config.py +0 -0
  209. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/grounding/grounded_tool.py +0 -0
  210. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/grounding/grounder.py +0 -0
  211. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/grounding/tests/__init__.py +0 -0
  212. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  213. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/playwright.py +0 -0
  214. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/response.py +0 -0
  215. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/submit.py +0 -0
  216. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/__init__.py +0 -0
  217. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_base.py +0 -0
  218. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_bash.py +0 -0
  219. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_bash_extended.py +0 -0
  220. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_computer.py +0 -0
  221. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_computer_actions.py +0 -0
  222. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_edit.py +0 -0
  223. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_init.py +0 -0
  224. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_playwright_tool.py +0 -0
  225. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_response.py +0 -0
  226. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_tools.py +0 -0
  227. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_tools_init.py +0 -0
  228. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_utils.py +0 -0
  229. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/types.py +0 -0
  230. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/utils.py +0 -0
  231. {hud_python-0.4.42 → hud_python-0.4.43}/hud/types.py +0 -0
  232. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/__init__.py +0 -0
  233. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/agent_factories.py +0 -0
  234. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/async_utils.py +0 -0
  235. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/hud_console.py +0 -0
  236. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/mcp.py +0 -0
  237. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/pretty_errors.py +0 -0
  238. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/progress.py +0 -0
  239. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/telemetry.py +0 -0
  240. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/__init__.py +0 -0
  241. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_async_utils.py +0 -0
  242. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_init.py +0 -0
  243. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_mcp.py +0 -0
  244. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_progress.py +0 -0
  245. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_telemetry.py +0 -0
  246. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tool_shorthand.py +0 -0
{hud_python-0.4.42 → hud_python-0.4.43}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hud-python
- Version: 0.4.42
+ Version: 0.4.43
  Summary: SDK for the HUD platform.
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
{hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/openai_chat_generic.py
@@ -205,7 +205,7 @@ class GenericOpenAIChatAgent(MCPAgent):
  try:
  response = await self._invoke_chat_completion(
  messages=messages,
- tools=tools, # type: ignore
+ tools=tools, # type: ignore
  extra=extra,
  )
  except Exception as e:
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/__init__.py
@@ -1178,6 +1178,11 @@ def rl(
  "--vllm-gpu",
  help="Specific GPU for vLLM server",
  ),
+ vllm_gpu_count: int = typer.Option(
+ 1,
+ "--vllm-gpu-count",
+ help="Number of GPUs for vLLM server",
+ ),
  skip_vllm_startup: bool = typer.Option(
  False,
  "--skip-vllm-startup",
@@ -1199,6 +1204,7 @@ def rl(
  no_ddp=no_ddp,
  ddp_gpus=ddp_gpus,
  vllm_gpu=vllm_gpu,
+ vllm_gpu_count=vllm_gpu_count,
  yes=yes,
  skip_vllm_startup=skip_vllm_startup,
  )
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/dev.py
@@ -73,6 +73,24 @@ def create_proxy_server(
  "PYTHONUNBUFFERED=1", # Ensure Python output is not buffered
  ]

+ # Check for .env file in the project directory and add env vars
+ env_file = project_path / ".env"
+ loaded_env_vars = {}
+ if env_file.exists():
+ try:
+ from hud.cli.utils.config import parse_env_file
+
+ env_contents = env_file.read_text(encoding="utf-8")
+ loaded_env_vars = parse_env_file(env_contents)
+ for key, value in loaded_env_vars.items():
+ docker_cmd.extend(["-e", f"{key}={value}"])
+ if verbose and loaded_env_vars:
+ hud_console.info(
+ f"Loaded {len(loaded_env_vars)} environment variable(s) from .env file"
+ )
+ except Exception as e:
+ hud_console.warning(f"Failed to load .env file: {e}")
+
  # Add user-provided Docker arguments
  if docker_args:
  docker_cmd.extend(docker_args)
@@ -112,8 +130,12 @@ def create_proxy_server(
  hud_console.info("The container's CMD determines reload behavior")
  hud_console.command_example(f"docker logs -f {container_name}", "View container logs")

- # Show the full Docker command if there are environment variables
- if docker_args and any(arg == "-e" or arg.startswith("--env") for arg in docker_args):
+ # Show the full Docker command if there are environment variables (from .env or args)
+ has_env_from_args = docker_args and any(
+ arg == "-e" or arg.startswith("--env") for arg in docker_args
+ )
+ has_env_from_file = bool(loaded_env_vars)
+ if has_env_from_args or has_env_from_file:
  hud_console.info("")
  hud_console.info("Docker command with environment variables:")
  hud_console.info(" ".join(docker_cmd))
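The new block above forwards variables from a project-level .env file into the dev container. A minimal sketch of the effect, with made-up .env contents; parse_env_file is the helper the diff imports, approximated here as plain KEY=VALUE parsing:

    # Hypothetical .env contents in the environment directory (illustration only):
    env_contents = "OPENAI_API_KEY=sk-example\nBROWSER_HEADLESS=1\n"

    # Rough approximation of parse_env_file: one KEY=VALUE pair per non-empty line.
    loaded_env_vars = dict(
        line.split("=", 1) for line in env_contents.splitlines() if "=" in line
    )

    docker_cmd = ["docker", "run", "--rm"]
    for key, value in loaded_env_vars.items():
        docker_cmd.extend(["-e", f"{key}={value}"])
    # docker_cmd is now:
    # ['docker', 'run', '--rm', '-e', 'OPENAI_API_KEY=sk-example', '-e', 'BROWSER_HEADLESS=1']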
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/eval.py
@@ -298,16 +298,15 @@ async def run_single_task(
  agent_config["allowed_tools"] = allowed_tools

  # Run with grouping
- with hud.trace(name=f"{task_prompt} (group_size={group_size})"):
- stats = await run_tasks_grouped(
- tasks=[task],
- agent_class=agent_class,
- agent_config=agent_config,
- group_size=group_size,
- max_parallel_episodes=48, # Same as RL default
- max_steps=max_steps,
- verbose=verbose,
- )
+ stats = await run_tasks_grouped(
+ tasks=[task],
+ agent_class=agent_class,
+ agent_config=agent_config,
+ group_size=group_size,
+ max_parallel_episodes=48, # Same as RL default
+ max_steps=max_steps,
+ verbose=verbose,
+ )

  # Display results
  display_group_statistics(stats, show_details=True)
@@ -499,7 +498,7 @@ async def run_full_dataset(
  )

  # Display results
- display_group_statistics(stats, show_details=len(stats) <= 20)
+ display_group_statistics(stats, show_details=len(stats) <= 50)

  # Return stats for consistency with other modes
  return stats
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/flows/tasks.py
@@ -212,17 +212,14 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
  # Check if tasks already have remote URLs
  already_remote = _validate_tasks(tasks)

- # If tasks already reference a remote MCP URL, do not require a local environment
- # or attempt any image updates. Use the dataset as-is.
- if already_remote:
- return str(tasks_path)
-
  # Extract existing images from tasks
  existing_images = _extract_existing_images(tasks)

  # Locate environment
  env_dir = find_environment_dir(tasks_path)
  if not env_dir:
+ if already_remote:
+ return str(tasks_path)
  hud_console.error("Could not locate an environment directory (Dockerfile + pyproject.toml)")
  hud_console.hint("Ensure you're in or near your environment folder before running 'hud rl'")
  raise typer.Exit(1)
@@ -373,6 +370,8 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
  item["system_prompt"] = t.system_prompt
  if t.metadata:
  item["metadata"] = t.metadata
+ if t.id is not None:
+ item["id"] = t.id

  tasks_payload.append(item)

{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/__init__.py
@@ -78,6 +78,11 @@ def rl_command(
  "-y",
  help="Auto-accept all prompts and use defaults (lazy mode)",
  ),
+ vllm_gpu_count: int = typer.Option(
+ None,
+ "--vllm-gpu-count",
+ help="Number of GPUs for vLLM server",
+ ),
  skip_vllm_startup: bool = typer.Option(
  False,
  "--skip-vllm-startup",
@@ -145,6 +150,7 @@ def rl_command(
  model=model,
  config_file=config_file,
  output_dir=output_dir,
+ vllm_gpu_count=vllm_gpu_count,
  yes=yes,
  )
  return
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/config.py
@@ -84,7 +84,7 @@ def save_config(config: Config, path: Path) -> None:
  """Save configuration to a JSON file."""
  config_dict = config.to_dict()

- with open(path, "w") as f:
+ with open(path, "w", encoding="utf-8") as f:
  json.dump(config_dict, f, indent=2)
  f.write("\n") # Add newline at end of file

@@ -94,7 +94,7 @@ def save_config(config: Config, path: Path) -> None:

  def load_config(path: Path) -> Config:
  """Load configuration from a JSON file."""
- with open(path) as f:
+ with open(path, encoding="utf-8") as f:
  data = json.load(f)

  # Use Config.from_dict which handles missing fields gracefully
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/gpu_utils.py
@@ -245,10 +245,12 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
  # Apply scaling rule
  if num_gpus == 1:
  # Special case: 2 groups for single GPU
+ groups_per_gpu = 2
  config.training.batch_size = 2 * group_size
  else:
- # Multi-GPU: each GPU processes 1 group
- config.training.batch_size = num_gpus * group_size
+ groups_per_gpu = config.training.batch_size // group_size
+ # Multi-GPU: each GPU processes groups_per_gpu groups
+ config.training.batch_size = num_gpus * group_size * groups_per_gpu

  # Update max_parallel_episodes to match
  config.actor.max_parallel_episodes = config.training.batch_size
@@ -263,7 +265,7 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
  f"\n[cyan]📊 Adjusted batch_size to {config.training.batch_size} ({config.training.batch_size // group_size} groups)[/cyan]" # noqa: E501
  )
  console.print(
- f"[cyan] Each of the {num_gpus} GPU(s) will process {config.training.batch_size // group_size // num_gpus} group(s) in parallel[/cyan]" # noqa: E501
+ f"[cyan] Each of the {num_gpus} GPU(s) will process {groups_per_gpu} group(s) in parallel[/cyan]" # noqa: E501
  )

  return config
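A worked example of the revised scaling rule, using the new TrainingConfig defaults introduced elsewhere in this release (batch_size=16, group_size=8) and an assumed GPU count; the numbers are illustrative only:

    group_size = 8    # new TrainingConfig default in 0.4.43
    batch_size = 16   # new TrainingConfig default in 0.4.43
    num_gpus = 4      # assumed for illustration

    if num_gpus == 1:
        groups_per_gpu = 2
        batch_size = 2 * group_size                            # 16
    else:
        groups_per_gpu = batch_size // group_size              # 16 // 8 = 2
        batch_size = num_gpus * group_size * groups_per_gpu    # 4 * 8 * 2 = 64

    # 0.4.42 used num_gpus * group_size here (4 * 8 = 32), i.e. one group per GPU;
    # 0.4.43 keeps groups_per_gpu groups on each GPU instead.
    max_parallel_episodes = batch_size  # mirrors config.actor.max_parallel_episodes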
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/remote_runner.py
@@ -32,7 +32,9 @@ GPU_PRICING = {
  }


- def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int = 600) -> None:
+ def ensure_vllm_deployed(
+ model_name: str, gpu_type: str = "A100", gpu_count: int = 1, timeout: int = 600
+ ) -> None:
  """Deploy vLLM for a model if needed and wait until it's ready.

  Args:
@@ -47,7 +49,7 @@ def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int =
  return

  hud_console.info(f"Deploying vLLM server for {model_name}...")
- rl_api.deploy_vllm(model_name, gpu_type=gpu_type)
+ rl_api.deploy_vllm(model_name, gpu_type=gpu_type, gpu_count=gpu_count)
  hud_console.success("vLLM deployment started")

  hud_console.info("Waiting for vLLM server to be ready...")
@@ -72,6 +74,7 @@ def run_remote_training(
  model: str | None,
  config_file: Path | None,
  output_dir: str,
+ vllm_gpu_count: int = 1,
  yes: bool = False,
  ) -> None:
  """Run RL training remotely via the API server following the new interactive flow."""
@@ -183,14 +186,18 @@ def run_remote_training(

  # Ask for model type
  if yes:
- model_type = "Qwen/Qwen2.5-VL-3B-Instruct" # Default model in yes mode
+ if config_file:
+ config = load_config(config_file)
+ model_type = config.model.base_model
+ else:
+ model_type = "Qwen/Qwen2.5-VL-3B-Instruct"
  hud_console.info(f"Auto-selecting base model: {model_type} (--yes mode)")
  else:
  model_type = hud_console.select(
  "Select base model type:",
  choices=[
  {"name": "Qwen2.5-VL-3B-Instruct", "value": "Qwen/Qwen2.5-VL-3B-Instruct"},
- # {"name": "Qwen2.5-VL-7B-Instruct", "value": "Qwen/Qwen2.5-VL-7B-Instruct"}, # noqa: E501
+ {"name": "Qwen2.5-3B-Instruct", "value": "Qwen/Qwen2.5-3B-Instruct"},
  ],
  default=0,
  )
@@ -218,7 +225,7 @@ def run_remote_training(
  try:
  rl_api.create_model(model_name, model_type)
  hud_console.success(f"Created model: {model_name}")
- ensure_vllm_deployed(model_name, gpu_type="A100")
+ ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)

  except Exception as e:
  # If the name already exists, suggest a new name and prompt once
@@ -247,7 +254,7 @@ def run_remote_training(
  rl_api.create_model(chosen, model_type)
  hud_console.success(f"Created model: {chosen}")
  model_name = chosen
- ensure_vllm_deployed(model_name, gpu_type="A100")
+ ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
  except Exception as e2:
  hud_console.error(f"Failed to create model: {e2}")
  raise
@@ -281,7 +288,7 @@ def run_remote_training(
  return

  # Ensure vLLM is deployed
- ensure_vllm_deployed(model_name, gpu_type="A100")
+ ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
  except KeyboardInterrupt:
  hud_console.dim_info("Training cancelled", "")
  return
@@ -323,7 +330,7 @@ def run_remote_training(
  )

  if yes:
- num_gpus = 2 # Default to 2 GPUs in yes mode
+ num_gpus = 2  # Default to 2 GPUs in yes mode
  hud_console.info(f"Auto-selecting {num_gpus} GPU(s) (--yes mode)")
  else:
  num_gpus = hud_console.select(
@@ -425,10 +432,12 @@ def run_remote_training(
  # Load provided config
  hud_console.info(f"Loading configuration from: {config_file}")
  config = load_config(config_file)
- config_dict = config.to_dict()
  gpu_choice = config.training.gpu_type
  num_gpus = config.training.num_gpus

+ config = adjust_config_for_ddp(config, int(num_gpus))
+ config_dict = config.to_dict()
+
  # Launch training
  try:
  # Little celebration before launching
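The remote_runner changes above and the rl_api change just below thread the new gpu_count value from the CLI flag down to the deploy request. A rough sketch of the chain, with a placeholder model name and an assumed count of 2 (running it for real would call the HUD API and requires an API key):

    # Assumed CLI invocation: hud rl --vllm-gpu-count 2 ...
    # The value is threaded through roughly as:
    #   run_remote_training(..., vllm_gpu_count=2)
    #     -> ensure_vllm_deployed("my-model", gpu_type="A100", gpu_count=2)
    #       -> rl_api.deploy_vllm("my-model", gpu_type="A100", gpu_count=2)
    # which POSTs this body to {settings.hud_rl_url}/models/my-model/deploy:
    payload = {"gpu_type": "A100", "gpu_count": 2}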
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/rl_api.py
@@ -61,12 +61,12 @@ def list_models() -> list[RLModelInfo]:
  ]


- def deploy_vllm(model_name: str, gpu_type: str = "A100") -> dict[str, Any]:
+ def deploy_vllm(model_name: str, gpu_type: str = "A100", gpu_count: int = 1) -> dict[str, Any]:
  """Deploy a vLLM server for a model."""
  return make_request_sync(
  method="POST",
  url=f"{settings.hud_rl_url}/models/{model_name}/deploy",
- json={"gpu_type": gpu_type},
+ json={"gpu_type": gpu_type, "gpu_count": gpu_count},
  api_key=settings.api_key,
  )

{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/environment.py
@@ -127,8 +127,4 @@ def is_environment_directory(path: str | Path) -> bool:
  return False

  # Must have pyproject.toml
- if not (dir_path / "pyproject.toml").exists():
- hud_console.error("pyproject.toml not found")
- return False
-
- return True
+ return (dir_path / "pyproject.toml").exists()
{hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/config.py
@@ -13,6 +13,7 @@ SUPPORTED_MODELS = [
  "Qwen/Qwen2.5-VL-32B-Instruct",
  "Qwen/Qwen2.5-VL-72B-Instruct",
  "Qwen/Qwen2.5-7B-Instruct",
+ "Qwen/Qwen2.5-3B-Instruct",
  ]


@@ -39,9 +40,9 @@ class ModelConfig:
  """Model and LoRA configuration."""

  base_model: str = "Qwen/Qwen2.5-VL-3B-Instruct"
- lora_r: int = 8
- lora_alpha: int = 16
- lora_dropout: float = 0.05
+ lora_r: int = 16
+ lora_alpha: int = 32
+ lora_dropout: float = 0.1
  target_modules: tuple[str, ...] = (
  "q_proj",
  "k_proj",
@@ -61,6 +62,7 @@ class ModelConfig:
  @dataclass
  class TrainingConfig:
  """Training hyperparameters."""
+
  # GPU parameters
  gpu_type: str = "A100"
  num_gpus: int = 2
@@ -71,9 +73,9 @@ class TrainingConfig:
  save_every_batches: int = 1

  # Batching parameters
- epochs: int = 2
- batch_size: int = 24
- group_size: int = 4
+ epochs: int = 1
+ batch_size: int = 16
+ group_size: int = 8
  mini_batch_size: int = 1
  update_after_group: bool = True # Whether to update the policy after each task group
  accumulate_over_minibatches: bool = False # Whether to accumulate over minibatches
@@ -84,7 +86,7 @@ class TrainingConfig:
  leave_one_out: bool = True

  # Replay buffer parameters
- buffer_steps: int = 4
+ buffer_steps: int = 8
  select_strategy: Literal["recent", "variance", "random"] = "variance"

  # Aggregation parameters
@@ -92,8 +94,8 @@ class TrainingConfig:
  token_agg: Literal["mean", "sum"] = "mean" # noqa: S105

  # Regularization parameters
- kl_beta: float = 0.0
- entropy_beta: float = 0.0
+ kl_beta: float = 0.001
+ entropy_beta: float = 0.001
  top_eps: float = 0.2
  bottom_eps: float = 0.1

@@ -143,6 +145,7 @@ class Config:
  job_id: str | None = None # Use existing job ID if provided
  stats_interval: int = 1
  verbose: bool = False
+ very_verbose: bool = False

  # Paths
  out_dir: str = "./checkpoints"
@@ -166,6 +169,7 @@ class Config:
  job_id=d.get("job_id"),
  stats_interval=d.get("stats_interval", 1),
  verbose=d.get("verbose", False),
+ very_verbose=d.get("very_verbose", False),
  out_dir=d.get("out_dir", "./checkpoints"),
  adapter_prefix=d.get("adapter_prefix", "cua-grpo-step"),
  seed=d.get("seed", 1234),
@@ -181,6 +185,7 @@ class Config:
  "job_id": self.job_id,
  "stats_interval": self.stats_interval,
  "verbose": self.verbose,
+ "very_verbose": self.very_verbose,
  "out_dir": self.out_dir,
  "adapter_prefix": self.adapter_prefix,
  "seed": self.seed,
{hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/train.py
@@ -56,6 +56,10 @@ async def train(config: Config, tasks: list[Task]) -> None:
  logging.basicConfig(level=logging.INFO)
  # Remove httpx logger
  logging.getLogger("httpx").setLevel(logging.WARNING)
+ if config.very_verbose:
+ logging.basicConfig(level=logging.DEBUG)
+ # Remove httpx logger
+ logging.getLogger("httpx").setLevel(logging.INFO)

  if is_main_process():
  hud_console.header("Starting GRPO Training")
@@ -103,10 +107,9 @@ async def train(config: Config, tasks: list[Task]) -> None:
  if is_main_process():
  hud_console.info(f"Creating job with config.job_id: {config.job_id}")
  job_obj = hud.create_job(
- job_id=config.job_id, name=config.job_name, metadata={
- "config": config.to_dict(),
- "agent_class": config.model.base_model
- }
+ job_id=config.job_id,
+ name=config.job_name,
+ metadata={"config": config.to_dict(), "agent_class": config.model.base_model},
  )
  hud_console.info(f"Created job with job_obj.id: {job_obj.id}")
  job_obj.update_status_sync("running")
@@ -299,7 +302,7 @@ async def main() -> None:

  # Load config
  if args.config:
- with open(args.config) as f: # noqa: ASYNC230
+ with open(args.config, encoding="utf-8") as f: # noqa: ASYNC230
  config_dict = json.load(f)
  config = Config.from_dict(config_dict)
  else:
@@ -337,7 +340,7 @@ async def main() -> None:
  # Load tasks
  if args.tasks_json:
  # Tasks provided as JSON list via command line
- tasks = load_tasks(args.tasks_jso)
+ tasks = load_tasks(args.tasks_json)
  elif args.tasks:
  # Tasks provided as file path or HuggingFace dataset
  tasks = load_tasks(args.tasks)
{hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/group_eval.py
@@ -189,7 +189,7 @@ def display_group_statistics(stats: list[dict[str, Any]], show_details: bool = T
  hud_console.info(f"Overall mean reward: {overall_mean:.3f} ± {overall_std:.3f}")

  # Detailed table
- if show_details and len(stats) <= 20: # Only show for reasonable dataset sizes
+ if show_details and len(stats) <= 50: # Only show for reasonable dataset sizes
  table = Table(title="\nPer-Task Performance Distribution")
  table.add_column("Task", style="cyan", no_wrap=True)
  table.add_column("Mean±Std", justify="right", style="green")
@@ -216,7 +216,7 @@ def display_group_statistics(stats: list[dict[str, Any]], show_details: bool = T
  # High variance tasks
  high_variance_tasks = [s for s in stats if s["std_reward"] > 0.3 and s["group_size"] > 1]
  if high_variance_tasks:
- hud_console.warning(f"\n⚠️ {len(high_variance_tasks)} tasks show high variance (std > 0.3)")
+ hud_console.warning(f"\n{len(high_variance_tasks)} tasks show high variance (std > 0.3)")
  for task in high_variance_tasks[:3]:
  hud_console.info(
  f" • {task['task_id']}: μ={task['mean_reward']:.3f}, σ={task['std_reward']:.3f}" # noqa: RUF001
{hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tasks.py
@@ -40,7 +40,7 @@ def load_tasks(tasks_input: str | list[dict], *, raw: bool = False) -> list[Task
  if Path(tasks_input).exists():
  file_path = Path(tasks_input)

- with open(file_path) as f:
+ with open(file_path, encoding="utf-8") as f:
  # Handle JSON files (array of tasks)
  if file_path.suffix.lower() == ".json":
  data = json.load(f)
{hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_version.py
@@ -5,4 +5,4 @@ def test_import():
  """Test that the package can be imported."""
  import hud

- assert hud.__version__ == "0.4.42"
+ assert hud.__version__ == "0.4.43"
{hud_python-0.4.42 → hud_python-0.4.43}/hud/version.py
@@ -4,4 +4,4 @@ Version information for the HUD SDK.

  from __future__ import annotations

- __version__ = "0.4.42"
+ __version__ = "0.4.43"
{hud_python-0.4.42 → hud_python-0.4.43}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "hud-python"
- version = "0.4.42"
+ version = "0.4.43"
  description = "SDK for the HUD platform."
  readme = "README.md"
  requires-python = ">=3.11, <3.13"