hud-python 0.4.43__py3-none-any.whl → 0.4.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of hud-python might be problematic.

hud/rl/distributed.py CHANGED
@@ -66,7 +66,13 @@ def all_reduce_mean(tensor: torch.Tensor) -> torch.Tensor:
 
 
 def broadcast_object(obj: Any, src: int = 0) -> Any:
-    """Broadcast a Python object from src rank to all ranks."""
+    """Broadcast a Python object from src rank to all ranks.
+
+    Args:
+        obj: Object to broadcast (used on src rank)
+        src: Source rank
+        device: Device for temporary tensor buffer during pickling transfer
+    """
     if not dist.is_initialized():
         return obj
 
@@ -75,6 +81,33 @@ def broadcast_object(obj: Any, src: int = 0) -> Any:
     return obj_list[0]
 
 
+def scatter_object(
+    obj_list: list[Any] | None,
+    src: int = 0,
+) -> Any:
+    """Scatter a list of Python objects from src so each rank receives one object.
+
+    Usage:
+        - On src rank: pass the full list (length == world_size)
+        - On non-src ranks: pass None
+
+    Returns:
+        The object intended for this rank.
+    """
+    if not dist.is_initialized():
+        # Single-process: return first element if provided, else None
+        if obj_list is None or len(obj_list) == 0:
+            return None
+        return obj_list[0]
+
+    out: list[Any] = [None]
+    if dist.get_rank() == src:
+        dist.scatter_object_list(out, obj_list, src=src)
+    else:
+        dist.scatter_object_list(out, None, src=src)
+    return out[0]
+
+
 def gather_tensors(tensor: torch.Tensor) -> list[torch.Tensor] | None:
     """Gather tensors from all ranks to rank 0.
 
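The new scatter_object helper delivers each rank only its own payload instead of broadcasting the entire list to every rank, which matters when the per-rank sample lists are large. A minimal usage sketch, assuming a process group is already initialized (e.g. via torchrun, one process per GPU) and using purely illustrative payloads:

    import torch.distributed as dist
    from hud.rl.distributed import scatter_object

    # Rank 0 builds one payload per rank; the list length must equal world_size.
    if dist.get_rank() == 0:
        payloads = [
            {"rank": r, "samples": list(range(r, r + 4))}
            for r in range(dist.get_world_size())
        ]
    else:
        payloads = None  # non-src ranks pass None

    mine = scatter_object(payloads, src=0)  # each rank receives payloads[rank]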
hud/rl/learner.py CHANGED
@@ -240,6 +240,8 @@ class GRPOLearner:
             if sample.inputs:
                 sample = sample.to_device(self.device)
                 sample.old_logprobs, _ = self.compute_logprobs(self.policy, sample.inputs)
+                # Free GPU memory for this sample immediately
+                sample.to_device(torch.device("cpu"))
 
         policy_module = self.policy.module if hasattr(self.policy, "module") else self.policy
         with policy_module.disable_adapter():
@@ -247,7 +249,10 @@ class GRPOLearner:
             if is_main_process():
                 progress.update(f"Processing batch of traces... {i}/{len(batch)}")
             if sample.inputs:
+                # Move back to GPU for reference computation, then free
+                sample = sample.to_device(self.device)
                 sample.ref_logprobs, _ = self.compute_logprobs(self.policy, sample.inputs)
+                sample.to_device(torch.device("cpu"))
 
         hud_console.info_log("Creating mini-batches...")
         group_size = self.config.training.group_size
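Both hunks above follow the same offload pattern: move one sample's tensors to the GPU, run the forward pass, then move them back to CPU so that only a single sample's tensors occupy device memory at a time. A standalone sketch of that pattern with plain tensors, assuming a CUDA device is available (score is a hypothetical stand-in, not a hud API):

    import torch

    def score(batch: torch.Tensor) -> torch.Tensor:
        return batch.sum(dim=-1)  # hypothetical per-sample computation

    samples = [torch.randn(512, 1024) for _ in range(8)]  # kept on CPU
    results = []
    for cpu_sample in samples:
        gpu_sample = cpu_sample.to("cuda")        # move one sample to the GPU
        results.append(score(gpu_sample).cpu())   # compute, keep result on CPU
        del gpu_sample                            # release the GPU copy immediately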
@@ -488,15 +493,21 @@ class GRPOLearner:
             out = model(**model_inputs)
 
             logits = out.logits / self.config.actor.temperature
-            log_probs = F.log_softmax(logits, dim=-1)
 
+            # Compute token log-probs via negative cross-entropy to avoid materializing full log_probs
             targets = inputs["input_ids"][:, 1:]
-            token_log_probs = log_probs[:, :-1].gather(-1, targets.unsqueeze(-1)).squeeze(-1)
+            logits_slice = logits[:, :-1, :]
+            loss_flat = F.cross_entropy(
+                logits_slice.reshape(-1, logits_slice.size(-1)),
+                targets.reshape(-1),
+                reduction="none",
+            )
+            token_log_probs = (-loss_flat).reshape_as(targets)
 
             # Compute entropy only for assistant tokens to save memory
             assistant_mask = inputs["assistant_mask"]
             entropy = torch.zeros_like(token_log_probs)
-            if assistant_mask.any():
+            if assistant_mask.any() and getattr(self.config.training, "entropy_beta", 0.0) != 0.0:
                 entropy[assistant_mask] = entropy_from_logits(logits[:, :-1][assistant_mask])
 
             return token_log_probs, entropy
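This rewrite relies on the identity that F.cross_entropy with reduction="none" returns exactly the negative log-softmax probability of each target token, while fusing the log-softmax and gather so the full (batch, seq, vocab) log-prob tensor never has to be kept around. A self-contained check of the equivalence with toy shapes:

    import torch
    import torch.nn.functional as F

    logits = torch.randn(2, 8, 50)           # (batch, seq, vocab), toy sizes
    targets = torch.randint(0, 50, (2, 7))   # next-token ids aligned to logits[:, :-1]

    # Old path: materializes the full log-prob tensor, then gathers per token.
    old = F.log_softmax(logits, dim=-1)[:, :-1].gather(-1, targets.unsqueeze(-1)).squeeze(-1)

    # New path: negative per-token cross-entropy yields the same values.
    flat = logits[:, :-1, :].reshape(-1, logits.size(-1))
    new = (-F.cross_entropy(flat, targets.reshape(-1), reduction="none")).reshape_as(targets)

    assert torch.allclose(old, new, atol=1e-5)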
@@ -506,8 +517,20 @@ class GRPOLearner:
             # Return dummy values that match expected shapes
             seq_len = inputs["input_ids"].shape[1] - 1 if "input_ids" in inputs else 0
             batch_size = inputs["input_ids"].shape[0] if "input_ids" in inputs else 1
-            dummy_logprobs = torch.zeros(batch_size, seq_len, device=self.device)
-            dummy_entropy = torch.zeros(batch_size, seq_len, device=self.device)
+            # Create dummy tensors that still participate in autograd so backward doesn't fail
+            try:
+                param_sum = torch.sum(
+                    next(self.policy.parameters())
+                )  # touch params to build a graph
+                base = param_sum * 0.0
+            except StopIteration:
+                base = torch.tensor(0.0, device=self.device)
+            dummy_logprobs = (
+                base + torch.zeros(batch_size, seq_len, device=self.device)
+            ).requires_grad_(True)
+            dummy_entropy = (
+                base + torch.zeros(batch_size, seq_len, device=self.device)
+            ).requires_grad_(True)
             return dummy_logprobs, dummy_entropy
 
     def save(self, path: str) -> None:
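The dummies are tied to a real parameter so that a rank hitting this error path still produces outputs connected to the autograd graph: a plain torch.zeros has no grad_fn, so backward would reach no parameters, and under DDP a rank contributing no gradients can stall the collective gradient sync. A standalone sketch of the idea, independent of GRPOLearner:

    import torch

    w = torch.nn.Parameter(torch.randn(3))
    base = torch.sum(w) * 0.0           # value 0.0, but grad_fn links back to w
    dummy = base + torch.zeros(2, 5)    # broadcasting keeps that link in every element

    dummy.sum().backward()              # succeeds: w.grad is a tensor of zeros
    assert w.grad is not None and torch.all(w.grad == 0)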
hud/rl/train.py CHANGED
@@ -13,7 +13,7 @@ import json
 import logging
 from datetime import datetime
 from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, cast
 
 import hud
 from hud.rl.actor import Actor
@@ -25,6 +25,7 @@ from hud.rl.distributed import (
     get_global_rank,
     get_world_size,
     is_main_process,
+    scatter_object,
     setup_distributed,
     synchronize,
 )
@@ -133,53 +134,71 @@ async def train(config: Config, tasks: list[Task]) -> None:
         global_reward_stats = None
         global_advantage_stats = None
 
-        # Only rank 0 runs tasks and collects traces
+        # Step-state gate: ensure all ranks branch coherently
+        state = {"ok": False, "err": None, "num_samples": 0}
+        rank_samples = None
+        episode_time_value = None
+
+        # Only rank 0 runs tasks and prepares distribution
         if is_main_process() and actor is not None:
             import time
 
-            episode_start_time = time.time()
-            traces = await actor.run_tasks(tasks, job_id=job_id)
-            episode_time = time.time() - episode_start_time
-            hud_console.info(f"Sampled {len(traces)} traces in {episode_time:.1f}s")
-            trace_buffer.add(traces)
-            global_reward_stats = [trace.reward for trace in traces]
-
-            # Get all traces from buffer for distribution
-            all_traces = trace_buffer.sample_traces()
-
-            assert len(traces) == len(all_traces)  # noqa: S101
-
-            # Preprocess traces to training samples
-            preprocessed_traces = preprocess_advantages(all_traces, config)
-
-            # Store these for later use in metrics
-            global_advantage_stats = [sample.advantage for sample in preprocessed_traces]
-
-            # Distribute preprocessed samples in groups across ranks
-            gpu_batch_size = len(preprocessed_traces) // num_gpus
-            rank_samples = [
-                preprocessed_traces[i : i + gpu_batch_size]
-                for i in range(0, len(preprocessed_traces), gpu_batch_size)
-            ]
+            try:
+                episode_start_time = time.time()
+                traces = await actor.run_tasks(tasks, job_id=job_id)
+                episode_time = time.time() - episode_start_time
+                hud_console.info(f"Sampled {len(traces)} traces in {episode_time:.1f}s")
+                trace_buffer.add(traces)
+                global_reward_stats = [trace.reward for trace in traces]
+
+                # Get all traces from buffer for distribution
+                all_traces = trace_buffer.sample_traces()
+
+                # Preprocess traces to training samples
+                preprocessed_traces = preprocess_advantages(all_traces, config)
+
+                # Store these for later use in metrics
+                global_advantage_stats = [sample.advantage for sample in preprocessed_traces]
+
+                # Distribute preprocessed samples in groups across ranks via scatter
+                # Ensure list length is a multiple of num_gpus by allowing empty per-rank slices
+                gpu_batch_size = max(1, (len(preprocessed_traces) + num_gpus - 1) // num_gpus)
+                rank_samples = [
+                    preprocessed_traces[i : i + gpu_batch_size]
+                    for i in range(0, len(preprocessed_traces), gpu_batch_size)
+                ]
+                # Pad rank_samples to exactly num_gpus entries
+                if len(rank_samples) < num_gpus:
+                    rank_samples.extend([[] for _ in range(num_gpus - len(rank_samples))])
+
+                # Log distribution info
+                dist_msg = (
+                    f"Distributing {len(preprocessed_traces)} samples as {gpu_batch_size} "
+                    f"sized batches across {num_gpus} GPUs"
+                )
+                hud_console.info(dist_msg)
+                for rank in range(num_gpus):
+                    n_samples = len(rank_samples[rank]) if rank < len(rank_samples) else 0
+                    hud_console.info(f" Rank {rank}: {n_samples} samples")
+
+                hud_console.section_title(f"Training on {len(all_traces)} traces")
+                episode_time_value = episode_time
+
+                state.update({"ok": True, "num_samples": len(preprocessed_traces)})
+            except Exception as e:
+                state.update({"ok": False, "err": str(e)})
+
+        # Broadcast step-state to keep ranks in lockstep
+        state = broadcast_object(state, src=0)
+        if not state.get("ok", False):
+            hud_console.warning("Step failed on rank 0; skipping this step coherently")
+            synchronize()
+            continue
 
-        # Log distribution info
-        hud_console.info(
-            f"Distributing {len(preprocessed_traces)} samples as {gpu_batch_size} sized batches across {num_gpus} GPUs"  # noqa: E501
-        )
-        for rank in range(num_gpus):
-            n_samples = len(rank_samples[rank])
-            hud_console.info(f" Rank {rank}: {n_samples} samples")
-
-        hud_console.section_title(f"Training on {len(all_traces)} traces")
-        episode_time_value = episode_time
-        else:
-            rank_samples = None
-            episode_time_value = None
-
-        # Broadcast each rank's samples and episode time
-        rank_samples = broadcast_object(rank_samples, src=0)
+        # Scatter per-rank samples; each rank receives only its slice
+        my_samples = scatter_object(rank_samples if is_main_process() else None, src=0)
+        # Broadcast the episode time (small object)
         episode_time_value = broadcast_object(episode_time_value, src=0)
-        my_samples = rank_samples[get_global_rank()] if rank_samples else []
 
         # Process only assigned samples
        last_metrics = learner.update(my_samples)
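The try/except plus broadcast forms a step-state gate: rank 0 reports whether the rollout step succeeded, every rank then takes the same branch, and no rank blocks in a collective that the others never enter. The control flow reduced to its skeleton (run_rollouts, train_step, and num_steps are hypothetical placeholders):

    from hud.rl.distributed import broadcast_object, is_main_process, synchronize

    for step in range(num_steps):
        state = {"ok": False, "err": None}
        if is_main_process():
            try:
                run_rollouts()       # rank-0-only work that may fail
                state["ok"] = True
            except Exception as e:
                state["err"] = str(e)
        state = broadcast_object(state, src=0)  # all ranks see the same verdict
        if not state["ok"]:
            synchronize()            # stay aligned, then skip to the next step
            continue
        train_step()                 # every rank reaches this together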
@@ -356,7 +375,8 @@ async def main() -> None:
     )
 
     # Run training
-    await train(config, tasks)
+    tasks_typed = cast("list[Task]", tasks)
+    await train(config, tasks_typed)
 
 
 if __name__ == "__main__":
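For context, typing.cast changes nothing at runtime; it simply returns its argument and only informs the static type checker, so this hunk affects type checking, not behavior:

    from typing import cast

    values = cast("list[int]", ["not", "validated"])  # no runtime check occurs
    assert values == ["not", "validated"]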
hud/utils/tests/test_version.py CHANGED
@@ -5,4 +5,4 @@ def test_import():
     """Test that the package can be imported."""
     import hud
 
-    assert hud.__version__ == "0.4.43"
+    assert hud.__version__ == "0.4.44"
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
 
 from __future__ import annotations
 
-__version__ = "0.4.43"
+__version__ = "0.4.44"
hud_python-0.4.44.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.4.43
+Version: 0.4.44
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
hud_python-0.4.44.dist-info/RECORD CHANGED
@@ -2,7 +2,7 @@ hud/__init__.py,sha256=JMDFUE1pP0J1Xl_miBdt7ERvoffZmTzSFe8yxz512A8,552
 hud/__main__.py,sha256=YR8Dq8OhINOsVfQ55PmRXXg4fEK84Rt_-rMtJ5rvhWo,145
 hud/settings.py,sha256=disObWa-DgXzoDcCDp3y1dTPaNsbR0IvoMJL9Eg4zyo,3947
 hud/types.py,sha256=pmPj_8emfMIfEY_fRS8NgIJ56kCsolWSqQjyCzXDaGY,11072
-hud/version.py,sha256=AekBbsq3gM3fHm0EOZE1KbYDAcZYlNagJ4ps3KU-byo,105
+hud/version.py,sha256=j-0v9E6ZVwBdP3D1A-70Ie5rXP137HYVUJCZeIwO3_0,105
 hud/agents/__init__.py,sha256=UoIkljWdbq4bM0LD-mSaw6w826EqdEjOk7r6glNYwYQ,286
 hud/agents/base.py,sha256=_u1zR3gXzZ1RlTCUYdMcvgHqdJBC4-AB1lZt0yBx8lg,35406
 hud/agents/claude.py,sha256=TGhm5gE2ltINDAdEsDxKuT9iGMQ5G87R6kmabU3KPt8,16101
@@ -121,9 +121,9 @@ hud/rl/actor.py,sha256=H6gwRGRY1YpkOyiaJ9yai8yQwcI-Gx0dFxd18jpLx_Q,6950
 hud/rl/buffer.py,sha256=z47HOjOBJx3umUzzUfdtq_N4ZoJ8FMBPkX8YQKBtd3A,15457
 hud/rl/chat_template.jinja,sha256=XTdzI8oFGEcSA-exKxyHaprwRDmX5Am1KEb0VxvUc6U,4965
 hud/rl/config.py,sha256=akQ2a53NX3Dh1UWgMyw7mTxq33eiQbZcBpmKTzd79Xk,5624
-hud/rl/distributed.py,sha256=8avhrb0lHYkhW22Z7MfkqSnlczWj5jMrUMEtkcoCf74,2473
-hud/rl/learner.py,sha256=FKIgIIghsNiDr_g090xokOO_BxNmTSj1O-TSJzIq_Uw,24703
-hud/rl/train.py,sha256=hmobsaGp5UwK8u9oJGFdxCfI7rrxM-XbeVy-TkzQwxU,13804
+hud/rl/distributed.py,sha256=ZIh5GTMuRl_tHV_62iWsYgrV--AylBelp_TZQnhwfy4,3391
+hud/rl/learner.py,sha256=GowGqhWyCMPfrxD9V3KyOdqF0FDeUMUSCA0QPnE1RWE,25855
+hud/rl/train.py,sha256=zO5TVvGWQdYfdhSCOSMaahfBVwcWb0Fxa80LiInx01c,15005
 hud/rl/types.py,sha256=lrLKo7iaqodYth2EyeuOQfLiuzXfYM2eJjPmpObrD7c,3965
 hud/rl/utils.py,sha256=IsgVUUibxnUzb32a4mu1sYrgJC1CwoG9E-Dd5y5VDOA,19115
 hud/rl/vllm_adapter.py,sha256=2wnTfoXPI4C9EzhVxk0GU-ArLjX7hgXS0BndMwN8Ppg,4751
@@ -218,10 +218,10 @@ hud/utils/tests/test_init.py,sha256=2QLQSGgyP9wJhOvPCusm_zjJad0qApOZi1BXpxcdHXQ,
 hud/utils/tests/test_mcp.py,sha256=0pUa16mL-bqbZDXp5NHBnt1gO5o10BOg7zTMHZ1DNPM,4023
 hud/utils/tests/test_progress.py,sha256=QSF7Kpi03Ff_l3mAeqW9qs1nhK50j9vBiSobZq7T4f4,7394
 hud/utils/tests/test_telemetry.py,sha256=5jl7bEx8C8b-FfFUko5pf4UY-mPOR-9HaeL98dGtVHM,2781
-hud/utils/tests/test_version.py,sha256=5Kh7gFTdzSIvPh8KSkpP9Rq-4a4rJchabweQncIcYHQ,160
+hud/utils/tests/test_version.py,sha256=B9UhswFSFbHf544swTgKJdq6TMat27bGIzFb8Sy-bKc,160
 hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hud_python-0.4.43.dist-info/METADATA,sha256=qTNrSt6NhfZR1_KzmtIGgZXbvAUZBlsh1xp_1JZMZaU,22275
-hud_python-0.4.43.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hud_python-0.4.43.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
-hud_python-0.4.43.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
-hud_python-0.4.43.dist-info/RECORD,,
+hud_python-0.4.44.dist-info/METADATA,sha256=bjz1T1aLq3yUaoW_Ih9ZQjGD8X-nKRTYmgeggS568LM,22275
+hud_python-0.4.44.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hud_python-0.4.44.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
+hud_python-0.4.44.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
+hud_python-0.4.44.dist-info/RECORD,,