hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (274)
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +11 -5
  3. hud/agents/base.py +220 -500
  4. hud/agents/claude.py +200 -240
  5. hud/agents/gemini.py +275 -0
  6. hud/agents/gemini_cua.py +335 -0
  7. hud/agents/grounded_openai.py +98 -100
  8. hud/agents/misc/integration_test_agent.py +51 -20
  9. hud/agents/misc/response_agent.py +41 -36
  10. hud/agents/openai.py +291 -292
  11. hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
  12. hud/agents/operator.py +211 -0
  13. hud/agents/tests/conftest.py +133 -0
  14. hud/agents/tests/test_base.py +300 -622
  15. hud/agents/tests/test_base_runtime.py +233 -0
  16. hud/agents/tests/test_claude.py +379 -210
  17. hud/agents/tests/test_client.py +9 -10
  18. hud/agents/tests/test_gemini.py +369 -0
  19. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  20. hud/agents/tests/test_openai.py +376 -140
  21. hud/agents/tests/test_operator.py +362 -0
  22. hud/agents/tests/test_run_eval.py +179 -0
  23. hud/cli/__init__.py +461 -545
  24. hud/cli/analyze.py +43 -5
  25. hud/cli/build.py +664 -110
  26. hud/cli/debug.py +8 -5
  27. hud/cli/dev.py +882 -734
  28. hud/cli/eval.py +782 -668
  29. hud/cli/flows/dev.py +167 -0
  30. hud/cli/flows/init.py +191 -0
  31. hud/cli/flows/tasks.py +153 -56
  32. hud/cli/flows/templates.py +151 -0
  33. hud/cli/flows/tests/__init__.py +1 -0
  34. hud/cli/flows/tests/test_dev.py +126 -0
  35. hud/cli/init.py +60 -58
  36. hud/cli/push.py +29 -11
  37. hud/cli/rft.py +311 -0
  38. hud/cli/rft_status.py +145 -0
  39. hud/cli/tests/test_analyze.py +5 -5
  40. hud/cli/tests/test_analyze_metadata.py +3 -2
  41. hud/cli/tests/test_analyze_module.py +120 -0
  42. hud/cli/tests/test_build.py +108 -6
  43. hud/cli/tests/test_build_failure.py +41 -0
  44. hud/cli/tests/test_build_module.py +50 -0
  45. hud/cli/tests/test_cli_init.py +6 -1
  46. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  47. hud/cli/tests/test_cli_root.py +140 -0
  48. hud/cli/tests/test_convert.py +361 -0
  49. hud/cli/tests/test_debug.py +12 -10
  50. hud/cli/tests/test_dev.py +197 -0
  51. hud/cli/tests/test_eval.py +251 -0
  52. hud/cli/tests/test_eval_bedrock.py +51 -0
  53. hud/cli/tests/test_init.py +124 -0
  54. hud/cli/tests/test_main_module.py +11 -5
  55. hud/cli/tests/test_mcp_server.py +12 -100
  56. hud/cli/tests/test_push_happy.py +74 -0
  57. hud/cli/tests/test_push_wrapper.py +23 -0
  58. hud/cli/tests/test_registry.py +1 -1
  59. hud/cli/tests/test_utils.py +1 -1
  60. hud/cli/{rl → utils}/celebrate.py +14 -12
  61. hud/cli/utils/config.py +18 -1
  62. hud/cli/utils/docker.py +130 -4
  63. hud/cli/utils/env_check.py +9 -9
  64. hud/cli/utils/git.py +136 -0
  65. hud/cli/utils/interactive.py +39 -5
  66. hud/cli/utils/metadata.py +69 -0
  67. hud/cli/utils/runner.py +1 -1
  68. hud/cli/utils/server.py +2 -2
  69. hud/cli/utils/source_hash.py +3 -3
  70. hud/cli/utils/tasks.py +4 -1
  71. hud/cli/utils/tests/__init__.py +0 -0
  72. hud/cli/utils/tests/test_config.py +58 -0
  73. hud/cli/utils/tests/test_docker.py +93 -0
  74. hud/cli/utils/tests/test_docker_hints.py +71 -0
  75. hud/cli/utils/tests/test_env_check.py +74 -0
  76. hud/cli/utils/tests/test_environment.py +42 -0
  77. hud/cli/utils/tests/test_git.py +142 -0
  78. hud/cli/utils/tests/test_interactive_module.py +60 -0
  79. hud/cli/utils/tests/test_local_runner.py +50 -0
  80. hud/cli/utils/tests/test_logging_utils.py +23 -0
  81. hud/cli/utils/tests/test_metadata.py +49 -0
  82. hud/cli/utils/tests/test_package_runner.py +35 -0
  83. hud/cli/utils/tests/test_registry_utils.py +49 -0
  84. hud/cli/utils/tests/test_remote_runner.py +25 -0
  85. hud/cli/utils/tests/test_runner_modules.py +52 -0
  86. hud/cli/utils/tests/test_source_hash.py +36 -0
  87. hud/cli/utils/tests/test_tasks.py +80 -0
  88. hud/cli/utils/version_check.py +258 -0
  89. hud/cli/{rl → utils}/viewer.py +2 -2
  90. hud/clients/README.md +12 -11
  91. hud/clients/__init__.py +4 -3
  92. hud/clients/base.py +166 -26
  93. hud/clients/environment.py +51 -0
  94. hud/clients/fastmcp.py +13 -6
  95. hud/clients/mcp_use.py +40 -15
  96. hud/clients/tests/test_analyze_scenarios.py +206 -0
  97. hud/clients/tests/test_protocol.py +9 -3
  98. hud/datasets/__init__.py +23 -20
  99. hud/datasets/loader.py +327 -0
  100. hud/datasets/runner.py +192 -105
  101. hud/datasets/tests/__init__.py +0 -0
  102. hud/datasets/tests/test_loader.py +221 -0
  103. hud/datasets/tests/test_utils.py +315 -0
  104. hud/datasets/utils.py +270 -90
  105. hud/environment/__init__.py +50 -0
  106. hud/environment/connection.py +206 -0
  107. hud/environment/connectors/__init__.py +33 -0
  108. hud/environment/connectors/base.py +68 -0
  109. hud/environment/connectors/local.py +177 -0
  110. hud/environment/connectors/mcp_config.py +109 -0
  111. hud/environment/connectors/openai.py +101 -0
  112. hud/environment/connectors/remote.py +172 -0
  113. hud/environment/environment.py +694 -0
  114. hud/environment/integrations/__init__.py +45 -0
  115. hud/environment/integrations/adk.py +67 -0
  116. hud/environment/integrations/anthropic.py +196 -0
  117. hud/environment/integrations/gemini.py +92 -0
  118. hud/environment/integrations/langchain.py +82 -0
  119. hud/environment/integrations/llamaindex.py +68 -0
  120. hud/environment/integrations/openai.py +238 -0
  121. hud/environment/mock.py +306 -0
  122. hud/environment/router.py +112 -0
  123. hud/environment/scenarios.py +493 -0
  124. hud/environment/tests/__init__.py +1 -0
  125. hud/environment/tests/test_connection.py +317 -0
  126. hud/environment/tests/test_connectors.py +218 -0
  127. hud/environment/tests/test_environment.py +161 -0
  128. hud/environment/tests/test_integrations.py +257 -0
  129. hud/environment/tests/test_local_connectors.py +201 -0
  130. hud/environment/tests/test_scenarios.py +280 -0
  131. hud/environment/tests/test_tools.py +208 -0
  132. hud/environment/types.py +23 -0
  133. hud/environment/utils/__init__.py +35 -0
  134. hud/environment/utils/formats.py +215 -0
  135. hud/environment/utils/schema.py +171 -0
  136. hud/environment/utils/tool_wrappers.py +113 -0
  137. hud/eval/__init__.py +67 -0
  138. hud/eval/context.py +674 -0
  139. hud/eval/display.py +299 -0
  140. hud/eval/instrument.py +185 -0
  141. hud/eval/manager.py +466 -0
  142. hud/eval/parallel.py +268 -0
  143. hud/eval/task.py +340 -0
  144. hud/eval/tests/__init__.py +1 -0
  145. hud/eval/tests/test_context.py +178 -0
  146. hud/eval/tests/test_eval.py +210 -0
  147. hud/eval/tests/test_manager.py +152 -0
  148. hud/eval/tests/test_parallel.py +168 -0
  149. hud/eval/tests/test_task.py +145 -0
  150. hud/eval/types.py +63 -0
  151. hud/eval/utils.py +183 -0
  152. hud/patches/__init__.py +19 -0
  153. hud/patches/mcp_patches.py +151 -0
  154. hud/patches/warnings.py +54 -0
  155. hud/samples/browser.py +4 -4
  156. hud/server/__init__.py +2 -1
  157. hud/server/low_level.py +2 -1
  158. hud/server/router.py +164 -0
  159. hud/server/server.py +567 -80
  160. hud/server/tests/test_mcp_server_integration.py +11 -11
  161. hud/server/tests/test_mcp_server_more.py +1 -1
  162. hud/server/tests/test_server_extra.py +2 -0
  163. hud/settings.py +45 -3
  164. hud/shared/exceptions.py +36 -10
  165. hud/shared/hints.py +26 -1
  166. hud/shared/requests.py +15 -3
  167. hud/shared/tests/test_exceptions.py +40 -31
  168. hud/shared/tests/test_hints.py +167 -0
  169. hud/telemetry/__init__.py +20 -19
  170. hud/telemetry/exporter.py +201 -0
  171. hud/telemetry/instrument.py +158 -253
  172. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  173. hud/telemetry/tests/test_exporter.py +258 -0
  174. hud/telemetry/tests/test_instrument.py +401 -0
  175. hud/tools/__init__.py +16 -2
  176. hud/tools/apply_patch.py +639 -0
  177. hud/tools/base.py +54 -4
  178. hud/tools/bash.py +2 -2
  179. hud/tools/computer/__init__.py +4 -0
  180. hud/tools/computer/anthropic.py +2 -2
  181. hud/tools/computer/gemini.py +385 -0
  182. hud/tools/computer/hud.py +23 -6
  183. hud/tools/computer/openai.py +20 -21
  184. hud/tools/computer/qwen.py +434 -0
  185. hud/tools/computer/settings.py +37 -0
  186. hud/tools/edit.py +3 -7
  187. hud/tools/executors/base.py +4 -2
  188. hud/tools/executors/pyautogui.py +1 -1
  189. hud/tools/grounding/grounded_tool.py +13 -18
  190. hud/tools/grounding/grounder.py +10 -31
  191. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  192. hud/tools/jupyter.py +330 -0
  193. hud/tools/playwright.py +18 -3
  194. hud/tools/shell.py +308 -0
  195. hud/tools/tests/test_apply_patch.py +718 -0
  196. hud/tools/tests/test_computer.py +4 -9
  197. hud/tools/tests/test_computer_actions.py +24 -2
  198. hud/tools/tests/test_jupyter_tool.py +181 -0
  199. hud/tools/tests/test_shell.py +596 -0
  200. hud/tools/tests/test_submit.py +85 -0
  201. hud/tools/tests/test_types.py +193 -0
  202. hud/tools/types.py +21 -1
  203. hud/types.py +167 -57
  204. hud/utils/__init__.py +2 -0
  205. hud/utils/env.py +67 -0
  206. hud/utils/hud_console.py +61 -3
  207. hud/utils/mcp.py +15 -58
  208. hud/utils/strict_schema.py +162 -0
  209. hud/utils/tests/test_init.py +1 -2
  210. hud/utils/tests/test_mcp.py +1 -28
  211. hud/utils/tests/test_pretty_errors.py +186 -0
  212. hud/utils/tests/test_tool_shorthand.py +154 -0
  213. hud/utils/tests/test_version.py +1 -1
  214. hud/utils/types.py +20 -0
  215. hud/version.py +1 -1
  216. hud_python-0.5.1.dist-info/METADATA +264 -0
  217. hud_python-0.5.1.dist-info/RECORD +299 -0
  218. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
  219. hud/agents/langchain.py +0 -261
  220. hud/agents/lite_llm.py +0 -72
  221. hud/cli/rl/__init__.py +0 -180
  222. hud/cli/rl/config.py +0 -101
  223. hud/cli/rl/display.py +0 -133
  224. hud/cli/rl/gpu.py +0 -63
  225. hud/cli/rl/gpu_utils.py +0 -321
  226. hud/cli/rl/local_runner.py +0 -595
  227. hud/cli/rl/presets.py +0 -96
  228. hud/cli/rl/remote_runner.py +0 -463
  229. hud/cli/rl/rl_api.py +0 -150
  230. hud/cli/rl/vllm.py +0 -177
  231. hud/cli/rl/wait_utils.py +0 -89
  232. hud/datasets/parallel.py +0 -687
  233. hud/misc/__init__.py +0 -1
  234. hud/misc/claude_plays_pokemon.py +0 -292
  235. hud/otel/__init__.py +0 -35
  236. hud/otel/collector.py +0 -142
  237. hud/otel/config.py +0 -181
  238. hud/otel/context.py +0 -570
  239. hud/otel/exporters.py +0 -369
  240. hud/otel/instrumentation.py +0 -135
  241. hud/otel/processors.py +0 -121
  242. hud/otel/tests/__init__.py +0 -1
  243. hud/otel/tests/test_processors.py +0 -197
  244. hud/rl/README.md +0 -30
  245. hud/rl/__init__.py +0 -1
  246. hud/rl/actor.py +0 -176
  247. hud/rl/buffer.py +0 -405
  248. hud/rl/chat_template.jinja +0 -101
  249. hud/rl/config.py +0 -192
  250. hud/rl/distributed.py +0 -132
  251. hud/rl/learner.py +0 -637
  252. hud/rl/tests/__init__.py +0 -1
  253. hud/rl/tests/test_learner.py +0 -186
  254. hud/rl/train.py +0 -382
  255. hud/rl/types.py +0 -101
  256. hud/rl/utils/start_vllm_server.sh +0 -30
  257. hud/rl/utils.py +0 -524
  258. hud/rl/vllm_adapter.py +0 -143
  259. hud/telemetry/job.py +0 -352
  260. hud/telemetry/replay.py +0 -74
  261. hud/telemetry/tests/test_replay.py +0 -40
  262. hud/telemetry/tests/test_trace.py +0 -63
  263. hud/telemetry/trace.py +0 -158
  264. hud/utils/agent_factories.py +0 -86
  265. hud/utils/async_utils.py +0 -65
  266. hud/utils/group_eval.py +0 -223
  267. hud/utils/progress.py +0 -149
  268. hud/utils/tasks.py +0 -127
  269. hud/utils/tests/test_async_utils.py +0 -173
  270. hud/utils/tests/test_progress.py +0 -261
  271. hud_python-0.4.45.dist-info/METADATA +0 -552
  272. hud_python-0.4.45.dist-info/RECORD +0 -228
  273. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
  274. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/rl/utils.py DELETED
@@ -1,524 +0,0 @@
-"""Utility functions for RL training."""
-
-from __future__ import annotations
-
-import base64
-import io
-import logging
-import os
-import random
-from pathlib import Path
-from typing import TYPE_CHECKING, Any
-
-import numpy as np
-import torch
-from PIL import Image
-from transformers.utils.chat_template_utils import render_jinja_template
-
-from hud.utils.hud_console import HUDConsole
-
-from .types import TrainingSample
-
-if TYPE_CHECKING:
-    from hud.types import Trace
-
-    from .config import Config
-
-logger = logging.getLogger(__name__)
-hud_console = HUDConsole(logger)
-
-
-def set_seed(seed: int) -> None:
-    """Set random seeds for reproducibility."""
-    random.seed(seed)
-    torch.manual_seed(seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed_all(seed)
-
-
-def load_chat_template(path: str) -> str:
-    """Load chat template from file."""
-    with open(path) as f:
-        return f.read()
-
-
-def ensure_dir(path: str) -> None:
-    """Create directory if it doesn't exist."""
-    os.makedirs(path, exist_ok=True)
-
-
-def get_memory_usage() -> float:
-    if torch.cuda.is_available():
-        torch.cuda.synchronize()
-        return torch.cuda.memory_allocated() / 1024**3
-    return 0.0
-
-
-def get_gpu_utilization() -> float:
-    """Get current GPU utilization percentage (0-100)."""
-    if not torch.cuda.is_available():
-        return 0.0
-
-    try:
-        import nvidia_ml_py as nvml  # type: ignore
-
-        nvml.nvmlInit()
-        device_id = torch.cuda.current_device()
-        handle = nvml.nvmlDeviceGetHandleByIndex(device_id)
-        util = nvml.nvmlDeviceGetUtilizationRates(handle)
-        return float(util.gpu)
-    except Exception:
-        # Fallback: estimate based on memory usage
-        # This is less accurate but works without nvidia-ml-py
-        return min(100.0, (torch.cuda.memory_allocated() / torch.cuda.max_memory_allocated()) * 100)
-
-
-def aggregate_metrics_across_ranks(
-    metrics: Any, metrics_to_aggregate: list[str] | None = None
-) -> None:
-    """Aggregate metrics across all ranks for proper distributed statistics.
-
-    Args:
-        metrics: TrainingMetrics object to update in-place
-        metrics_to_aggregate: List of metric names to aggregate. If None, aggregates all numeric metrics.
-
-    This function:
-    1. Gathers metric values from all ranks
-    2. Computes proper mean/std across all GPUs
-    3. Updates the metrics object in-place (only on rank 0)
-    """  # noqa: E501
-    from hud.rl.distributed import get_local_rank, get_world_size, is_main_process
-
-    if get_world_size() <= 1:
-        return  # Nothing to aggregate in single GPU mode
-
-    # Default metrics that typically vary across GPUs
-    if metrics_to_aggregate is None:
-        metrics_to_aggregate = [
-            "training_time",
-            "samples_per_second",
-            "gpu_util",
-            "gpu_memory",
-            "grad_norm",
-            # Include core training scalars
-            "loss",
-            "kl",
-            "entropy",
-            "tokens",
-            "policy_ratio",
-        ]
-
-    # Collect current values from this rank
-    local_values = {}
-    for metric_name in metrics_to_aggregate:
-        if hasattr(metrics, metric_name):
-            metric_obj = getattr(metrics, metric_name)
-            # Get the last value if available, otherwise 0
-            local_values[metric_name] = metric_obj.values[-1] if metric_obj.values else 0.0
-
-    # Convert to tensor for distributed gathering
-    values_tensor = torch.tensor(
-        list(local_values.values()), device=f"cuda:{get_local_rank()}", dtype=torch.float32
-    )
-
-    # Gather from all ranks using NCCL-supported all_gather
-    world_size = get_world_size()
-    gather_list = [torch.zeros_like(values_tensor) for _ in range(world_size)]
-    torch.distributed.all_gather(gather_list, values_tensor)
-
-    # Update metrics on main process only
-    if is_main_process():
-        # Reshape: [num_gpus, num_metrics]
-        all_values = torch.stack(gather_list).cpu().numpy()
-
-        # Update each metric with aggregated values
-        for i, metric_name in enumerate(local_values.keys()):
-            metric_obj = getattr(metrics, metric_name)
-            gpu_values = all_values[:, i].tolist()
-
-            # Replace last value with cross-rank mean for reporting
-            if len(metric_obj.values) == 0:
-                metric_obj.values.append(0.0)
-            metric_obj.values[-1] = float(sum(gpu_values) / len(gpu_values))
-            # Recompute mean/std across history using updated last value
-            metric_obj.mean = float(sum(metric_obj.values) / len(metric_obj.values))
-            variance = sum((x - metric_obj.mean) ** 2 for x in metric_obj.values) / len(
-                metric_obj.values
-            )
-            metric_obj.std = float(variance**0.5)
-
-
-def b64_to_pil(b64_str: str) -> Image.Image:
-    """Convert base64 string to PIL Image."""
-    return Image.open(io.BytesIO(base64.b64decode(b64_str))).convert("RGB")
-
-
-def build_assistant_masks(
-    input_ids: list[list[int]],
-    tokenizer: Any,
-) -> list[list[int]]:
-    """
-    Build assistant masks from token IDs by finding assistant turns.
-
-    Args:
-        input_ids: List of token sequences
-        tokenizer: Tokenizer to decode tokens and get special token IDs
-        verbose: Whether to print verbose information
-
-    Returns:
-        List of binary masks indicating assistant tokens
-    """
-    id_im_start = tokenizer.convert_tokens_to_ids("<|im_start|>")
-    id_im_end = tokenizer.convert_tokens_to_ids("<|im_end|>")
-    id_assistant = tokenizer.convert_tokens_to_ids("assistant")
-
-    assistant_masks: list[list[int]] = []
-
-    for seq in input_ids:
-        mask = [0] * len(seq)
-        i_tok = 0
-        assistant_turn_count = 0
-
-        while i_tok < len(seq):
-            # Detect start of assistant turn
-            if (
-                seq[i_tok] == id_im_start
-                and i_tok + 1 < len(seq)
-                and seq[i_tok + 1] == id_assistant
-            ):
-                assistant_turn_count += 1
-
-                # Skip '<|im_start|>', 'assistant' and possible newline token
-                i_tok += 2
-                # Check for newline after 'assistant'
-                if i_tok < len(seq) and tokenizer.decode([seq[i_tok]]) == "\n":
-                    i_tok += 1
-
-                # Skip leading spaces after assistant\n
-                while i_tok < len(seq) and tokenizer.decode([seq[i_tok]]).strip() == "":
-                    i_tok += 1
-
-                assistant_content_start = i_tok
-
-                # Mark tokens until we hit <|im_end|>
-                content_end = i_tok
-                while i_tok < len(seq) and seq[i_tok] != id_im_end:
-                    content_end = i_tok + 1  # Track last non-<|im_end|> position
-                    mask[i_tok] = 1
-                    i_tok += 1
-
-                # Remove trailing spaces from the mask
-                while content_end > assistant_content_start:
-                    if (
-                        mask[content_end - 1] == 1
-                        and tokenizer.decode([seq[content_end - 1]]).strip() == ""
-                    ):
-                        mask[content_end - 1] = 0
-                        content_end -= 1
-                    else:
-                        break
-
-                # Skip the <|im_end|> token
-                i_tok += 1
-            else:
-                i_tok += 1
-
-        assistant_masks.append(mask)
-
-    return assistant_masks
-
-
-def prepare_conversation_history(
-    conversation_history: list[dict[str, Any]],
-) -> tuple[list[dict[str, Any]], list[Image.Image]]:
-    """Sanitize conversation history to avoid vLLM errors."""
-    sanitized_messages = []
-    images = []
-    for m in conversation_history:
-        if "tool_calls" in m:
-            m = {
-                "role": m["role"],
-                "content": m.get("content", ""),
-                "tool_calls": [
-                    tc.model_dump() if not isinstance(tc, dict) else tc
-                    for tc in m.get("tool_calls", [])
-                ],
-            }
-        elif m.get("role") == "user":
-            user_content = m.get("content", [])
-            for c in user_content:
-                if isinstance(c, dict) and c.get("type") == "image_url":
-                    image_url = c.get("image_url", {})
-                    url = image_url.get("url", "")
-                    if url.startswith("data:image"):
-                        data = url.split(",", 1)[1] if "," in url else url
-                        images.append(b64_to_pil(data))
-                    elif isinstance(data, bytes | bytearray):
-                        images.append(Image.open(io.BytesIO(data)).convert("RGB"))
-                    c = {"type": "image"}
-            m["content"] = user_content
-        sanitized_messages.append(m)
-    return sanitized_messages, images
-
-
-def prepare_inputs(trace: Trace, processor: Any) -> dict[str, torch.Tensor]:
-    """
-    Prepare inputs from a trace.
-
-    Args:
-        trace: Trace to process
-        processor: Model processor
-
-    Returns:
-        Inputs for the model
-    """
-    if len(trace.messages) == 0:
-        return {}
-
-    # Get images for current turn
-    conversation, images = prepare_conversation_history(trace.messages)
-
-    # Get absolute path to chat template
-    chat_template_path = Path(__file__).parent / "chat_template.jinja"
-
-    # For VL models, processor has a tokenizer attribute; for text models, processor IS tokenizer
-    tokenizer = processor.tokenizer if hasattr(processor, "tokenizer") else processor
-
-    text_list, _ = render_jinja_template(
-        conversations=[conversation],
-        chat_template=load_chat_template(str(chat_template_path)),
-        tools=trace.info["tool_spec"] if trace.info["tool_spec"] else None,  # mcp_tools
-        return_assistant_tokens_mask=True,
-        **tokenizer.special_tokens_map,
-    )
-    # For text models, don't pass images parameter
-    if hasattr(processor, "tokenizer"):
-        # VL model - processor accepts images
-        inputs = processor(
-            images=images if len(images) > 0 else None,
-            text=text_list,
-            return_offsets_mapping=False,  # we no longer need char offsets
-        )
-    else:
-        # Text model - processor is tokenizer, doesn't accept images
-        inputs = processor(
-            text=text_list,
-            return_offsets_mapping=False,  # we no longer need char offsets
-        )
-
-    assistant_masks = build_assistant_masks(inputs["input_ids"], tokenizer)
-    mask_tensor = torch.tensor(assistant_masks, dtype=torch.long)
-
-    # Ensure mask_tensor is 2D before slicing
-    if mask_tensor.dim() == 1:
-        mask_tensor = mask_tensor.unsqueeze(0)
-
-    # Slice to align with targets [B, T-1]
-    inputs["assistant_mask"] = mask_tensor[:, 1:].bool()
-
-    # Log amount of assistant tokens, and the first 10 tokens that are non 0, decoded
-    # assistant_batches = render_assistant_tokens(mask_tensor, inputs['input_ids'], processor)
-    inputs.convert_to_tensors(tensor_type="pt")
-
-    return inputs
-
-
-def render_assistant_tokens(
-    mask_tensor: torch.Tensor, input_ids: torch.Tensor, processor: Any
-) -> list[str]:
-    """Render assistant tokens as a list of continuous batches."""
-    # Get the mask as a 1D tensor
-    mask_1d = mask_tensor[0]
-
-    # Find continuous sequences of non-zero values
-    batches = []
-    start_idx = None
-
-    for i in range(len(mask_1d)):
-        if mask_1d[i] != 0 and start_idx is None:
-            # Start of a new batch
-            start_idx = i
-        elif mask_1d[i] == 0 and start_idx is not None:
-            # End of current batch
-            # Extract and decode the tokens in this batch
-            batch_token_ids = input_ids[0][start_idx:i].tolist()
-            decoded_batch = processor.decode(batch_token_ids)
-            batches.append(decoded_batch)
-            start_idx = None
-
-    # Handle case where the last batch extends to the end
-    if start_idx is not None:
-        batch_token_ids = input_ids[0][start_idx:].tolist()
-        decoded_batch = processor.decode(batch_token_ids)
-        batches.append(decoded_batch)
-
-    return batches
-
-
-def entropy_from_logits(logits: torch.Tensor) -> torch.Tensor:
-    """Calculate entropy from logits in a memory-efficient way."""
-    log_probs = torch.nn.functional.log_softmax(logits, dim=-1)
-    entropy = -torch.sum(torch.exp(log_probs) * log_probs, dim=-1)
-    return entropy
-
-
-def preprocess_advantages(group: list[Trace], config: Config) -> list[TrainingSample]:
-    """Preprocess a group of traces."""
-    group_size = config.training.group_size
-    if config.training.batch_level == "group":
-        groups = [group[i : i + group_size] for i in range(0, len(group), group_size)]
-    elif config.training.batch_level == "batch":
-        groups = [group]
-    else:
-        raise ValueError(f"Invalid batch level: {config.training.batch_level}")
-
-    all_samples = []
-    for i, group in enumerate(groups):
-        rewards = np.array([trace.reward for trace in group])
-        mean_reward = np.mean(rewards)
-        std_reward = np.std(rewards)
-
-        # Calculate advantages
-        samples = [TrainingSample(**trace.model_dump()) for trace in group]
-        for sample, reward in zip(samples, rewards, strict=True):
-            if sample.isError:
-                sample.advantage = torch.Tensor(np.array([0.0]))
-                continue
-            # No std (non-baseline GRPO)
-            if config.training.no_std:
-                advantage_value = reward - mean_reward
-            else:
-                # Avoid division by zero
-                if std_reward < 1e-6:
-                    advantage_value = torch.Tensor(np.array([0.0]))
-                else:
-                    advantage_value = (reward - mean_reward) / std_reward
-            # Leave one out RLOO/LOOP
-            if config.training.leave_one_out:
-                advantage_value = advantage_value * len(group) / (len(group) - 1)
-            sample.advantage = torch.Tensor(np.array([advantage_value]))
-        hud_console.info_log(
-            f"Advantages for group {i} [{mean_reward:.4f} ± {std_reward:.4f}]:"
-            f"{[round(sample.advantage.item(), 4) for sample in samples if sample.advantage is not None]}"  # noqa: E501
-        )
-
-        all_samples.extend(samples)
-
-    return all_samples
-
-
-def batch_training_samples(samples: list[TrainingSample]) -> list[TrainingSample]:
-    """Create batched model inputs from a list of TrainingSample.
-
-    Pads token sequences to the maximum length in the list and zero-pads
-    images to the maximum H/W when present. Returns a dictionary of batched
-    tensors suitable for a single forward pass. Keeps assistant_masks for
-    masked scoring.
-    """
-    if not samples:
-        hud_console.warning("No samples to batch.")
-        return []
-
-    for s in samples:
-        if (
-            "assistant_mask" not in s.inputs
-            or s.inputs["assistant_mask"].sum() == 0
-            or s.advantage == 0.0
-        ) and len(samples) > 1:
-            hud_console.info("Removing sample with zero advantage.")
-            samples.remove(s)
-
-    if len(samples) == 1:
-        return samples
-
-    import torch.nn.functional as F
-
-    new_samples = [TrainingSample()]
-
-    input_keys_to_expand = ["input_ids", "attention_mask", "assistant_mask"]
-    input_keys_to_cat = ["pixel_values", "image_grid_thw"]
-    updated_inputs: dict[str, list[torch.Tensor]] = {
-        k: [] for k in input_keys_to_expand + input_keys_to_cat
-    }
-
-    # Sanity check dimensions
-    for s in samples:
-        for k in input_keys_to_expand + input_keys_to_cat:
-            val = s.inputs.get(k)
-            if val is not None:
-                if k in input_keys_to_expand:
-                    if val.dim() == 2 and val.size(0) == 1:
-                        val = val[0]
-                    elif val.dim() != 1:
-                        raise ValueError(f"{k} has unexpected dimensions: {val.shape}")
-                updated_inputs[k].append(val)
-
-    # Pad 1D sequences to max length
-    max_len = max(t.size(-1) for t in updated_inputs["input_ids"])
-
-    def pad_1d(x: torch.Tensor, pad_to: int, pad_value: int) -> torch.Tensor:
-        pad = pad_to - x.size(-1)
-        return F.pad(x, (0, pad), value=pad_value) if pad > 0 else x
-
-    stacked_inputs: dict[str, torch.Tensor] = {}
-    # These are 1D sequences that need padding
-    for k in input_keys_to_expand:
-        if updated_inputs[k]:
-            # assistant_mask is T-1, others are T
-            if k == "assistant_mask":
-                stacked_inputs[k] = torch.stack(
-                    [pad_1d(x, max_len - 1, 0) for x in updated_inputs[k]], dim=0
-                )
-            else:
-                stacked_inputs[k] = torch.stack(
-                    [pad_1d(x, max_len, 0) for x in updated_inputs[k]], dim=0
-                )
-
-    for k in input_keys_to_cat:
-        if updated_inputs[k]:
-            # pixel_values and image_grid_thw are concatenated across all images from all samples
-            # Shape of pixel_values: (sum of all patches from all images, feature_dim)
-            # Shape of image_grid_thw: (sum of all images, 3)
-            stacked_inputs[k] = torch.cat(updated_inputs[k], dim=0)
-        else:
-            stacked_inputs.pop(k)
-
-    new_samples[0].inputs = stacked_inputs
-
-    # Pad logprobs to max length before stacking
-    # old_logprobs and ref_logprobs have shape [seq_len] or [1, seq_len] after gathering
-    def pad_logprobs(logprobs: torch.Tensor | None, max_len: int) -> torch.Tensor:
-        # Always work with 1D tensor, squeeze batch dim if present
-        if logprobs is None:
-            return torch.tensor([float("-inf")], dtype=torch.float32)
-        if logprobs.dim() == 2 and logprobs.size(0) == 1:
-            logprobs = logprobs.squeeze(0)
-        elif logprobs.dim() != 1:
-            raise ValueError(
-                f"Expected logprobs to have 1 or 2 dimensions, got {logprobs.dim()} with shape {logprobs.shape}"  # noqa: E501
-            )
-
-        # Now logprobs is [seq_len]
-        seq_len = logprobs.size(0) if logprobs is not None else 0
-        if seq_len < max_len:
-            pad_size = max_len - seq_len
-            # Pad with -inf (log of 0 probability) along sequence dimension
-            return F.pad(logprobs, (0, pad_size), value=float("-inf"))
-        return logprobs
-
-    # Stack padded logprobs (these are T-1 length)
-    old_logprobs_list = [pad_logprobs(s.old_logprobs, max_len - 1) for s in samples]
-    ref_logprobs_list = [pad_logprobs(s.ref_logprobs, max_len - 1) for s in samples]
-
-    new_samples[0].old_logprobs = torch.stack(old_logprobs_list, dim=0)
-    new_samples[0].ref_logprobs = torch.stack(ref_logprobs_list, dim=0)
-
-    # Stack advantages, checking for None values
-    advantages = [s.advantage for s in samples]
-    if any(adv is None for adv in advantages):
-        raise ValueError(
-            "Some samples have None advantages. Make sure advantages are computed before batching."
-        )
-    new_samples[0].advantage = torch.stack(advantages, dim=0)  # type: ignore
-
-    return new_samples
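
For orientation, the math at the heart of the removed preprocess_advantages is standard group-relative (GRPO-style) advantage normalization: each trace's reward is centered on its group mean and, unless no_std is set, divided by the group standard deviation, with an optional leave-one-out rescaling of len(group) / (len(group) - 1). A minimal self-contained sketch of that computation, using hypothetical rewards and plain floats rather than the package's TrainingSample tensors:

# Sketch of the removed GRPO-style advantage normalization.
# Rewards are hypothetical; the real code wrapped results in torch tensors.
import numpy as np

def group_advantages(
    rewards: list[float],
    no_std: bool = False,
    leave_one_out: bool = False,
) -> list[float]:
    r = np.asarray(rewards, dtype=np.float64)
    mean, std = r.mean(), r.std()
    if no_std:
        adv = r - mean                     # non-baseline GRPO: center only
    elif std < 1e-6:
        adv = np.zeros_like(r)             # degenerate group: all rewards equal
    else:
        adv = (r - mean) / std             # standard group normalization
    if leave_one_out:
        adv = adv * len(r) / (len(r) - 1)  # RLOO/LOOP correction
    return adv.tolist()

print(group_advantages([1.0, 0.0, 0.0, 1.0]))  # -> [1.0, -1.0, -1.0, 1.0]
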
hud/rl/vllm_adapter.py DELETED
@@ -1,143 +0,0 @@
-"""vLLM adapter management for LoRA hot-swapping."""
-
-from __future__ import annotations
-
-import json
-import logging
-
-import requests
-
-from hud.utils.hud_console import HUDConsole
-
-hud_console = HUDConsole(logging.getLogger(__name__))
-
-
-class VLLMAdapter:
-    """Manages LoRA adapter loading/unloading in vLLM."""
-
-    def __init__(self, base_url: str, api_key: str) -> None:
-        self.base_url = base_url
-        self.api_key = api_key
-        self.current_adapter = None
-
-    def load_adapter(self, adapter_name: str, adapter_path: str, timeout: int = 30) -> bool:
-        """
-        Hot-load a LoRA adapter to vLLM.
-
-        Args:
-            adapter_name: Name to register the adapter as
-            adapter_path: Path to the adapter checkpoint
-            timeout: Request timeout in seconds
-
-        Returns:
-            True if successful, False otherwise
-        """
-        url = f"{self.base_url}/load_lora_adapter"
-        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
-        payload = {"lora_name": adapter_name, "lora_path": adapter_path}
-        # Implement exponential backoff for retrying the adapter load request.
-        max_retries = 8
-        backoff_factor = 2
-        delay = 1  # initial delay in seconds
-
-        for attempt in range(1, max_retries + 1):
-            try:
-                response = requests.post(
-                    url, headers=headers, data=json.dumps(payload), timeout=timeout
-                )
-                response.raise_for_status()
-
-                self.current_adapter = adapter_name
-                hud_console.info(f"[VLLMAdapter] Loaded adapter: {adapter_name}")
-                return True
-
-            except requests.exceptions.RequestException as e:
-                if attempt == max_retries:
-                    hud_console.error(
-                        f"[VLLMAdapter] Failed to load adapter {adapter_name} after {attempt} attempts: {e}"  # noqa: E501
-                    )
-                    return False
-                else:
-                    hud_console.warning(
-                        f"[VLLMAdapter] Load adapter {adapter_name} failed (attempt {attempt}/{max_retries}): {e}. Retrying in {delay} seconds...",  # noqa: E501
-                    )
-                    import time
-
-                    time.sleep(delay)
-                    delay *= backoff_factor
-
-        return False
-
-    def unload_adapter(self, adapter_name: str) -> bool:
-        """
-        Unload a LoRA adapter from vLLM.
-
-        Args:
-            adapter_name: Name of the adapter to unload
-
-        Returns:
-            True if successful, False otherwise
-        """
-        url = f"{self.base_url}/unload_lora_adapter"
-        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
-        payload = {"lora_name": adapter_name}
-
-        try:
-            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=30)
-            response.raise_for_status()
-
-            if self.current_adapter == adapter_name:
-                self.current_adapter = None
-
-            hud_console.info(f"[VLLMAdapter] Unloaded adapter: {adapter_name}")
-            return True
-
-        except requests.exceptions.RequestException as e:
-            hud_console.error(f"[VLLMAdapter] Failed to unload adapter {adapter_name}: {e}")
-            return False
-
-    def list_adapters(self) -> list | None:
-        """
-        List all loaded LoRA adapters in vLLM.
-
-        Returns:
-            List of adapter names, or None if failed
-        """
-        url = f"{self.base_url}/list_lora_adapters"
-        headers = {"Authorization": f"Bearer {self.api_key}"}
-
-        try:
-            response = requests.get(url, headers=headers, timeout=10)
-            response.raise_for_status()
-            return response.json().get("adapters", [])
-
-        except requests.exceptions.RequestException as e:
-            hud_console.error(f"[VLLMAdapter] Failed to list adapters: {e}")
-            return None
-
-    def get_current(self) -> str | None:
-        """Get the name of the currently loaded adapter."""
-        return self.current_adapter
-
-
-# Convenience function for standalone use
-def hotload_lora(
-    adapter_name: str,
-    adapter_path: str,
-    base_url: str = "http://localhost:8000/v1",
-    api_key: str = "token-abc123",
-) -> bool:
-    """
-    Quick function to hot-load a LoRA adapter.
-
-    Args:
-        adapter_name: Name for the adapter
-        adapter_path: Path to adapter checkpoint
-        base_url: vLLM server URL
-        api_key: API key for vLLM
-
-    Returns:
-        True if successful
-    """
-    adapter = VLLMAdapter(base_url, api_key)
-    return adapter.load_adapter(adapter_name, adapter_path)
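
As a usage note, the removed module was self-contained enough to drive directly against a running vLLM server. A hedged sketch of how it could be invoked, valid only for hud-python <= 0.4.45; the adapter name and checkpoint path are placeholders, and the URL and API key are just the defaults from the source above:

# Hypothetical driver for the removed VLLMAdapter / hotload_lora helpers.
from hud.rl.vllm_adapter import VLLMAdapter, hotload_lora  # removed in 0.5.x

adapter = VLLMAdapter(base_url="http://localhost:8000/v1", api_key="token-abc123")

# Hot-swap a freshly trained LoRA checkpoint into the running server;
# load_adapter retries with exponential backoff on request failures.
if adapter.load_adapter("step-100", "/checkpoints/step-100"):
    print("serving:", adapter.get_current())    # -> "step-100"
    print("loaded:", adapter.list_adapters())

# Or the one-shot convenience wrapper with the same defaults:
hotload_lora("step-100", "/checkpoints/step-100")

adapter.unload_adapter("step-100")
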