hud-python 0.4.27__py3-none-any.whl → 0.4.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic.
- hud/__init__.py +2 -1
- hud/agents/base.py +73 -45
- hud/agents/claude.py +8 -4
- hud/agents/openai_chat_generic.py +65 -40
- hud/agents/tests/test_base.py +0 -4
- hud/agents/tests/test_openai.py +1 -1
- hud/cli/__init__.py +182 -52
- hud/cli/dev.py +8 -9
- hud/cli/eval.py +317 -119
- hud/cli/flows/__init__.py +0 -0
- hud/cli/flows/tasks.py +0 -0
- hud/cli/get.py +160 -0
- hud/cli/rl/__init__.py +563 -71
- hud/cli/rl/config.py +94 -0
- hud/cli/rl/display.py +133 -0
- hud/cli/rl/gpu.py +63 -0
- hud/cli/rl/gpu_utils.py +318 -0
- hud/cli/rl/presets.py +96 -0
- hud/cli/rl/remote_runner.py +348 -0
- hud/cli/rl/rl_api.py +150 -0
- hud/cli/rl/vllm.py +177 -0
- hud/cli/tests/test_analyze_metadata.py +0 -1
- hud/cli/utils/tasks.py +26 -0
- hud/clients/base.py +21 -23
- hud/clients/mcp_use.py +36 -44
- hud/clients/tests/test_mcp_use_retry.py +10 -10
- hud/datasets/__init__.py +4 -3
- hud/datasets/{execution/parallel.py → parallel.py} +1 -1
- hud/datasets/{execution/runner.py → runner.py} +1 -1
- hud/datasets/utils.py +1 -1
- hud/native/tests/test_native_init.py +1 -1
- hud/otel/config.py +1 -1
- hud/otel/instrumentation.py +35 -0
- hud/rl/README.md +31 -0
- hud/rl/__init__.py +1 -0
- hud/rl/actor.py +174 -0
- hud/rl/buffer.py +371 -0
- hud/rl/chat_template.jinja +101 -0
- hud/rl/config.py +184 -0
- hud/rl/distributed.py +95 -0
- hud/rl/learner.py +586 -0
- hud/rl/tests/__init__.py +1 -0
- hud/rl/tests/test_learner.py +171 -0
- hud/rl/train.py +354 -0
- hud/rl/types.py +101 -0
- hud/rl/utils/start_vllm_server.sh +30 -0
- hud/rl/utils.py +524 -0
- hud/rl/vllm_adapter.py +125 -0
- hud/settings.py +6 -0
- hud/telemetry/__init__.py +2 -1
- hud/telemetry/job.py +46 -3
- hud/telemetry/tests/test_trace.py +3 -3
- hud/telemetry/trace.py +85 -13
- hud/tools/computer/hud.py +4 -4
- hud/tools/tests/test_computer.py +3 -3
- hud/tools/tests/test_computer_actions.py +1 -1
- hud/types.py +123 -2
- hud/utils/group_eval.py +223 -0
- hud/utils/hud_console.py +113 -13
- hud/utils/tasks.py +119 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/METADATA +20 -2
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/RECORD +67 -47
- hud/cli/hf.py +0 -406
- hud/cli/rl/README.md +0 -243
- hud/cli/rl/init.py +0 -370
- hud/cli/rl/pod.py +0 -501
- hud/cli/rl/ssh.py +0 -322
- hud/cli/rl/train.py +0 -562
- hud/cli/rl/utils.py +0 -165
- hud/datasets/execution/__init__.py +0 -13
- hud/datasets/task.py +0 -116
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/WHEEL +0 -0
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/licenses/LICENSE +0 -0
hud/cli/rl/presets.py
ADDED
@@ -0,0 +1,96 @@
"""Training configuration presets for different GPU configurations."""

from __future__ import annotations

from typing import Any


def get_training_presets(gpu_memory_gb: float) -> list[dict[str, Any]]:
    """Get training configuration presets based on GPU memory."""
    # Time estimates based on provided benchmarks
    if gpu_memory_gb >= 40:  # A100 40GB or better
        presets = [
            {
                "name": "More Steps",
                "max_steps_per_episode": 12,
                "mini_batch_size": 1,
                "group_size": 4,
                "batch_size": 8,
                "max_new_tokens": 256,
                "tasks_per_hour": 847,
                "steps_per_hour": 424,
                "lr": 3e-5,
                "epochs": 2,
            },
            {
                "name": "Balanced (Recommended)",
                "max_steps_per_episode": 5,
                "mini_batch_size": 1,
                "group_size": 6,
                "batch_size": 12,
                "max_new_tokens": 1024,
                "tasks_per_hour": 738,
                "steps_per_hour": 415,
                "lr": 3e-5,
                "epochs": 2,
            },
            {
                "name": "Low Variance",
                "max_steps_per_episode": 3,
                "mini_batch_size": 2,
                "group_size": 8,
                "batch_size": 16,
                "max_new_tokens": 512,
                "tasks_per_hour": 900,
                "steps_per_hour": 450,
                "lr": 3e-5,
                "epochs": 2,
            },
        ]
    elif gpu_memory_gb >= 24:  # RTX 4090, A10, etc
        presets = [
            {
                "name": "Balanced (Recommended)",
                "max_steps_per_episode": 4,
                "mini_batch_size": 1,
                "group_size": 4,
                "batch_size": 16,
                "lr": 1e-4,
                "epochs": 2,
            },
            {
                "name": "Low Variance",
                "max_steps_per_episode": 3,
                "mini_batch_size": 2,
                "group_size": 4,
                "batch_size": 16,
                "lr": 5e-5,
                "epochs": 2,
            },
        ]
    else:  # Smaller GPUs
        presets = [
            {
                "name": "Test",
                "max_steps_per_episode": 5,
                "mini_batch_size": 1,
                "group_size": 4,
                "batch_size": 8,
                "lr": 1e-4,
                "epochs": 1,
            },
        ]

    return presets


def estimate_memory_usage(
    mini_batch_size: int, max_steps: int, max_new_tokens: int, max_pixels: int
) -> float:
    """Calculate estimated GPU memory usage using the formula from train.py."""
    INITIAL_MEMORY = 8.0
    SCALING_FACTOR = 4 / (28 * 28 * 256 * 1024)
    token_estimate = mini_batch_size * max_steps * max_new_tokens
    image_estimate = max_pixels
    total_memory = INITIAL_MEMORY + SCALING_FACTOR * token_estimate * image_estimate
    return total_memory
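For context, estimate_memory_usage is a simple linear model: a fixed ~8 GB baseline plus a term proportional to the generated-token budget and the per-image pixel budget. Below is a minimal sketch of how the two helpers could be combined, assuming hud-python 0.4.29 is installed; the 24 GB card size and the pixel budget are illustrative values, not taken from this release.

from hud.cli.rl.presets import estimate_memory_usage, get_training_presets

GPU_MEMORY_GB = 24.0        # assumed card size (RTX 4090 class)
MAX_PIXELS = 256 * 28 * 28  # assumed per-image pixel budget

for preset in get_training_presets(GPU_MEMORY_GB):
    needed = estimate_memory_usage(
        mini_batch_size=preset["mini_batch_size"],
        max_steps=preset["max_steps_per_episode"],
        max_new_tokens=preset.get("max_new_tokens", 512),  # smaller-GPU presets omit this key
        max_pixels=MAX_PIXELS,
    )
    fits = "fits" if needed <= GPU_MEMORY_GB else "too large"
    print(f"{preset['name']}: ~{needed:.1f} GB estimated ({fits})")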
hud/cli/rl/remote_runner.py
ADDED
@@ -0,0 +1,348 @@
"""
Remote runner for HUD RL training via API server.

This module implements the new interactive flow for RL training.
"""

from __future__ import annotations

import os
import subprocess
import time
from pathlib import Path

from rich.console import Console

from hud.utils.hud_console import hud_console
from hud.utils.tasks import load_tasks

from . import rl_api
from .config import generate_config_interactive, load_config, save_config
from .presets import get_training_presets

console = Console()

# GPU pricing information
GPU_PRICING = {
    "A100": {"price": "1", "memory": "80GB"},
    "H100": {"price": "2", "memory": "80GB"},
}


def run_remote_training(
    tasks_file: str | None,
    model: str | None,
    config_file: Path | None,
    output_dir: str,
) -> None:
    """Run RL training remotely via the API server following the new interactive flow."""
    from hud.settings import settings

    if not settings.api_key:
        hud_console.error("API key not found")
        console.print("[yellow]Please set HUD_API_KEY environment variable[/yellow]")
        raise ValueError("API key not found")

    # Step 1: CONFIRMATION - Load tasks and show example
    if tasks_file:
        tasks = load_tasks(tasks_file)
    else:
        raise ValueError("Tasks file not found")

    # Show example task for confirmation
    hud_console.section_title("Example Task from Dataset")

    if tasks:
        # Display task with truncated values
        task_data = tasks[0].model_dump()
        truncated_data = {}
        max_value_length = 120  # Maximum characters to show per line

        for key, value in task_data.items():
            value_str = str(value)
            if len(value_str) > max_value_length:
                truncated_data[key] = value_str[:max_value_length] + "..."
            else:
                truncated_data[key] = value_str

        hud_console.key_value_table(truncated_data)

    if not hud_console.confirm("Proceed with training on this dataset?", default=True):
        hud_console.error("Training cancelled")
        return

    # Step 2: MODEL SELECTION
    hud_console.section_title("Model Selection")

    # Fetch existing models
    hud_console.info("Fetching your models from https://app.hud.so/models")

    try:
        models = rl_api.list_models()
        # Filter for active/training models and sort by recency
        active_models = [m for m in models if m.status in ["ready", "training"]]
        active_models.sort(key=lambda m: m.created_at or "", reverse=True)

        if active_models or model is None:
            # Build choices
            choices = []
            for m in active_models:
                status_emoji = {
                    "ready": "✅",
                    "training": "🔄",
                    "deploying": "🚀",
                    "pending": "⏳",
                }.get(m.status, "❓")

                choices.append({"name": f"{status_emoji} {m.name} ({m.status})", "value": m.name})

            choices.append({"name": "Create new model", "value": "__new__"})

            if not model:
                if choices:
                    selected = hud_console.select("Select a model:", choices=choices)
                else:
                    selected = "__new__"
                    hud_console.hint("No existing models found. Creating new model...")
            else:
                # Model was provided via CLI
                selected = model

        else:
            selected = "__new__"

        # Handle model selection
        if selected == "__new__":
            # Create new model flow
            hud_console.info("Creating new model...")

            # Ask for model type
            model_type = hud_console.select(
                "Select base model type:",
                choices=[
                    {"name": "Qwen2.5-VL-3B-Instruct", "value": "Qwen/Qwen2.5-VL-3B-Instruct"},
                    # {"name": "Qwen2.5-VL-7B-Instruct", "value": "Qwen/Qwen2.5-VL-7B-Instruct"},
                ],
                default=0,
            )
            from rich.prompt import Prompt

            # Ask for model name
            default_name = model_type.split("/")[-1].lower()
            hud_console.info(f"Enter model name (default: {default_name}):")
            model_name = Prompt.ask("Model name", default=default_name)
            model_name = model_name.replace("/", "-").lower()

            # Create the model
            hud_console.info(f"Creating model: {model_name}")
            try:
                rl_api.create_model(model_name, model_type)
                hud_console.success(f"Created model: {model_name}")

                # Deploy vLLM automatically
                hud_console.info(f"Deploying vLLM server for {model_name}...")
                rl_api.deploy_vllm(model_name, gpu_type="A100")
                hud_console.success("vLLM deployment started")

                # Wait for deployment
                hud_console.info("Waiting for vLLM server to be ready...")
                max_wait = 600  # 10 minutes
                start_time = time.time()

                with hud_console.progress() as progress:
                    progress.update(
                        "Checking deployment status (see live status on https://app.hud.so/models)"
                    )

                    while True:
                        if time.time() - start_time > max_wait:
                            hud_console.error("Timeout waiting for vLLM deployment")
                            raise ValueError("vLLM deployment timeout")

                        model_info = rl_api.get_model(model_name)
                        if model_info.status == "ready":
                            hud_console.success(
                                f"vLLM server ready at http://rl.hud.so/v1/models/{model_name}/vllm"
                            )
                            break

                        time.sleep(5)

            except Exception as e:
                hud_console.error(f"Failed to create model: {e}")
                raise

        else:
            # Existing model selected
            model_name = selected
            model_info = rl_api.get_model(model_name)

            # Check if model is in training
            if model_info.status == "training":
                if hud_console.confirm(
                    f"{model_name} is currently training. Stop current training?", default=False
                ):
                    hud_console.info(f"Stopping training for {model_name}...")
                    try:
                        rl_api.stop_training(model_name)
                        hud_console.success("Training stopped")
                    except Exception as e:
                        hud_console.error(f"Failed to stop training: {e}")
                        raise
                else:
                    hud_console.error("Cannot start new training while model is already training")
                    return

            # Ensure vLLM is deployed
            if not model_info.vllm_url:
                hud_console.info(f"Deploying vLLM server for {model_name}...")
                rl_api.deploy_vllm(model_name, gpu_type="A100")
                hud_console.success("vLLM deployment started")

                # Wait for deployment
                hud_console.info("Waiting for vLLM server to be ready...")
                max_wait = 600  # 10 minutes
                start_time = time.time()

                with hud_console.progress() as progress:
                    progress.update(
                        "Checking deployment status (see live status on https://app.hud.so/models)"
                    )

                    while True:
                        if time.time() - start_time > max_wait:
                            hud_console.error("Timeout waiting for vLLM deployment")
                            raise ValueError("vLLM deployment timeout")

                        model_info = rl_api.get_model(model_name)
                        if model_info.vllm_url:
                            hud_console.success(
                                f"vLLM server ready at http://rl.hud.so/v1/models/{model_name}/vllm"
                            )
                            break

                        time.sleep(5)
            else:
                hud_console.success("vLLM server already running")
    except KeyboardInterrupt:
        hud_console.dim_info("Training cancelled", "")
        return
    except Exception as e:
        hud_console.error(f"Error during model selection: {e}")
        raise

    # Get final model info
    model_info = rl_api.get_model(model_name)

    # Step 3: TRAINING CONFIG
    hud_console.section_title("Training Configuration")

    if not config_file:
        # Ask about number of GPUs with pricing
        # hud_console.info("GPU Selection (Pricing per GPU):")

        # gpu_table = Table(show_header=True, header_style="bold magenta")
        # gpu_table.add_column("GPU Type", style="cyan")
        # gpu_table.add_column("Memory", style="green")
        # gpu_table.add_column("Price/hr", style="yellow")

        # for gpu, info in GPU_PRICING.items():
        #     gpu_table.add_row(gpu, info["memory"], "see pricing on hud.so")

        # console.print(gpu_table)

        gpu_choice = hud_console.select(
            "Select GPU type:",
            choices=[
                {"name": "A100 80GB", "value": "A100"},
                {"name": "H100 80GB", "value": "H100"},
            ],
            default=0,
        )

        num_gpus = hud_console.select(
            "Number of GPUs:",
            choices=[
                {"name": "1 GPU", "value": 1},
                {"name": "2 GPUs", "value": 2},
                {"name": "4 GPUs", "value": 4},
                {"name": "8 GPUs", "value": 8},
            ],
            default=1,
        )

        # Generate config with presets
        hud_console.info("Generating training configuration...")
        gpu_memory_gb = 80.0 if gpu_choice in ["A100", "H100"] else 48.0
        presets = get_training_presets(gpu_memory_gb)

        config, _ = generate_config_interactive(
            model_name=model_info.base_model,
            presets=presets,
        )

        config.job_name = f"RL {model_name} on {tasks_file}"

        # Save config for editing
        temp_config_path = Path(f".rl_config_temp_{model_name}.json")
        save_config(config, temp_config_path)

        # Ask to edit config
        hud_console.info(
            f"Using training configuration from [underline cyan]{temp_config_path.absolute()}[/underline cyan]"  # noqa: E501
        )
        edit_choice = hud_console.select(
            "Would you like to start training?",
            choices=[
                {"name": "🚀 Start training!", "value": "start"},
                {"name": "✏️ Review configuration", "value": "edit"},
                {"name": "❌ Cancel", "value": "cancel"},
            ],
            default=0,
        )

        if edit_choice == "cancel":
            hud_console.error("Training cancelled")
            return
        elif edit_choice == "edit":
            # Open editor
            editor = os.environ.get("EDITOR", "nano")
            hud_console.info(f"Opening {editor} to edit configuration...")

            try:
                subprocess.run([editor, str(temp_config_path)], check=True)  # noqa: S603
                # Reload config
                config = load_config(temp_config_path)
                hud_console.success("Configuration updated")
            except Exception as e:
                hud_console.error(f"Failed to edit config: {e}")
                return

        config_dict = config.to_dict()
    else:
        # Load provided config
        hud_console.info(f"Loading configuration from: {config_file}")
        config = load_config(config_file)
        config_dict = config.to_dict()
        gpu_choice = "A100"  # Default
        num_gpus = 1  # Default for non-interactive mode

    # Launch training
    try:
        rl_api.launch_training(
            model_name=model_name,
            config=config_dict,
            tasks=[task.model_dump() for task in tasks],
            gpu_type=gpu_choice,
            gpu_count=int(num_gpus),
        )

        hud_console.success("Training Started Successfully!")

        hud_console.info(f"See your model {model_name} training on https://app.hud.so/models")
        hud_console.hint("Launch another training run via: hud rl <tasks_file>")
        hud_console.hint("Or evaluate the model via: hud eval <tasks_file>")

    except Exception as e:
        hud_console.error(f"Failed to launch training: {e}")
        raise
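The runner above is normally reached through the hud rl <tasks_file> command (as the closing hints suggest), but it can also be driven directly. Below is a hedged sketch; the file name and output directory are placeholders, not values from this release.

from hud.cli.rl.remote_runner import run_remote_training

# Interactive flow: confirms the dataset, prompts for a model (or creates one and
# deploys its vLLM server), builds a training config, then launches remote training.
run_remote_training(
    tasks_file="tasks.jsonl",  # assumed local tasks file
    model=None,                # None -> prompt for model selection
    config_file=None,          # None -> generate the config interactively
    output_dir="checkpoints",  # assumed output directory
)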
hud/cli/rl/rl_api.py
ADDED
@@ -0,0 +1,150 @@
"""
Direct API functions for HUD RL remote endpoints using shared requests module.

This module provides functions for interacting with the HUD RL API server.
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Any

from pydantic import BaseModel

from hud.settings import settings
from hud.shared.requests import make_request_sync

if TYPE_CHECKING:
    from collections.abc import Iterator


class RLModelInfo(BaseModel):
    """Model information from the API."""

    name: str
    base_model: str
    vllm_url: str | None = None
    trainer_name: str | None = None
    checkpoint_volume: str | None = None
    status: str = "pending"  # pending, deploying, ready, training, terminated
    created_at: str | None = None
    updated_at: str | None = None
    terminated_at: str | None = None


def create_model(name: str, base_model: str) -> dict[str, Any]:
    """Create a new model."""
    return make_request_sync(
        method="POST",
        url=f"{settings.hud_rl_url}/models",
        json={"name": name, "base_model": base_model},
        api_key=settings.api_key,
    )


def get_model(name: str) -> RLModelInfo:
    """Get model information."""
    response = make_request_sync(
        method="GET", url=f"{settings.hud_rl_url}/models/{name}", api_key=settings.api_key
    )
    return RLModelInfo(**response)


def list_models() -> list[RLModelInfo]:
    """List all models."""
    response = make_request_sync(
        method="GET", url=f"{settings.hud_rl_url}/models", api_key=settings.api_key
    )
    if not isinstance(response, list):
        response = [response]
    return [
        RLModelInfo(**(model if isinstance(model, dict) else model.__dict__)) for model in response
    ]


def deploy_vllm(model_name: str, gpu_type: str = "A100") -> dict[str, Any]:
    """Deploy a vLLM server for a model."""
    return make_request_sync(
        method="POST",
        url=f"{settings.hud_rl_url}/models/{model_name}/deploy",
        json={"gpu_type": gpu_type},
        api_key=settings.api_key,
    )


def stop_vllm(model_name: str) -> dict[str, Any]:
    """Stop the vLLM server for a model."""
    return make_request_sync(
        method="DELETE",
        url=f"{settings.hud_rl_url}/models/{model_name}/deploy",
        api_key=settings.api_key,
    )


def stop_training(model_name: str) -> dict[str, Any]:
    """Stop the training for a model."""
    return make_request_sync(
        method="DELETE",
        url=f"{settings.hud_rl_url}/models/{model_name}/training",
        api_key=settings.api_key,
    )


def launch_training(
    model_name: str,
    config: dict[str, Any],
    tasks: list[dict[str, Any]],
    gpu_type: str = "A100",
    gpu_count: int = 1,
) -> dict[str, Any]:
    """Launch a training run for a model."""
    return make_request_sync(
        method="POST",
        url=f"{settings.hud_rl_url}/models/{model_name}/training/launch",
        json={"config": config, "tasks": tasks, "gpu_type": gpu_type, "gpu_count": gpu_count},
        api_key=settings.api_key,
    )


def get_training_status(model_name: str) -> dict[str, Any]:
    """Get the status of a training run."""
    return make_request_sync(
        method="GET",
        url=f"{settings.hud_rl_url}/models/{model_name}/training/status",
        api_key=settings.api_key,
    )


def get_training_logs(model_name: str, lines: int = 100, follow: bool = False) -> Iterator[str]:
    """Get training logs for a model.

    Args:
        model_name: Name of the model
        lines: Number of lines to return
        follow: If True, stream logs as they arrive

    Yields:
        Log lines as strings
    """
    # For streaming logs, we need to use httpx directly
    # as the shared requests module expects JSON responses
    import httpx

    params = {"lines": lines}
    if follow:
        params["follow"] = True

    headers = {"Authorization": f"Bearer {settings.api_key}"}

    with (
        httpx.Client(timeout=300.0) as client,
        client.stream(
            "GET",
            f"{settings.hud_rl_url}/models/{model_name}/training/logs",
            params=params,
            headers=headers,
        ) as response,
    ):
        response.raise_for_status()
        for line in response.iter_lines():
            if line:
                yield line
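As a usage note, these functions are thin wrappers over the HUD RL HTTP endpoints, so they can be scripted outside the CLI. A minimal sketch follows; it assumes HUD_API_KEY is configured and that a model with the given name already exists (the name itself is only an example).

from hud.cli.rl import rl_api

MODEL_NAME = "qwen2.5-vl-3b-instruct"  # example model name

# Inspect the model record and its vLLM endpoint, if deployed
info = rl_api.get_model(MODEL_NAME)
print(info.status, info.vllm_url)

# Check the current training run
print(rl_api.get_training_status(MODEL_NAME))

# Tail the most recent training logs without following the stream
for line in rl_api.get_training_logs(MODEL_NAME, lines=50, follow=False):
    print(line)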