PyPI - hud-python - Versions diffs - 0.4.27__py3-none-any.whl → 0.4.29__py3-none-any.whl - Mend

hud-python 0.4.27__py3-none-any.whl → 0.4.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (76) hide show

hud/__init__.py +2 -1
hud/agents/base.py +73 -45
hud/agents/claude.py +8 -4
hud/agents/openai_chat_generic.py +65 -40
hud/agents/tests/test_base.py +0 -4
hud/agents/tests/test_openai.py +1 -1
hud/cli/__init__.py +182 -52
hud/cli/dev.py +8 -9
hud/cli/eval.py +317 -119
hud/cli/flows/__init__.py +0 -0
hud/cli/flows/tasks.py +0 -0
hud/cli/get.py +160 -0
hud/cli/rl/__init__.py +563 -71
hud/cli/rl/config.py +94 -0
hud/cli/rl/display.py +133 -0
hud/cli/rl/gpu.py +63 -0
hud/cli/rl/gpu_utils.py +318 -0
hud/cli/rl/presets.py +96 -0
hud/cli/rl/remote_runner.py +348 -0
hud/cli/rl/rl_api.py +150 -0
hud/cli/rl/vllm.py +177 -0
hud/cli/tests/test_analyze_metadata.py +0 -1
hud/cli/utils/tasks.py +26 -0
hud/clients/base.py +21 -23
hud/clients/mcp_use.py +36 -44
hud/clients/tests/test_mcp_use_retry.py +10 -10
hud/datasets/__init__.py +4 -3
hud/datasets/{execution/parallel.py → parallel.py} +1 -1
hud/datasets/{execution/runner.py → runner.py} +1 -1
hud/datasets/utils.py +1 -1
hud/native/tests/test_native_init.py +1 -1
hud/otel/config.py +1 -1
hud/otel/instrumentation.py +35 -0
hud/rl/README.md +31 -0
hud/rl/__init__.py +1 -0
hud/rl/actor.py +174 -0
hud/rl/buffer.py +371 -0
hud/rl/chat_template.jinja +101 -0
hud/rl/config.py +184 -0
hud/rl/distributed.py +95 -0
hud/rl/learner.py +586 -0
hud/rl/tests/__init__.py +1 -0
hud/rl/tests/test_learner.py +171 -0
hud/rl/train.py +354 -0
hud/rl/types.py +101 -0
hud/rl/utils/start_vllm_server.sh +30 -0
hud/rl/utils.py +524 -0
hud/rl/vllm_adapter.py +125 -0
hud/settings.py +6 -0
hud/telemetry/__init__.py +2 -1
hud/telemetry/job.py +46 -3
hud/telemetry/tests/test_trace.py +3 -3
hud/telemetry/trace.py +85 -13
hud/tools/computer/hud.py +4 -4
hud/tools/tests/test_computer.py +3 -3
hud/tools/tests/test_computer_actions.py +1 -1
hud/types.py +123 -2
hud/utils/group_eval.py +223 -0
hud/utils/hud_console.py +113 -13
hud/utils/tasks.py +119 -0
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/METADATA +20 -2
{hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/RECORD +67 -47
hud/cli/hf.py +0 -406
hud/cli/rl/README.md +0 -243
hud/cli/rl/init.py +0 -370
hud/cli/rl/pod.py +0 -501
hud/cli/rl/ssh.py +0 -322
hud/cli/rl/train.py +0 -562
hud/cli/rl/utils.py +0 -165
hud/datasets/execution/__init__.py +0 -13
hud/datasets/task.py +0 -116
{hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/WHEEL +0 -0
{hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/licenses/LICENSE +0 -0

hud/rl/chat_template.jinja ADDED Viewed

@@ -0,0 +1,101 @@
+{#- Chat template: emits a system turn (system text plus an optional tool list), then one '<|im_start|>role ... <|im_end|>' turn per message. Consecutive tool messages are merged into a single user turn, each wrapped in <tool_response> tags. Assistant text parts and tool calls are wrapped in generation blocks. -#}
+{#- NOTE(review): the 'continue' and 'generation' tags are not core Jinja tags; presumably the rendering environment provides them - confirm. -#}
+{#- Running counters for the 'Picture N: ' / 'Video N: ' labels; namespace() lets the nested loops update them. -#}
+{% set image_count = namespace(value=0) %}
+{% set video_count = namespace(value=0) %}
+{{- '<|im_start|>system\n' }}
+{#- System turn body: the first message if its role is 'system', otherwise a built-in default prompt. -#}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {{ messages[0]['content'] }}
+    {%- else -%}
+        {%- for content in messages[0]['content'] -%}
+            {%- if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+                {%- set image_count.value = image_count.value + 1 -%}
+                {%- if add_vision_id -%}
+                    {{ 'Picture ' ~ image_count.value ~ ': ' }}
+                {%- endif -%}
+                {{ '<|vision_start|><|image_pad|><|vision_end|>' }}
+            {%- elif content['type'] == 'video' or 'video' in content -%}
+                {%- set video_count.value = video_count.value + 1 -%}
+                {%- if add_vision_id -%}
+                    {{ 'Video ' ~ video_count.value ~ ': ' }}
+                {%- endif -%}
+                {{ '<|vision_start|><|video_pad|><|vision_end|>' }}
+            {%- elif 'text' in content -%}
+                {{ content['text'] }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- endif -%}
+{%- else -%}
+    {{ 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
+{%- endif -%}
+{#- When tools are supplied, append their JSON signatures and the <tool_call> output instructions to the system turn. -#}
+{%- if tools -%}
+    {{ '\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n' }}
+    {{- tools | map('tojson') | join('\n') -}}
+    {{ '\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{"name": <function-name>, "arguments": <args-json-object>}\n</tool_call>' }}
+{%- endif -%}
+{{ '<|im_end|>\n' }}
+{%- for message in messages -%}
+    {# Skip the first system message as it was already rendered. #}
+    {%- if loop.first and message.role == 'system' %}{% continue %}{% endif -%}
+    {# Render tool messages. The logic is slightly different with other messages. #}
+    {%- if message['role'] == 'tool' -%}
+        {#- Open a user turn only for the first tool message of a consecutive run. -#}
+        {%- if loop.first or messages[loop.index0 - 1]['role'] != 'tool' -%}
+            {{ '<|im_start|>user' }}
+        {%- endif -%}
+        {{ '\n<tool_response>\n' }}
+    {%- else -%}
+        {{ '<|im_start|>' ~ message['role'] ~ '\n' }}
+    {%- endif -%}
+    {#- Message body: a plain string is emitted verbatim; a list is rendered part by part (image, video, text). -#}
+    {%- if message['content'] is string -%}
+        {{ message['content'] }}
+    {%- else -%}
+        {%- for content in message['content'] -%}
+            {%- if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+                {%- set image_count.value = image_count.value + 1 -%}
+                {%- if add_vision_id -%}
+                    {{ 'Picture ' ~ image_count.value ~ ': ' }}
+                {%- endif -%}
+                {{ '<|vision_start|><|image_pad|><|vision_end|>' }}
+            {%- elif content['type'] == 'video' or 'video' in content -%}
+                {%- set video_count.value = video_count.value + 1 -%}
+                {%- if add_vision_id -%}
+                    {{ 'Video ' ~ video_count.value ~ ': ' }}
+                {%- endif -%}
+                {{ '<|vision_start|><|video_pad|><|vision_end|>' }}
+            {#- NOTE(review): only list-form assistant text is wrapped in a generation block; string content above is not. The spaces inside the block are literal template text and may be emitted - confirm both are intended. -#}
+            {%- elif 'text' in content and message['role'] == 'assistant' -%}
+                {% generation %} {{ content['text'] }} {% endgeneration %}
+            {%- elif 'text' in content -%}
+                {{ content['text'] }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- endif -%}
+    {# Render tool_calls in AI messages. #}
+    {%- if message['role'] == 'assistant' and 'tool_calls' in message -%}
+        {# It will be cleaner if I can use some map function and join them with '\n' #}
+        {%- for tool_call in message['tool_calls'] -%}
+            {#- Unwrap the nested 'function' entry when the call carries one. -#}
+            {%- if tool_call['function'] is defined -%}
+                {%- set tool_call = tool_call['function'] -%}
+            {%- endif -%}
+            {# Handle the case where arguments is already a JSON string (OpenAI format) #}
+            {%- if tool_call.arguments is string -%}
+                {% generation %} {{ '<tool_call>\n{"name": "' }}{{ tool_call.name }}{{ '", "arguments": ' }}{{ tool_call.arguments }}{{ '}\n</tool_call>' }} {% endgeneration %}
+            {%- else -%}
+                {% generation %} {{ '<tool_call>\n' }}{{ tool_call | tojson }}{{ '\n</tool_call>' }} {% endgeneration %}
+            {%- endif -%}
+            {%- if not loop.last -%}
+                {% generation %} {{ '\n' }} {% endgeneration %}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- endif -%}
+    {#- Close the turn; a run of tool messages is closed only after its last member. -#}
+    {%- if message['role'] == 'tool' -%}
+        {{ '\n</tool_response>' }}
+        {%- if loop.last or messages[loop.index0 + 1]['role'] != 'tool' -%}
+            {{ '<|im_end|>\n' }}
+        {%- endif -%}
+    {%- else -%}
+        {{ '<|im_end|>\n' }}
+    {%- endif -%}
+{%- endfor -%}
+{#- Optionally open an assistant turn for the model to complete. -#}
+{%- if add_generation_prompt -%}
+    {{ '<|im_start|>assistant\n' }}
+{%- endif -%}

hud/rl/config.py ADDED Viewed

@@ -0,0 +1,184 @@
+"""Configuration for RL training."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Literal
+# List of supported VL (Vision-Language) models
+SUPPORTED_MODELS = [
+    "Qwen/Qwen2.5-VL-3B-Instruct",
+    "Qwen/Qwen2.5-VL-7B-Instruct",
+    "Qwen/Qwen2.5-VL-14B-Instruct",
+    "Qwen/Qwen2.5-VL-32B-Instruct",
+    "Qwen/Qwen2.5-VL-72B-Instruct",
+    "Qwen/Qwen2.5-7B-Instruct",
+]
+def validate_vl_model(model_name: str) -> None:
+    """Validate that the model is a supported VL model.
+    Args:
+        model_name: The model name to validate
+    Raises:
+        ValueError: If the model is not a supported VL model
+    """
+    if not any(model_name.startswith(supported) for supported in SUPPORTED_MODELS):
+        raise ValueError(
+            f"Model '{model_name}' is not a supported VL model. "
+            f"Only VL (Vision-Language) models are supported for RL training.\n"
+            f"Supported models: {', '.join(SUPPORTED_MODELS)}\n"
+            f"Note: '{model_name}' appears to be a text-only model."
+        )
+@dataclass
+class ModelConfig:
+    """Model and LoRA configuration."""
+    base_model: str = "Qwen/Qwen2.5-VL-3B-Instruct"
+    lora_r: int = 8
+    lora_alpha: int = 16
+    lora_dropout: float = 0.05
+    target_modules: tuple[str, ...] = (
+        "q_proj",
+        "k_proj",
+        "v_proj",
+        "o_proj",
+        "gate_proj",
+        "up_proj",
+        "down_proj",
+    )
+    min_pixels: int = 256 * 28 * 28
+    max_pixels: int = 512 * 28 * 28
+    attn_implementation: str = "flash_attention_2"
+    use_liger: bool = True
+    gradient_checkpointing: bool = True
+@dataclass
+class TrainingConfig:
+    """Training hyperparameters."""
+    # Training parameters
+    training_steps: int = 100
+    shuffle_dataset: bool = False
+    save_every_batches: int = 1
+    # Batching parameters
+    epochs: int = 2
+    batch_size: int = 24
+    group_size: int = 4
+    mini_batch_size: int = 1
+    update_after_group: bool = True  # Whether to update the policy after each task group
+    accumulate_over_minibatches: bool = False  # Whether to accumulate over minibatches
+    # Advantage calculation parameters
+    batch_level: Literal["group", "batch"] = "group"
+    no_std: bool = False
+    leave_one_out: bool = True
+    # Replay buffer parameters
+    buffer_steps: int = 4
+    select_strategy: Literal["recent", "variance", "random"] = "variance"
+    # Aggregation parameters
+    ppo_mode: Literal["per_token", "per_trace"] = "per_token"
+    token_agg: Literal["mean", "sum"] = "mean"  # noqa: S105
+    # Regularization parameters
+    kl_beta: float = 0.0
+    entropy_beta: float = 0.0
+    top_eps: float = 0.2
+    bottom_eps: float = 0.1
+    # Training hyperparameters
+    lr: float = 3e-5
+    grad_clip: float = 1.0
+    # Adam hyperparameters
+    use_8bit_optimizer: bool = True
+    adam_betas: tuple[float, float] = (0.9, 0.999)
+    adam_eps: float = 1e-8
+@dataclass
+class ActorConfig:
+    """Actor/episode collection configuration."""
+    # Execution parameters
+    max_steps_per_episode: int = 5
+    max_parallel_episodes: int = 48
+    max_new_tokens: int = 1024
+    force_tool_choice: bool = True
+    allowed_tools: list[str] | None = None
+    # Model parameters
+    temperature: float = 0.7
+    # Hud agent parameters
+    system_prompt: str = "You are an expert agent. Complete the task efficiently."
+    vllm_base_url: str = "http://localhost:8000/v1"
+    vllm_api_key: str = "token-abc123"
+    # Episode execution timeout (seconds)
+    episode_timeout_sec: int = 600
+@dataclass
+class Config:
+    """Main configuration combining all sub-configs."""
+    model: ModelConfig = field(default_factory=ModelConfig)
+    training: TrainingConfig = field(default_factory=TrainingConfig)
+    actor: ActorConfig = field(default_factory=ActorConfig)
+    # Telemetry configuration
+    job_name: str = "RL Training"
+    job_id: str | None = None  # Use existing job ID if provided
+    stats_interval: int = 1
+    verbose: bool = False
+    # Paths
+    out_dir: str = "./checkpoints"
+    adapter_prefix: str = "cua-grpo-step"
+    # Misc
+    seed: int = 1234
+    @classmethod
+    def from_dict(cls, d: dict) -> Config:
+        """Create config from dictionary."""
+        model = ModelConfig(**d.get("model", {}))
+        training = TrainingConfig(**d.get("training", {}))
+        actor = ActorConfig(**d.get("actor", {}))
+        return cls(
+            model=model,
+            training=training,
+            actor=actor,
+            job_name=d.get("job_name", "RL Training"),
+            job_id=d.get("job_id"),
+            stats_interval=d.get("stats_interval", 1),
+            verbose=d.get("verbose", False),
+            out_dir=d.get("out_dir", "./checkpoints"),
+            adapter_prefix=d.get("adapter_prefix", "cua-grpo-step"),
+            seed=d.get("seed", 1234),
+        )
+    def to_dict(self) -> dict:
+        """Convert config to dictionary."""
+        return {
+            "model": self.model.__dict__,
+            "training": self.training.__dict__,
+            "actor": self.actor.__dict__,
+            "job_name": self.job_name,
+            "job_id": self.job_id,
+            "stats_interval": self.stats_interval,
+            "verbose": self.verbose,
+            "out_dir": self.out_dir,
+            "adapter_prefix": self.adapter_prefix,
+            "seed": self.seed,
+        }

hud/rl/distributed.py ADDED Viewed

@@ -0,0 +1,95 @@
+"""Distributed training utilities for GRPO."""
+from __future__ import annotations
+import os
+from typing import Any
+import torch
+import torch.distributed as dist
+def setup_distributed() -> None:
+    """Initialize distributed training environment."""
+    if "RANK" in os.environ and int(os.environ["WORLD_SIZE"]) > 1:
+        # Set device for this process
+        local_rank = int(os.environ["LOCAL_RANK"])
+        torch.cuda.set_device(local_rank)
+        # Initialize process group
+        dist.init_process_group("nccl")
+def get_local_rank() -> int:
+    """Get local rank from environment."""
+    return int(os.environ.get("LOCAL_RANK", 0))
+def get_global_rank() -> int:
+    """Get global rank from environment."""
+    return int(os.environ.get("RANK", 0))
+def get_world_size() -> int:
+    """Get world size from environment."""
+    return int(os.environ.get("WORLD_SIZE", 1))
+def cleanup_distributed() -> None:
+    """Clean up distributed environment."""
+    if dist.is_initialized():
+        dist.destroy_process_group()
+def is_main_process() -> bool:
+    """Check if this is the main process (rank 0)."""
+    if not dist.is_initialized():
+        return True
+    return dist.get_rank() == 0
+def synchronize() -> None:
+    """Synchronize all processes."""
+    if dist.is_initialized():
+        dist.barrier()
+def all_reduce_mean(tensor: torch.Tensor) -> torch.Tensor:
+    """Average a tensor across all processes."""
+    if not dist.is_initialized():
+        return tensor
+    world_size = dist.get_world_size()
+    dist.all_reduce(tensor, op=dist.ReduceOp.SUM)
+    tensor /= world_size
+    return tensor
+def broadcast_object(obj: Any, src: int = 0) -> Any:
+    """Broadcast a Python object from src rank to all ranks."""
+    if not dist.is_initialized():
+        return obj
+    obj_list = [obj] if dist.get_rank() == src else [None]
+    dist.broadcast_object_list(obj_list, src=src)
+    return obj_list[0]
+def gather_tensors(tensor: torch.Tensor) -> list[torch.Tensor] | None:
+    """Gather tensors from all ranks to rank 0.
+    Returns:
+        List of tensors on rank 0, None on other ranks
+    """
+    if not dist.is_initialized():
+        return [tensor]
+    world_size = dist.get_world_size()
+    if dist.get_rank() == 0:
+        gathered = [torch.zeros_like(tensor) for _ in range(world_size)]
+        dist.gather(tensor, gathered, dst=0)
+        return gathered
+    else:
+        dist.gather(tensor, None, dst=0)
+        return None

hud-python 0.4.27__py3-none-any.whl → 0.4.29__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.27__py3-none-any.whl → 0.4.29__py3-none-any.whl