PyPI - hud-python - Versions diffs - 0.4.28__py3-none-any.whl → 0.4.30__py3-none-any.whl - Mend

hud-python 0.4.28__py3-none-any.whl → 0.4.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (77) hide show

hud/__init__.py +2 -1
hud/agents/base.py +81 -45
hud/agents/claude.py +8 -4
hud/agents/openai_chat_generic.py +66 -40
hud/agents/tests/test_base.py +0 -4
hud/agents/tests/test_openai.py +1 -1
hud/cli/__init__.py +182 -52
hud/cli/dev.py +8 -9
hud/cli/eval.py +317 -119
hud/cli/flows/__init__.py +0 -0
hud/cli/flows/tasks.py +0 -0
hud/cli/get.py +160 -0
hud/cli/rl/__init__.py +567 -71
hud/cli/rl/config.py +94 -0
hud/cli/rl/display.py +133 -0
hud/cli/rl/gpu.py +63 -0
hud/cli/rl/gpu_utils.py +318 -0
hud/cli/rl/presets.py +96 -0
hud/cli/rl/remote_runner.py +347 -0
hud/cli/rl/rl_api.py +150 -0
hud/cli/rl/vllm.py +177 -0
hud/cli/tests/test_analyze_metadata.py +0 -1
hud/cli/utils/tasks.py +26 -0
hud/clients/base.py +21 -23
hud/clients/mcp_use.py +36 -44
hud/clients/tests/test_mcp_use_retry.py +10 -10
hud/datasets/__init__.py +4 -3
hud/datasets/{execution/parallel.py → parallel.py} +1 -1
hud/datasets/{execution/runner.py → runner.py} +1 -1
hud/datasets/utils.py +1 -1
hud/native/comparator.py +6 -6
hud/native/tests/test_comparator.py +8 -8
hud/native/tests/test_native_init.py +13 -11
hud/otel/config.py +1 -1
hud/otel/instrumentation.py +35 -0
hud/rl/README.md +30 -0
hud/rl/__init__.py +1 -0
hud/rl/actor.py +174 -0
hud/rl/buffer.py +371 -0
hud/rl/chat_template.jinja +101 -0
hud/rl/config.py +184 -0
hud/rl/distributed.py +95 -0
hud/rl/learner.py +589 -0
hud/rl/tests/__init__.py +1 -0
hud/rl/tests/test_learner.py +171 -0
hud/rl/train.py +354 -0
hud/rl/types.py +101 -0
hud/rl/utils/start_vllm_server.sh +30 -0
hud/rl/utils.py +524 -0
hud/rl/vllm_adapter.py +125 -0
hud/settings.py +6 -0
hud/telemetry/__init__.py +2 -1
hud/telemetry/job.py +46 -3
hud/telemetry/tests/test_trace.py +3 -3
hud/telemetry/trace.py +85 -13
hud/tools/tests/test_computer.py +3 -3
hud/tools/tests/test_computer_actions.py +1 -1
hud/types.py +123 -2
hud/utils/group_eval.py +223 -0
hud/utils/hud_console.py +113 -13
hud/utils/tasks.py +119 -0
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/METADATA +20 -2
{hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/RECORD +68 -48
hud/cli/hf.py +0 -406
hud/cli/rl/README.md +0 -243
hud/cli/rl/init.py +0 -370
hud/cli/rl/pod.py +0 -501
hud/cli/rl/ssh.py +0 -322
hud/cli/rl/train.py +0 -562
hud/cli/rl/utils.py +0 -165
hud/datasets/execution/__init__.py +0 -13
hud/datasets/task.py +0 -116
{hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/WHEEL +0 -0
{hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/licenses/LICENSE +0 -0

hud/cli/get.py ADDED Viewed

@@ -0,0 +1,160 @@
+"""Get command for downloading HuggingFace datasets."""
+from __future__ import annotations
+import contextlib
+import json
+from pathlib import Path
+import typer
+from datasets import Dataset
+from rich.console import Console
+from rich.progress import Progress, SpinnerColumn, TextColumn
+# Module-level Rich console; get_command() sends all of its user-facing output through it.
+console = Console()
+def get_command(
+    dataset_name: str = typer.Argument(
+        ..., help="HuggingFace dataset name (e.g., 'hud-evals/browser-2048-tasks')"
+    ),
+    split: str = typer.Option(
+        "train", "--split", "-s", help="Dataset split to download (train/test/validation)"
+    ),
+    output: Path | None = typer.Option(  # noqa: B008
+        None, "--output", "-o", help="Output filename (defaults to dataset_name.jsonl)"
+    ),
+    format: str | None = typer.Option(
+        "json",
+        "--format",
+        "-f",
+        help="Output format: json (list) or jsonl (one task per line)",
+    ),
+    limit: int | None = typer.Option(
+        None, "--limit", "-l", help="Limit number of examples to download"
+    ),
+) -> None:
+    """Download a HuggingFace dataset and save it as JSON (list) or JSONL."""
+    console.print(f"\n[cyan]📥 Downloading dataset: {dataset_name}[/cyan]")
+    # Import datasets library
+    try:
+        from datasets import load_dataset
+    except ImportError as e:
+        console.print("[red]Error: datasets library not installed[/red]")
+        console.print("[yellow]Install with: pip install datasets[/yellow]")
+        raise typer.Exit(1) from e
+    # Determine output filename
+    if output is None:
+        # Convert dataset name to filename (e.g., "hud-evals/browser-2048" -> "browser-2048.json|jsonl") # noqa: E501
+        if format is None:
+            format = "json"
+        ext = ".json" if format.lower() == "json" else ".jsonl"
+        dataset_filename = dataset_name.split("/")[-1] + ext
+        output = Path(dataset_filename)
+    # Download dataset with progress
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        transient=True,
+    ) as progress:
+        task = progress.add_task(f"Loading {dataset_name}...", total=None)
+        try:
+            dataset = load_dataset(dataset_name, split=split)
+            progress.update(task, completed=100)
+        except ValueError as e:
+            if "Unknown split" in str(e):
+                console.print(f"[red]Error: Split '{split}' not found in dataset[/red]")
+                console.print("[yellow]Common splits: train, test, validation[/yellow]")
+            else:
+                console.print(f"[red]Error loading dataset: {e}[/red]")
+            raise typer.Exit(1) from e
+        except FileNotFoundError as e:
+            console.print(f"[red]Error: Dataset '{dataset_name}' not found[/red]")
+            console.print("[yellow]Check the dataset name on HuggingFace Hub[/yellow]")
+            raise typer.Exit(1) from e
+        except Exception as e:
+            if "authentication" in str(e).lower() or "401" in str(e):
+                console.print("[red]Error: Dataset requires authentication[/red]")
+                console.print("[yellow]Login with: huggingface-cli login[/yellow]")
+            else:
+                console.print(f"[red]Error loading dataset: {e}[/red]")
+            raise typer.Exit(1) from e
+    if not isinstance(dataset, Dataset):
+        raise typer.Exit(1)
+    # Apply limit if specified
+    if limit:
+        dataset = dataset.select(range(min(limit, len(dataset))))
+        console.print(f"[yellow]Limited to {len(dataset)} examples[/yellow]")
+    # Save as JSON or JSONL
+    console.print(f"[cyan]Writing to {output}...[/cyan]")
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        "[progress.percentage]{task.percentage:>3.0f}%",
+        transient=True,
+    ) as progress:
+        task = progress.add_task("Saving...", total=len(dataset))
+        if format is None:
+            format = "json"
+        if format.lower() == "json":
+            # Write a single JSON array
+            data_list = []
+            for _, example in enumerate(dataset):
+                item = example.to_dict() if hasattr(example, "to_dict") else example  # type: ignore
+                for key, value in item.items():  # type: ignore
+                    with contextlib.suppress(json.JSONDecodeError):
+                        item[key] = json.loads(value)  # type: ignore
+                data_list.append(item)
+                progress.update(task, advance=1)
+            with open(output, "w", encoding="utf-8") as f:
+                json.dump(data_list, f, ensure_ascii=False, indent=2)
+        else:
+            # Write JSONL
+            with open(output, "w", encoding="utf-8") as f:
+                for _, example in enumerate(dataset):
+                    # Convert to dict if needed
+                    if hasattr(example, "to_dict"):
+                        example = example.to_dict()  # type: ignore
+                    for key, value in example.items():  # type: ignore
+                        with contextlib.suppress(json.JSONDecodeError):
+                            example[key] = json.loads(value)  # type: ignore
+                    # Write as JSON line
+                    f.write(json.dumps(example) + "\n")
+                    progress.update(task, advance=1)
+    # Show summary
+    console.print(f"\n[green]✅ Downloaded {len(dataset)} examples to {output}[/green]")
+    # Show sample of fields
+    if len(dataset) > 0:
+        first_example = dataset[0]
+        if hasattr(first_example, "to_dict"):
+            first_example = first_example.to_dict()  # type: ignore
+        console.print("\n[yellow]Dataset fields:[/yellow]")
+        for field in first_example:
+            console.print(f"  • {field}")
+        # Show example if small enough
+        if len(json.dumps(first_example)) < 500:
+            console.print("\n[yellow]First example:[/yellow]")
+            console.print(json.dumps(first_example, indent=2))
+    # Show next steps
+    console.print("\n[dim]Next steps:[/dim]")
+    console.print(f"[dim]• Use for training: hud rl {output}[/dim]")
+    console.print(f"[dim]• Use for evaluation: hud eval {output}[/dim]")
+# Public API of this module: only the get_command entry point is exported.
+__all__ = ["get_command"]

hud-python 0.4.28__py3-none-any.whl → 0.4.30__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.28__py3-none-any.whl → 0.4.30__py3-none-any.whl