hud-python 0.4.28__py3-none-any.whl → 0.4.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (77)
  1. hud/__init__.py +2 -1
  2. hud/agents/base.py +81 -45
  3. hud/agents/claude.py +8 -4
  4. hud/agents/openai_chat_generic.py +66 -40
  5. hud/agents/tests/test_base.py +0 -4
  6. hud/agents/tests/test_openai.py +1 -1
  7. hud/cli/__init__.py +182 -52
  8. hud/cli/dev.py +8 -9
  9. hud/cli/eval.py +317 -119
  10. hud/cli/flows/__init__.py +0 -0
  11. hud/cli/flows/tasks.py +0 -0
  12. hud/cli/get.py +160 -0
  13. hud/cli/rl/__init__.py +567 -71
  14. hud/cli/rl/config.py +94 -0
  15. hud/cli/rl/display.py +133 -0
  16. hud/cli/rl/gpu.py +63 -0
  17. hud/cli/rl/gpu_utils.py +318 -0
  18. hud/cli/rl/presets.py +96 -0
  19. hud/cli/rl/remote_runner.py +347 -0
  20. hud/cli/rl/rl_api.py +150 -0
  21. hud/cli/rl/vllm.py +177 -0
  22. hud/cli/tests/test_analyze_metadata.py +0 -1
  23. hud/cli/utils/tasks.py +26 -0
  24. hud/clients/base.py +21 -23
  25. hud/clients/mcp_use.py +36 -44
  26. hud/clients/tests/test_mcp_use_retry.py +10 -10
  27. hud/datasets/__init__.py +4 -3
  28. hud/datasets/{execution/parallel.py → parallel.py} +1 -1
  29. hud/datasets/{execution/runner.py → runner.py} +1 -1
  30. hud/datasets/utils.py +1 -1
  31. hud/native/comparator.py +6 -6
  32. hud/native/tests/test_comparator.py +8 -8
  33. hud/native/tests/test_native_init.py +13 -11
  34. hud/otel/config.py +1 -1
  35. hud/otel/instrumentation.py +35 -0
  36. hud/rl/README.md +30 -0
  37. hud/rl/__init__.py +1 -0
  38. hud/rl/actor.py +174 -0
  39. hud/rl/buffer.py +371 -0
  40. hud/rl/chat_template.jinja +101 -0
  41. hud/rl/config.py +184 -0
  42. hud/rl/distributed.py +95 -0
  43. hud/rl/learner.py +589 -0
  44. hud/rl/tests/__init__.py +1 -0
  45. hud/rl/tests/test_learner.py +171 -0
  46. hud/rl/train.py +354 -0
  47. hud/rl/types.py +101 -0
  48. hud/rl/utils/start_vllm_server.sh +30 -0
  49. hud/rl/utils.py +524 -0
  50. hud/rl/vllm_adapter.py +125 -0
  51. hud/settings.py +6 -0
  52. hud/telemetry/__init__.py +2 -1
  53. hud/telemetry/job.py +46 -3
  54. hud/telemetry/tests/test_trace.py +3 -3
  55. hud/telemetry/trace.py +85 -13
  56. hud/tools/tests/test_computer.py +3 -3
  57. hud/tools/tests/test_computer_actions.py +1 -1
  58. hud/types.py +123 -2
  59. hud/utils/group_eval.py +223 -0
  60. hud/utils/hud_console.py +113 -13
  61. hud/utils/tasks.py +119 -0
  62. hud/utils/tests/test_version.py +1 -1
  63. hud/version.py +1 -1
  64. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/METADATA +20 -2
  65. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/RECORD +68 -48
  66. hud/cli/hf.py +0 -406
  67. hud/cli/rl/README.md +0 -243
  68. hud/cli/rl/init.py +0 -370
  69. hud/cli/rl/pod.py +0 -501
  70. hud/cli/rl/ssh.py +0 -322
  71. hud/cli/rl/train.py +0 -562
  72. hud/cli/rl/utils.py +0 -165
  73. hud/datasets/execution/__init__.py +0 -13
  74. hud/datasets/task.py +0 -116
  75. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/WHEEL +0 -0
  76. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/entry_points.txt +0 -0
  77. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/licenses/LICENSE +0 -0
hud/cli/get.py ADDED
@@ -0,0 +1,160 @@
1
+ """Get command for downloading HuggingFace datasets."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import json
7
+ from pathlib import Path
8
+
9
+ import typer
10
+ from datasets import Dataset
11
+ from rich.console import Console
12
+ from rich.progress import Progress, SpinnerColumn, TextColumn
13
+
14
# Shared Rich console; all user-facing messages in this module are printed through it.
+ console = Console()
15
+
16
+
17
def _decode_json_fields(example):  # type: ignore[no-untyped-def]
    """Return a copy of *example* with JSON-encoded text fields decoded.

    Only ``str``/``bytes``/``bytearray`` values are handed to ``json.loads``;
    every other value (ints, floats, ``None``, lists, dicts, ...) is passed
    through untouched, because ``json.loads`` raises ``TypeError`` for them.
    Text that is not valid JSON is kept as-is.  Note that text which happens
    to be a valid JSON scalar (e.g. ``"123"`` or ``"true"``) is converted too.
    """
    record = example.to_dict() if hasattr(example, "to_dict") else example
    decoded = {}
    for key, value in record.items():
        if isinstance(value, (str, bytes, bytearray)):
            # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
            with contextlib.suppress(ValueError):
                value = json.loads(value)
        decoded[key] = value
    return decoded


def get_command(
    dataset_name: str = typer.Argument(
        ..., help="HuggingFace dataset name (e.g., 'hud-evals/browser-2048-tasks')"
    ),
    split: str = typer.Option(
        "train", "--split", "-s", help="Dataset split to download (train/test/validation)"
    ),
    output: Path | None = typer.Option(  # noqa: B008
        None,
        "--output",
        "-o",
        help="Output filename (defaults to <dataset>.json or <dataset>.jsonl, matching --format)",
    ),
    format: str | None = typer.Option(
        "json",
        "--format",
        "-f",
        help="Output format: json (list) or jsonl (one task per line)",
    ),
    limit: int | None = typer.Option(
        None, "--limit", "-l", help="Limit number of examples to download"
    ),
) -> None:
    """Download a HuggingFace dataset and save it as JSON (list) or JSONL.

    Args:
        dataset_name: Dataset identifier passed to ``datasets.load_dataset``.
        split: Name of the split to load.
        output: Destination file. When omitted, the last path component of
            ``dataset_name`` plus ``.json``/``.jsonl`` is used.
        format: ``"json"`` writes a single JSON array; any other value writes
            one JSON object per line (a warning is printed for values other
            than ``"jsonl"``). ``None`` is treated as ``"json"``.
        limit: Maximum number of examples to keep; ``None`` keeps everything.

    Raises:
        typer.Exit: With exit code 1 when the ``datasets`` library is missing,
            ``limit`` is negative, the dataset cannot be loaded, or the loaded
            object is not a single ``Dataset`` split.
    """
    console.print(f"\n[cyan]📥 Downloading dataset: {dataset_name}[/cyan]")

    # Import datasets library
    try:
        from datasets import load_dataset
    except ImportError as e:
        console.print("[red]Error: datasets library not installed[/red]")
        console.print("[yellow]Install with: pip install datasets[/yellow]")
        raise typer.Exit(1) from e

    # Normalise the format once so the filename and the writer cannot disagree.
    fmt = (format or "json").lower()
    if fmt not in ("json", "jsonl"):
        console.print(f"[yellow]Unknown format '{format}', writing JSONL instead[/yellow]")
    write_as_array = fmt == "json"

    # Reject a nonsensical limit before spending time on the download.
    if limit is not None and limit < 0:
        console.print("[red]Error: --limit must be a non-negative integer[/red]")
        raise typer.Exit(1)

    # Determine output filename
    if output is None:
        # e.g. "hud-evals/browser-2048" -> "browser-2048.json" / "browser-2048.jsonl"
        ext = ".json" if write_as_array else ".jsonl"
        output = Path(dataset_name.split("/")[-1] + ext)

    # Download dataset with progress
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    ) as progress:
        task = progress.add_task(f"Loading {dataset_name}...", total=None)

        try:
            dataset = load_dataset(dataset_name, split=split)
            progress.update(task, completed=100)
        except ValueError as e:
            if "Unknown split" in str(e):
                console.print(f"[red]Error: Split '{split}' not found in dataset[/red]")
                console.print("[yellow]Common splits: train, test, validation[/yellow]")
            else:
                console.print(f"[red]Error loading dataset: {e}[/red]")
            raise typer.Exit(1) from e
        except FileNotFoundError as e:
            console.print(f"[red]Error: Dataset '{dataset_name}' not found[/red]")
            console.print("[yellow]Check the dataset name on HuggingFace Hub[/yellow]")
            raise typer.Exit(1) from e
        except Exception as e:
            if "authentication" in str(e).lower() or "401" in str(e):
                console.print("[red]Error: Dataset requires authentication[/red]")
                console.print("[yellow]Login with: huggingface-cli login[/yellow]")
            else:
                console.print(f"[red]Error loading dataset: {e}[/red]")
            raise typer.Exit(1) from e

    if not isinstance(dataset, Dataset):
        # load_dataset can return other container types; only a single split is supported.
        console.print(
            f"[red]Error: expected a single dataset split, got {type(dataset).__name__}[/red]"
        )
        raise typer.Exit(1)

    # Apply limit if specified ("is not None" so that an explicit 0 is honoured)
    if limit is not None:
        dataset = dataset.select(range(min(limit, len(dataset))))
        console.print(f"[yellow]Limited to {len(dataset)} examples[/yellow]")

    # Save as JSON or JSONL
    console.print(f"[cyan]Writing to {output}...[/cyan]")

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        "[progress.percentage]{task.percentage:>3.0f}%",
        transient=True,
    ) as progress:
        task = progress.add_task("Saving...", total=len(dataset))

        if write_as_array:
            # Collect every record first, then write a single JSON array.
            records = []
            for example in dataset:
                records.append(_decode_json_fields(example))
                progress.update(task, advance=1)
            with open(output, "w", encoding="utf-8") as f:
                json.dump(records, f, ensure_ascii=False, indent=2)
        else:
            # Stream one JSON object per line.
            with open(output, "w", encoding="utf-8") as f:
                for example in dataset:
                    line = json.dumps(_decode_json_fields(example), ensure_ascii=False)
                    f.write(line + "\n")
                    progress.update(task, advance=1)

    # Show summary
    console.print(f"\n[green]✅ Downloaded {len(dataset)} examples to {output}[/green]")

    # Show sample of fields
    if len(dataset) > 0:
        first_example = dataset[0]
        if hasattr(first_example, "to_dict"):
            first_example = first_example.to_dict()  # type: ignore

        console.print("\n[yellow]Dataset fields:[/yellow]")
        for field in first_example:
            console.print(f"  • {field}")

        # Show example if small enough
        if len(json.dumps(first_example)) < 500:
            console.print("\n[yellow]First example:[/yellow]")
            console.print(json.dumps(first_example, indent=2))

    # Show next steps
    console.print("\n[dim]Next steps:[/dim]")
    console.print(f"[dim]• Use for training: hud rl {output}[/dim]")
    console.print(f"[dim]• Use for evaluation: hud eval {output}[/dim]")
157
+
158
+
159
+ # Public API of this module: only get_command is exported.
160
+ __all__ = ["get_command"]