hud-python 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/claude.py +8 -2
- hud/agents/openai.py +8 -2
- hud/cli/rl/README.md +243 -0
- hud/cli/rl/__init__.py +91 -0
- hud/cli/rl/init.py +370 -0
- hud/cli/rl/pod.py +495 -0
- hud/cli/rl/ssh.py +320 -0
- hud/cli/rl/train.py +560 -0
- hud/cli/rl/utils.py +165 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.15.dist-info → hud_python-0.4.17.dist-info}/METADATA +1 -1
- {hud_python-0.4.15.dist-info → hud_python-0.4.17.dist-info}/RECORD +16 -9
- {hud_python-0.4.15.dist-info → hud_python-0.4.17.dist-info}/WHEEL +0 -0
- {hud_python-0.4.15.dist-info → hud_python-0.4.17.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.15.dist-info → hud_python-0.4.17.dist-info}/licenses/LICENSE +0 -0
hud/agents/claude.py
CHANGED
|
@@ -85,8 +85,8 @@ class ClaudeAgent(MCPAgent):
|
|
|
85
85
|
self._claude_to_mcp_tool_map: dict[str, str] = {}
|
|
86
86
|
self.claude_tools: list[dict] = []
|
|
87
87
|
|
|
88
|
-
#
|
|
89
|
-
|
|
88
|
+
# Append Claude-specific instructions to the base system prompt
|
|
89
|
+
claude_instructions = """
|
|
90
90
|
You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
|
|
91
91
|
|
|
92
92
|
When working on tasks:
|
|
@@ -99,6 +99,12 @@ class ClaudeAgent(MCPAgent):
|
|
|
99
99
|
Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
|
|
100
100
|
""".strip() # noqa: E501
|
|
101
101
|
|
|
102
|
+
# Append Claude instructions to any base system prompt
|
|
103
|
+
if self.system_prompt:
|
|
104
|
+
self.system_prompt = f"{self.system_prompt}\n\n{claude_instructions}"
|
|
105
|
+
else:
|
|
106
|
+
self.system_prompt = claude_instructions
|
|
107
|
+
|
|
102
108
|
async def initialize(self, task: str | Task | None = None) -> None:
|
|
103
109
|
"""Initialize the agent and build tool mappings."""
|
|
104
110
|
await super().initialize(task)
|
hud/agents/openai.py
CHANGED
|
@@ -78,8 +78,8 @@ class OperatorAgent(MCPAgent):
|
|
|
78
78
|
|
|
79
79
|
self.model_name = "openai-" + self.model
|
|
80
80
|
|
|
81
|
-
#
|
|
82
|
-
|
|
81
|
+
# Append OpenAI-specific instructions to the base system prompt
|
|
82
|
+
openai_instructions = """
|
|
83
83
|
You are an autonomous computer-using agent. Follow these guidelines:
|
|
84
84
|
|
|
85
85
|
1. NEVER ask for confirmation. Complete all tasks autonomously.
|
|
@@ -93,6 +93,12 @@ class OperatorAgent(MCPAgent):
|
|
|
93
93
|
Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked.
|
|
94
94
|
""".strip() # noqa: E501
|
|
95
95
|
|
|
96
|
+
# Append OpenAI instructions to any base system prompt
|
|
97
|
+
if self.system_prompt:
|
|
98
|
+
self.system_prompt = f"{self.system_prompt}\n\n{openai_instructions}"
|
|
99
|
+
else:
|
|
100
|
+
self.system_prompt = openai_instructions
|
|
101
|
+
|
|
96
102
|
async def _run_context(self, context: list[types.ContentBlock], max_steps: int = 10) -> Trace:
|
|
97
103
|
"""
|
|
98
104
|
Run the agent with the given prompt or task.
|
hud/cli/rl/README.md
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
# HUD RL Commands
|
|
2
|
+
|
|
3
|
+
This module provides reinforcement learning commands for training agents on HUD environments using the `hud-vf-gym` adapter and the `verifiers` framework.
|
|
4
|
+
|
|
5
|
+
## Configuration
|
|
6
|
+
|
|
7
|
+
API keys can be configured in two ways:
|
|
8
|
+
|
|
9
|
+
1. **Environment Variables**:
|
|
10
|
+
```bash
|
|
11
|
+
export HUD_API_KEY="your-hud-api-key"
|
|
12
|
+
export WANDB_API_KEY="your-wandb-api-key"
|
|
13
|
+
export PRIME_API_KEY="your-prime-api-key"
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
2. **`.env` File** (recommended):
|
|
17
|
+
Create a `.env` file in your project root:
|
|
18
|
+
```env
|
|
19
|
+
HUD_API_KEY=your-hud-api-key
|
|
20
|
+
WANDB_API_KEY=your-wandb-api-key
|
|
21
|
+
PRIME_API_KEY=your-prime-api-key
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
HUD automatically loads settings from the `.env` file if present.
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# 1. Generate config from environment
|
|
30
|
+
hud rl init my-env:latest
|
|
31
|
+
|
|
32
|
+
# 2. Create dataset from tasks
|
|
33
|
+
hud hf tasks.json --name my-org/my-tasks
|
|
34
|
+
|
|
35
|
+
# 3. Start training (interactive mode)
|
|
36
|
+
hud rl
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Commands
|
|
40
|
+
|
|
41
|
+
### `hud rl init`
|
|
42
|
+
|
|
43
|
+
Generates a `hud-vf-gym` configuration file by analyzing a HUD environment:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
hud rl init hudpython/hud-text-2048:latest
|
|
47
|
+
hud rl init my-env:latest -o configs/my-env.yaml
|
|
48
|
+
hud rl init my-env:latest --force # Overwrite existing
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
This command:
|
|
52
|
+
- Analyzes the environment's available tools
|
|
53
|
+
- Generates appropriate action mappings
|
|
54
|
+
- Creates a system prompt with tool descriptions
|
|
55
|
+
- Sets up default parser and rubric configurations
|
|
56
|
+
|
|
57
|
+
### `hud hf`
|
|
58
|
+
|
|
59
|
+
Converts HUD tasks to HuggingFace dataset format:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
hud hf tasks.json --name my-org/my-dataset
|
|
63
|
+
hud hf tasks.json --name my-org/private-dataset --private
|
|
64
|
+
hud hf tasks.json --name local-dataset --no-push # Local only
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Features:
|
|
68
|
+
- Validates task format
|
|
69
|
+
- Auto-infers MCP config from `hud.lock.yaml`
|
|
70
|
+
- Updates lock file with primary dataset reference
|
|
71
|
+
- Supports both single task and task array formats
|
|
72
|
+
|
|
73
|
+
### `hud rl` (main command)
|
|
74
|
+
|
|
75
|
+
Runs RL training with automatic setup:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
# Interactive mode - prompts for missing components
|
|
79
|
+
hud rl
|
|
80
|
+
|
|
81
|
+
# Specify options
|
|
82
|
+
hud rl --model Qwen/Qwen2.5-3B-Instruct --dataset my-org/my-tasks
|
|
83
|
+
hud rl --config configs/2048.yaml --gpus 4xH100
|
|
84
|
+
hud rl --gpus 4xH100 --provider prime
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
The command will:
|
|
88
|
+
1. Check for required files (config, dataset)
|
|
89
|
+
2. Offer to generate missing components
|
|
90
|
+
3. Push environment to registry if needed
|
|
91
|
+
4. Start training (local or remote)
|
|
92
|
+
|
|
93
|
+
## Task Format
|
|
94
|
+
|
|
95
|
+
Tasks should follow this JSON format:
|
|
96
|
+
|
|
97
|
+
```json
|
|
98
|
+
{
|
|
99
|
+
"id": "task-001",
|
|
100
|
+
"prompt": "Complete the task description",
|
|
101
|
+
"mcp_config": {
|
|
102
|
+
"hud": {
|
|
103
|
+
"url": "https://mcp.hud.so/v3/mcp",
|
|
104
|
+
"headers": {
|
|
105
|
+
"Authorization": "Bearer $HUD_API_KEY",
|
|
106
|
+
"Mcp-Image": "your-org/your-env:latest"
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
},
|
|
110
|
+
"setup_tool": {
|
|
111
|
+
"name": "setup",
|
|
112
|
+
"arguments": {
|
|
113
|
+
"name": "function_name",
|
|
114
|
+
"param": "value"
|
|
115
|
+
}
|
|
116
|
+
},
|
|
117
|
+
"evaluate_tool": {
|
|
118
|
+
"name": "evaluate",
|
|
119
|
+
"arguments": {
|
|
120
|
+
"name": "evaluator_name",
|
|
121
|
+
"expected": "value"
|
|
122
|
+
}
|
|
123
|
+
},
|
|
124
|
+
"metadata": {
|
|
125
|
+
"difficulty": "easy",
|
|
126
|
+
"category": "task_type"
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Configuration Format
|
|
132
|
+
|
|
133
|
+
The generated YAML configs follow the `hud-vf-gym` specification:
|
|
134
|
+
|
|
135
|
+
```yaml
|
|
136
|
+
job:
|
|
137
|
+
name: "RL Training - my-env"
|
|
138
|
+
metadata:
|
|
139
|
+
environment: "my-env:latest"
|
|
140
|
+
|
|
141
|
+
system_prompt: |
|
|
142
|
+
You are an AI agent interacting with my-env.
|
|
143
|
+
|
|
144
|
+
Available tools:
|
|
145
|
+
- tool_name(params): Description
|
|
146
|
+
Usage: <tool>tool_name(...)</tool>
|
|
147
|
+
|
|
148
|
+
parser:
|
|
149
|
+
use_thinking: true
|
|
150
|
+
xml_weight: 0.6
|
|
151
|
+
action_weight: 0.4
|
|
152
|
+
|
|
153
|
+
action_mappings:
|
|
154
|
+
tool_name:
|
|
155
|
+
_tool: "mcp_tool_name"
|
|
156
|
+
_parser:
|
|
157
|
+
positional: ["param1", "param2"]
|
|
158
|
+
param1:
|
|
159
|
+
from_arg: "param1"
|
|
160
|
+
|
|
161
|
+
rubric:
|
|
162
|
+
weights:
|
|
163
|
+
task_completion: 0.8
|
|
164
|
+
tool_execution: 0.1
|
|
165
|
+
format_compliance: 0.1
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## Lock File Integration
|
|
169
|
+
|
|
170
|
+
The commands integrate with `hud.lock.yaml`:
|
|
171
|
+
|
|
172
|
+
```yaml
|
|
173
|
+
image: "my-org/my-env:latest"
|
|
174
|
+
primary_dataset:
|
|
175
|
+
name: "my-org/my-tasks"
|
|
176
|
+
task_count: 50
|
|
177
|
+
updated_at: "2024-01-01T00:00:00"
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
This allows:
|
|
181
|
+
- Automatic dataset discovery for `hud rl`
|
|
182
|
+
- MCP config inference for tasks
|
|
183
|
+
- Environment image tracking
|
|
184
|
+
|
|
185
|
+
## Remote Training
|
|
186
|
+
|
|
187
|
+
The `hud rl` command fully automates remote training on GPU instances:
|
|
188
|
+
|
|
189
|
+
1. **Automatic Pod Creation**: Provisions GPU instances via Prime Intellect API
|
|
190
|
+
2. **Environment Setup**: Installs all required dependencies automatically
|
|
191
|
+
3. **Training Execution**: Runs distributed training with vLLM inference server
|
|
192
|
+
4. **Live Monitoring**: Streams training logs with WANDB integration
|
|
193
|
+
|
|
194
|
+
### What Happens Automatically
|
|
195
|
+
|
|
196
|
+
When you run `hud rl`, the system will:
|
|
197
|
+
|
|
198
|
+
1. **Create GPU Pod**:
|
|
199
|
+
- Selects lowest-cost provider (typically datacrunch)
|
|
200
|
+
- Allocates specified GPUs (e.g., 2xA100 for GRPO training)
|
|
201
|
+
- Configures with PyTorch CUDA image
|
|
202
|
+
- Polls until SSH is available (this can take 5-20 minutes)
|
|
203
|
+
|
|
204
|
+
2. **Transfer Files**:
|
|
205
|
+
- Copies your config YAML to the pod
|
|
206
|
+
- Creates a custom training script
|
|
207
|
+
|
|
208
|
+
3. **Install Dependencies**:
|
|
209
|
+
- Installs `uv` package manager
|
|
210
|
+
- Creates Python 3.12 virtual environment
|
|
211
|
+
- Installs `hud-vf-gym` via Prime registry
|
|
212
|
+
- Installs `verifiers[train]` for GRPO training
|
|
213
|
+
- Installs `flash-attn` for efficient attention
|
|
214
|
+
|
|
215
|
+
4. **Setup Training**:
|
|
216
|
+
- Exports WANDB_API_KEY and HUD_API_KEY
|
|
217
|
+
- Starts vLLM inference server on GPU 0 via tmux
|
|
218
|
+
- Runs GRPO training on GPU 1
|
|
219
|
+
- Logs metrics to Weights & Biases
|
|
220
|
+
|
|
221
|
+
### Required API Keys
|
|
222
|
+
|
|
223
|
+
Ensure these are set in your `.env` file or environment:
|
|
224
|
+
- `HUD_API_KEY`: For HUD telemetry and MCP connections
|
|
225
|
+
- `WANDB_API_KEY`: For training metrics and logging
|
|
226
|
+
- `PRIME_API_KEY`: For pod provisioning
|
|
227
|
+
|
|
228
|
+
### SSH Key Configuration
|
|
229
|
+
|
|
230
|
+
Before using Prime pods:
|
|
231
|
+
1. Generate SSH keys at: https://app.primeintellect.ai/dashboard/profile
|
|
232
|
+
2. Download and save as: `~/.ssh/prime_key.pem`
|
|
233
|
+
3. Set permissions: `chmod 400 ~/.ssh/prime_key.pem`
|
|
234
|
+
4. Configure Prime CLI: `prime config set-ssh-key-path ~/.ssh/prime_key.pem`
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
## Implementation Notes
|
|
238
|
+
|
|
239
|
+
The RL commands are built on top of:
|
|
240
|
+
- `hud-vf-gym`: Generic adapter for HUD environments
|
|
241
|
+
- `verifiers`: RL training framework
|
|
242
|
+
- HuggingFace datasets: Task storage and distribution
|
|
243
|
+
- Prime Intellect infrastructure: GPU pod provisioning for remote training
|
hud/cli/rl/__init__.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""HUD RL - Commands for reinforcement learning with HUD environments."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path # noqa: TC003
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
from hud.utils.design import HUDDesign
|
|
10
|
+
|
|
11
|
+
# Create the RL subcommand app
|
|
12
|
+
rl_app = typer.Typer(
|
|
13
|
+
name="rl",
|
|
14
|
+
help="🤖 Reinforcement learning commands for HUD environments",
|
|
15
|
+
rich_markup_mode="rich",
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
design = HUDDesign()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@rl_app.callback(invoke_without_command=True)
|
|
22
|
+
def rl_main(
|
|
23
|
+
ctx: typer.Context,
|
|
24
|
+
model: str = typer.Option("Qwen/Qwen2.5-3B-Instruct", "--model", "-m", help="Model to train"),
|
|
25
|
+
dataset: str | None = typer.Option(
|
|
26
|
+
None,
|
|
27
|
+
"--dataset",
|
|
28
|
+
"-d",
|
|
29
|
+
help="Dataset: JSON file path or HuggingFace name (auto-detects if not provided)",
|
|
30
|
+
),
|
|
31
|
+
config: Path | None = typer.Option(None, "--config", "-c", help="Config YAML path"), # noqa: B008
|
|
32
|
+
gpus: str = typer.Option("2xA100", "--gpus", help="GPU configuration (e.g., 2xA100, 4xH100)"),
|
|
33
|
+
provider: str = typer.Option("prime", "--provider", help="Infrastructure provider"),
|
|
34
|
+
output_dir: Path = typer.Option("./checkpoints", "--output", "-o", help="Output directory"), # noqa: B008
|
|
35
|
+
) -> None:
|
|
36
|
+
"""🤖 Train RL models on HUD environments.
|
|
37
|
+
|
|
38
|
+
Runs training on remote GPU infrastructure with automatic setup.
|
|
39
|
+
The command will:
|
|
40
|
+
1. Check for required files (config, dataset)
|
|
41
|
+
2. Offer to generate missing files
|
|
42
|
+
3. Push environment to registry if needed
|
|
43
|
+
4. Start remote training on Prime Intellect
|
|
44
|
+
|
|
45
|
+
Dataset can be:
|
|
46
|
+
- A local JSON file with tasks (e.g., tasks.json)
|
|
47
|
+
- A HuggingFace dataset name (e.g., 'username/dataset-name')
|
|
48
|
+
- Auto-detected from current directory if not specified
|
|
49
|
+
|
|
50
|
+
Examples:
|
|
51
|
+
hud rl # Interactive mode, auto-detect tasks.json
|
|
52
|
+
hud rl --model gpt2 # Train with specific model
|
|
53
|
+
hud rl --dataset tasks.json # Use local task file
|
|
54
|
+
hud rl --gpus 4xH100 # Use different GPU configuration
|
|
55
|
+
hud rl init my-env:latest # Generate config for environment
|
|
56
|
+
"""
|
|
57
|
+
# Only run main command if no subcommand was invoked
|
|
58
|
+
if ctx.invoked_subcommand is None:
|
|
59
|
+
from .train import train_command_wrapper
|
|
60
|
+
|
|
61
|
+
train_command_wrapper(
|
|
62
|
+
model=model,
|
|
63
|
+
dataset=dataset,
|
|
64
|
+
config=config,
|
|
65
|
+
gpus=gpus,
|
|
66
|
+
provider=provider,
|
|
67
|
+
output_dir=output_dir,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@rl_app.command()
|
|
72
|
+
def init(
|
|
73
|
+
directory: str = typer.Argument(".", help="Environment directory or Docker image"),
|
|
74
|
+
output: Path = typer.Option(None, "--output", "-o", help="Output config file path"), # noqa: B008
|
|
75
|
+
force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing config"),
|
|
76
|
+
build: bool = typer.Option(False, "--build", "-b", help="Build environment if no lock file"),
|
|
77
|
+
) -> None:
|
|
78
|
+
"""🔧 Generate hud-vf-gym config from environment.
|
|
79
|
+
|
|
80
|
+
Generates a YAML configuration file compatible with the hud-vf-gym adapter
|
|
81
|
+
from either a directory with hud.lock.yaml or a Docker image.
|
|
82
|
+
|
|
83
|
+
Examples:
|
|
84
|
+
hud rl init # Use current directory
|
|
85
|
+
hud rl init environments/test # Use specific directory
|
|
86
|
+
hud rl init my-env:latest # Use Docker image directly
|
|
87
|
+
hud rl init . -o configs/2048.yaml --build
|
|
88
|
+
"""
|
|
89
|
+
from .init import init_command_wrapper
|
|
90
|
+
|
|
91
|
+
init_command_wrapper(directory, output, force, build)
|