hud-python 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic; see the package registry's advisory page for more details.

hud/agents/claude.py CHANGED
@@ -85,8 +85,8 @@ class ClaudeAgent(MCPAgent):
85
85
  self._claude_to_mcp_tool_map: dict[str, str] = {}
86
86
  self.claude_tools: list[dict] = []
87
87
 
88
- # Base system prompt for autonomous operation
89
- self.system_prompt = """
88
+ # Append Claude-specific instructions to the base system prompt
89
+ claude_instructions = """
90
90
  You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
91
91
 
92
92
  When working on tasks:
@@ -99,6 +99,12 @@ class ClaudeAgent(MCPAgent):
99
99
  Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
100
100
  """.strip() # noqa: E501
101
101
 
102
+ # Append Claude instructions to any base system prompt
103
+ if self.system_prompt:
104
+ self.system_prompt = f"{self.system_prompt}\n\n{claude_instructions}"
105
+ else:
106
+ self.system_prompt = claude_instructions
107
+
102
108
  async def initialize(self, task: str | Task | None = None) -> None:
103
109
  """Initialize the agent and build tool mappings."""
104
110
  await super().initialize(task)
hud/agents/openai.py CHANGED
@@ -78,8 +78,8 @@ class OperatorAgent(MCPAgent):
78
78
 
79
79
  self.model_name = "openai-" + self.model
80
80
 
81
- # Base system prompt for autonomous operation
82
- self.system_prompt = """
81
+ # Append OpenAI-specific instructions to the base system prompt
82
+ openai_instructions = """
83
83
  You are an autonomous computer-using agent. Follow these guidelines:
84
84
 
85
85
  1. NEVER ask for confirmation. Complete all tasks autonomously.
@@ -93,6 +93,12 @@ class OperatorAgent(MCPAgent):
93
93
  Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked.
94
94
  """.strip() # noqa: E501
95
95
 
96
+ # Append OpenAI instructions to any base system prompt
97
+ if self.system_prompt:
98
+ self.system_prompt = f"{self.system_prompt}\n\n{openai_instructions}"
99
+ else:
100
+ self.system_prompt = openai_instructions
101
+
96
102
  async def _run_context(self, context: list[types.ContentBlock], max_steps: int = 10) -> Trace:
97
103
  """
98
104
  Run the agent with the given prompt or task.
hud/cli/rl/README.md ADDED
@@ -0,0 +1,243 @@
1
+ # HUD RL Commands
2
+
3
+ This module provides reinforcement learning commands for training agents on HUD environments using the `hud-vf-gym` adapter and verifiers framework.
4
+
5
+ ## Configuration
6
+
7
+ API keys can be configured in two ways:
8
+
9
+ 1. **Environment Variables**:
10
+ ```bash
11
+ export HUD_API_KEY="your-hud-api-key"
12
+ export WANDB_API_KEY="your-wandb-api-key"
13
+ export PRIME_API_KEY="your-prime-api-key"
14
+ ```
15
+
16
+ 2. **`.env` File** (recommended):
17
+ Create a `.env` file in your project root:
18
+ ```env
19
+ HUD_API_KEY=your-hud-api-key
20
+ WANDB_API_KEY=your-wandb-api-key
21
+ PRIME_API_KEY=your-prime-api-key
22
+ ```
23
+
24
+ HUD automatically loads settings from the `.env` file if present.
25
+
26
+ ## Quick Start
27
+
28
+ ```bash
29
+ # 1. Generate config from environment
30
+ hud rl init my-env:latest
31
+
32
+ # 2. Create dataset from tasks
33
+ hud hf tasks.json --name my-org/my-tasks
34
+
35
+ # 3. Start training (interactive mode)
36
+ hud rl
37
+ ```
38
+
39
+ ## Commands
40
+
41
+ ### `hud rl init`
42
+
43
+ Generates a `hud-vf-gym` configuration file by analyzing a HUD environment:
44
+
45
+ ```bash
46
+ hud rl init hudpython/hud-text-2048:latest
47
+ hud rl init my-env:latest -o configs/my-env.yaml
48
+ hud rl init my-env:latest --force # Overwrite existing
49
+ ```
50
+
51
+ This command:
52
+ - Analyzes the environment's available tools
53
+ - Generates appropriate action mappings
54
+ - Creates a system prompt with tool descriptions
55
+ - Sets up default parser and rubric configurations
56
+
57
+ ### `hud hf`
58
+
59
+ Converts HUD tasks to HuggingFace dataset format:
60
+
61
+ ```bash
62
+ hud hf tasks.json --name my-org/my-dataset
63
+ hud hf tasks.json --name my-org/private-dataset --private
64
+ hud hf tasks.json --name local-dataset --no-push # Local only
65
+ ```
66
+
67
+ Features:
68
+ - Validates task format
69
+ - Auto-infers MCP config from `hud.lock.yaml`
70
+ - Updates lock file with primary dataset reference
71
+ - Supports both single task and task array formats
72
+
73
+ ### `hud rl` (main command)
74
+
75
+ Runs RL training with automatic setup:
76
+
77
+ ```bash
78
+ # Interactive mode - prompts for missing components
79
+ hud rl
80
+
81
+ # Specify options
82
+ hud rl --model gpt-4o-mini --dataset my-org/my-tasks
83
+ hud rl --config configs/2048.yaml --gpus 4xH100
84
+ hud rl --gpus 4xH100 --provider prime
85
+ ```
86
+
87
+ The command will:
88
+ 1. Check for required files (config, dataset)
89
+ 2. Offer to generate missing components
90
+ 3. Push environment to registry if needed
91
+ 4. Start training (local or remote)
92
+
93
+ ## Task Format
94
+
95
+ Tasks should follow this JSON format:
96
+
97
+ ```json
98
+ {
99
+ "id": "task-001",
100
+ "prompt": "Complete the task description",
101
+ "mcp_config": {
102
+ "hud": {
103
+ "url": "https://mcp.hud.so/v3/mcp",
104
+ "headers": {
105
+ "Authorization": "Bearer $HUD_API_KEY",
106
+ "Mcp-Image": "your-org/your-env:latest"
107
+ }
108
+ }
109
+ },
110
+ "setup_tool": {
111
+ "name": "setup",
112
+ "arguments": {
113
+ "name": "function_name",
114
+ "param": "value"
115
+ }
116
+ },
117
+ "evaluate_tool": {
118
+ "name": "evaluate",
119
+ "arguments": {
120
+ "name": "evaluator_name",
121
+ "expected": "value"
122
+ }
123
+ },
124
+ "metadata": {
125
+ "difficulty": "easy",
126
+ "category": "task_type"
127
+ }
128
+ }
129
+ ```
130
+
131
+ ## Configuration Format
132
+
133
+ The generated YAML configs follow the `hud-vf-gym` specification:
134
+
135
+ ```yaml
136
+ job:
137
+ name: "RL Training - my-env"
138
+ metadata:
139
+ environment: "my-env:latest"
140
+
141
+ system_prompt: |
142
+ You are an AI agent interacting with my-env.
143
+
144
+ Available tools:
145
+ - tool_name(params): Description
146
+ Usage: <tool>tool_name(...)</tool>
147
+
148
+ parser:
149
+ use_thinking: true
150
+ xml_weight: 0.6
151
+ action_weight: 0.4
152
+
153
+ action_mappings:
154
+ tool_name:
155
+ _tool: "mcp_tool_name"
156
+ _parser:
157
+ positional: ["param1", "param2"]
158
+ param1:
159
+ from_arg: "param1"
160
+
161
+ rubric:
162
+ weights:
163
+ task_completion: 0.8
164
+ tool_execution: 0.1
165
+ format_compliance: 0.1
166
+ ```
167
+
168
+ ## Lock File Integration
169
+
170
+ The commands integrate with `hud.lock.yaml`:
171
+
172
+ ```yaml
173
+ image: "my-org/my-env:latest"
174
+ primary_dataset:
175
+ name: "my-org/my-tasks"
176
+ task_count: 50
177
+ updated_at: "2024-01-01T00:00:00"
178
+ ```
179
+
180
+ This allows:
181
+ - Automatic dataset discovery for `hud rl`
182
+ - MCP config inference for tasks
183
+ - Environment image tracking
184
+
185
+ ## Remote Training
186
+
187
+ The `hud rl` command fully automates remote training on GPU instances:
188
+
189
+ 1. **Automatic Pod Creation**: Provisions GPU instances via Prime Intellect API
190
+ 2. **Environment Setup**: Installs all required dependencies automatically
191
+ 3. **Training Execution**: Runs distributed training with vLLM inference server
192
+ 4. **Live Monitoring**: Streams training logs with WANDB integration
193
+
194
+ ### What Happens Automatically
195
+
196
+ When you run `hud rl`, the system will:
197
+
198
+ 1. **Create GPU Pod**:
199
+ - Selects lowest-cost provider (typically datacrunch)
200
+ - Allocates specified GPUs (e.g., 2xA100 for GRPO training)
201
+ - Configures with PyTorch CUDA image
202
+ - Polls until SSH is available (5-20 minutes)
203
+
204
+ 2. **Transfer Files**:
205
+ - Copies your config YAML to the pod
206
+ - Creates a custom training script
207
+
208
+ 3. **Install Dependencies**:
209
+ - Installs `uv` package manager
210
+ - Creates Python 3.12 virtual environment
211
+ - Installs `hud-vf-gym` via Prime registry
212
+ - Installs `verifiers[train]` for GRPO training
213
+ - Installs `flash-attn` for efficient attention
214
+
215
+ 4. **Setup Training**:
216
+ - Exports WANDB_API_KEY and HUD_API_KEY
217
+ - Starts vLLM inference server on GPU 0 via tmux
218
+ - Runs GRPO training on GPU 1
219
+ - Logs metrics to Weights & Biases
220
+
221
+ ### Required API Keys
222
+
223
+ Ensure these are set in your `.env` file or environment:
224
+ - `HUD_API_KEY`: For HUD telemetry and MCP connections
225
+ - `WANDB_API_KEY`: For training metrics and logging
226
+ - `PRIME_API_KEY`: For pod provisioning
227
+
228
+ ### SSH Key Configuration
229
+
230
+ Before using Prime pods:
231
+ 1. Generate SSH keys at: https://app.primeintellect.ai/dashboard/profile
232
+ 2. Download and save as: `~/.ssh/prime_key.pem`
233
+ 3. Set permissions: `chmod 400 ~/.ssh/prime_key.pem`
234
+ 4. Configure Prime CLI: `prime config set-ssh-key-path ~/.ssh/prime_key.pem`
235
+
236
+
237
+ ## Implementation Notes
238
+
239
+ The RL commands are built on top of:
240
+ - `hud-vf-gym`: Generic adapter for HUD environments
241
+ - `verifiers`: RL training framework
242
+ - HuggingFace datasets: Task storage and distribution
243
+ - Prime Intellect infrastructure: GPU provisioning
hud/cli/rl/__init__.py ADDED
@@ -0,0 +1,91 @@
1
+ """HUD RL - Commands for reinforcement learning with HUD environments."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path # noqa: TC003
6
+
7
+ import typer
8
+
9
+ from hud.utils.design import HUDDesign
10
+
11
+ # Create the RL subcommand app
12
+ rl_app = typer.Typer(
13
+ name="rl",
14
+ help="🤖 Reinforcement learning commands for HUD environments",
15
+ rich_markup_mode="rich",
16
+ )
17
+
18
+ design = HUDDesign()
19
+
20
+
21
+ @rl_app.callback(invoke_without_command=True)
22
+ def rl_main(
23
+ ctx: typer.Context,
24
+ model: str = typer.Option("Qwen/Qwen2.5-3B-Instruct", "--model", "-m", help="Model to train"),
25
+ dataset: str | None = typer.Option(
26
+ None,
27
+ "--dataset",
28
+ "-d",
29
+ help="Dataset: JSON file path or HuggingFace name (auto-detects if not provided)",
30
+ ),
31
+ config: Path | None = typer.Option(None, "--config", "-c", help="Config YAML path"), # noqa: B008
32
+ gpus: str = typer.Option("2xA100", "--gpus", help="GPU configuration (e.g., 2xA100, 4xH100)"),
33
+ provider: str = typer.Option("prime", "--provider", help="Infrastructure provider"),
34
+ output_dir: Path = typer.Option("./checkpoints", "--output", "-o", help="Output directory"), # noqa: B008
35
+ ) -> None:
36
+ """🤖 Train RL models on HUD environments.
37
+
38
+ Runs training on remote GPU infrastructure with automatic setup.
39
+ The command will:
40
+ 1. Check for required files (config, dataset)
41
+ 2. Offer to generate missing files
42
+ 3. Push environment to registry if needed
43
+ 4. Start remote training on Prime Intellect
44
+
45
+ Dataset can be:
46
+ - A local JSON file with tasks (e.g., tasks.json)
47
+ - A HuggingFace dataset name (e.g., 'username/dataset-name')
48
+ - Auto-detected from current directory if not specified
49
+
50
+ Examples:
51
+ hud rl # Interactive mode, auto-detect tasks.json
52
+ hud rl --model gpt2 # Train with specific model
53
+ hud rl --dataset tasks.json # Use local task file
54
+ hud rl --gpus 4xH100 # Use different GPU configuration
55
+ hud rl init my-env:latest # Generate config for environment
56
+ """
57
+ # Only run main command if no subcommand was invoked
58
+ if ctx.invoked_subcommand is None:
59
+ from .train import train_command_wrapper
60
+
61
+ train_command_wrapper(
62
+ model=model,
63
+ dataset=dataset,
64
+ config=config,
65
+ gpus=gpus,
66
+ provider=provider,
67
+ output_dir=output_dir,
68
+ )
69
+
70
+
71
+ @rl_app.command()
72
+ def init(
73
+ directory: str = typer.Argument(".", help="Environment directory or Docker image"),
74
+ output: Path = typer.Option(None, "--output", "-o", help="Output config file path"), # noqa: B008
75
+ force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing config"),
76
+ build: bool = typer.Option(False, "--build", "-b", help="Build environment if no lock file"),
77
+ ) -> None:
78
+ """🔧 Generate hud-vf-gym config from environment.
79
+
80
+ Generates a YAML configuration file compatible with the hud-vf-gym adapter
81
+ from either a directory with hud.lock.yaml or a Docker image.
82
+
83
+ Examples:
84
+ hud rl init # Use current directory
85
+ hud rl init environments/test # Use specific directory
86
+ hud rl init my-env:latest # Use Docker image directly
87
+ hud rl init . -o configs/2048.yaml --build
88
+ """
89
+ from .init import init_command_wrapper
90
+
91
+ init_command_wrapper(directory, output, force, build)