hud-python 0.4.14__py3-none-any.whl → 0.4.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (42) hide show
  1. hud/agents/base.py +118 -33
  2. hud/agents/claude.py +1 -1
  3. hud/agents/openai.py +5 -16
  4. hud/agents/tests/test_openai.py +24 -79
  5. hud/cli/__init__.py +137 -15
  6. hud/cli/analyze.py +2 -4
  7. hud/cli/build.py +6 -2
  8. hud/cli/dev.py +67 -0
  9. hud/cli/eval.py +90 -35
  10. hud/cli/hf.py +406 -0
  11. hud/cli/init.py +38 -19
  12. hud/cli/rl/README.md +243 -0
  13. hud/cli/rl/__init__.py +82 -0
  14. hud/cli/rl/init.py +370 -0
  15. hud/cli/rl/pod.py +491 -0
  16. hud/cli/rl/ssh.py +288 -0
  17. hud/cli/rl/train.py +421 -0
  18. hud/cli/rl/utils.py +165 -0
  19. hud/cli/tests/test_mcp_server.py +1 -4
  20. hud/clients/base.py +2 -0
  21. hud/clients/fastmcp.py +7 -2
  22. hud/clients/mcp_use.py +3 -1
  23. hud/clients/utils/retry_transport.py +34 -8
  24. hud/datasets/__init__.py +32 -0
  25. hud/datasets/execution/__init__.py +13 -0
  26. hud/datasets/execution/parallel.py +592 -0
  27. hud/datasets/execution/runner.py +123 -0
  28. hud/datasets/task.py +107 -0
  29. hud/datasets/utils.py +118 -0
  30. hud/otel/instrumentation.py +2 -1
  31. hud/server/server.py +58 -21
  32. hud/settings.py +12 -0
  33. hud/types.py +31 -10
  34. hud/utils/design.py +168 -2
  35. hud/utils/tests/test_version.py +1 -1
  36. hud/version.py +1 -1
  37. {hud_python-0.4.14.dist-info → hud_python-0.4.16.dist-info}/METADATA +4 -3
  38. {hud_python-0.4.14.dist-info → hud_python-0.4.16.dist-info}/RECORD +41 -28
  39. hud/datasets.py +0 -327
  40. {hud_python-0.4.14.dist-info → hud_python-0.4.16.dist-info}/WHEEL +0 -0
  41. {hud_python-0.4.14.dist-info → hud_python-0.4.16.dist-info}/entry_points.txt +0 -0
  42. {hud_python-0.4.14.dist-info → hud_python-0.4.16.dist-info}/licenses/LICENSE +0 -0
hud/cli/rl/README.md ADDED
@@ -0,0 +1,243 @@
1
+ # HUD RL Commands
2
+
3
+ This module provides reinforcement learning commands for training agents on HUD environments using the `hud-vf-gym` adapter and verifiers framework.
4
+
5
+ ## Configuration
6
+
7
+ API keys can be configured in two ways:
8
+
9
+ 1. **Environment Variables**:
10
+ ```bash
11
+ export HUD_API_KEY="your-hud-api-key"
12
+ export WANDB_API_KEY="your-wandb-api-key"
13
+ export PRIME_API_KEY="your-prime-api-key"
14
+ ```
15
+
16
+ 2. **`.env` File** (recommended):
17
+ Create a `.env` file in your project root:
18
+ ```env
19
+ HUD_API_KEY=your-hud-api-key
20
+ WANDB_API_KEY=your-wandb-api-key
21
+ PRIME_API_KEY=your-prime-api-key
22
+ ```
23
+
24
+ HUD automatically loads settings from the `.env` file if present.
25
+
26
+ ## Quick Start
27
+
28
+ ```bash
29
+ # 1. Generate config from environment
30
+ hud rl init my-env:latest
31
+
32
+ # 2. Create dataset from tasks
33
+ hud hf tasks.json --name my-org/my-tasks
34
+
35
+ # 3. Start training (interactive mode)
36
+ hud rl
37
+ ```
38
+
39
+ ## Commands
40
+
41
+ ### `hud rl init`
42
+
43
+ Generates a `hud-vf-gym` configuration file by analyzing a HUD environment:
44
+
45
+ ```bash
46
+ hud rl init hudpython/hud-text-2048:latest
47
+ hud rl init my-env:latest -o configs/my-env.yaml
48
+ hud rl init my-env:latest --force # Overwrite existing
49
+ ```
50
+
51
+ This command:
52
+ - Analyzes the environment's available tools
53
+ - Generates appropriate action mappings
54
+ - Creates a system prompt with tool descriptions
55
+ - Sets up default parser and rubric configurations
56
+
57
+ ### `hud hf`
58
+
59
+ Converts HUD tasks to HuggingFace dataset format:
60
+
61
+ ```bash
62
+ hud hf tasks.json --name my-org/my-dataset
63
+ hud hf tasks.json --name my-org/private-dataset --private
64
+ hud hf tasks.json --name local-dataset --no-push # Local only
65
+ ```
66
+
67
+ Features:
68
+ - Validates task format
69
+ - Auto-infers MCP config from `hud.lock.yaml`
70
+ - Updates lock file with primary dataset reference
71
+ - Supports both single task and task array formats
72
+
73
+ ### `hud rl` (main command)
74
+
75
+ Runs RL training with automatic setup:
76
+
77
+ ```bash
78
+ # Interactive mode - prompts for missing components
79
+ hud rl
80
+
81
+ # Specify options
82
+ hud rl --model gpt-4o-mini --dataset my-org/my-tasks
83
+ hud rl --config configs/2048.yaml --gpus 4xH100
84
+ hud rl --gpus 4xH100 --provider prime
85
+ ```
86
+
87
+ The command will:
88
+ 1. Check for required files (config, dataset)
89
+ 2. Offer to generate missing components
90
+ 3. Push environment to registry if needed
91
+ 4. Start training (local or remote)
92
+
93
+ ## Task Format
94
+
95
+ Tasks should follow this JSON format:
96
+
97
+ ```json
98
+ {
99
+ "id": "task-001",
100
+ "prompt": "Complete the task description",
101
+ "mcp_config": {
102
+ "hud": {
103
+ "url": "https://mcp.hud.so/v3/mcp",
104
+ "headers": {
105
+ "Authorization": "Bearer $HUD_API_KEY",
106
+ "Mcp-Image": "your-org/your-env:latest"
107
+ }
108
+ }
109
+ },
110
+ "setup_tool": {
111
+ "name": "setup",
112
+ "arguments": {
113
+ "name": "function_name",
114
+ "param": "value"
115
+ }
116
+ },
117
+ "evaluate_tool": {
118
+ "name": "evaluate",
119
+ "arguments": {
120
+ "name": "evaluator_name",
121
+ "expected": "value"
122
+ }
123
+ },
124
+ "metadata": {
125
+ "difficulty": "easy",
126
+ "category": "task_type"
127
+ }
128
+ }
129
+ ```
130
+
131
+ ## Configuration Format
132
+
133
+ The generated YAML configs follow the `hud-vf-gym` specification:
134
+
135
+ ```yaml
136
+ job:
137
+ name: "RL Training - my-env"
138
+ metadata:
139
+ environment: "my-env:latest"
140
+
141
+ system_prompt: |
142
+ You are an AI agent interacting with my-env.
143
+
144
+ Available tools:
145
+ - tool_name(params): Description
146
+ Usage: <tool>tool_name(...)</tool>
147
+
148
+ parser:
149
+ use_thinking: true
150
+ xml_weight: 0.6
151
+ action_weight: 0.4
152
+
153
+ action_mappings:
154
+ tool_name:
155
+ _tool: "mcp_tool_name"
156
+ _parser:
157
+ positional: ["param1", "param2"]
158
+ param1:
159
+ from_arg: "param1"
160
+
161
+ rubric:
162
+ weights:
163
+ task_completion: 0.8
164
+ tool_execution: 0.1
165
+ format_compliance: 0.1
166
+ ```
167
+
168
+ ## Lock File Integration
169
+
170
+ The commands integrate with `hud.lock.yaml`:
171
+
172
+ ```yaml
173
+ image: "my-org/my-env:latest"
174
+ primary_dataset:
175
+ name: "my-org/my-tasks"
176
+ task_count: 50
177
+ updated_at: "2024-01-01T00:00:00"
178
+ ```
179
+
180
+ This allows:
181
+ - Automatic dataset discovery for `hud rl`
182
+ - MCP config inference for tasks
183
+ - Environment image tracking
184
+
185
+ ## Remote Training
186
+
187
+ The `hud rl` command fully automates remote training on GPU instances:
188
+
189
+ 1. **Automatic Pod Creation**: Provisions GPU instances via Prime Intellect API
190
+ 2. **Environment Setup**: Installs all required dependencies automatically
191
+ 3. **Training Execution**: Runs distributed training with vLLM inference server
192
+ 4. **Live Monitoring**: Streams training logs with WANDB integration
193
+
194
+ ### What Happens Automatically
195
+
196
+ When you run `hud rl`, the system will:
197
+
198
+ 1. **Create GPU Pod**:
199
+ - Selects lowest-cost provider (typically datacrunch)
200
+ - Allocates specified GPUs (e.g., 2xA100 for GRPO training)
201
+ - Configures with PyTorch CUDA image
202
+ - Polls until SSH is available (5-20 minutes)
203
+
204
+ 2. **Transfer Files**:
205
+ - Copies your config YAML to the pod
206
+ - Creates a custom training script
207
+
208
+ 3. **Install Dependencies**:
209
+ - Installs `uv` package manager
210
+ - Creates Python 3.12 virtual environment
211
+ - Installs `hud-vf-gym` via Prime registry
212
+ - Installs `verifiers[train]` for GRPO training
213
+ - Installs `flash-attn` for efficient attention
214
+
215
+ 4. **Setup Training**:
216
+ - Exports WANDB_API_KEY and HUD_API_KEY
217
+ - Starts vLLM inference server on GPU 0 via tmux
218
+ - Runs GRPO training on GPU 1
219
+ - Logs metrics to Weights & Biases
220
+
221
+ ### Required API Keys
222
+
223
+ Ensure these are set in your `.env` file or environment:
224
+ - `HUD_API_KEY`: For HUD telemetry and MCP connections
225
+ - `WANDB_API_KEY`: For training metrics and logging
226
+ - `PRIME_API_KEY`: For pod provisioning
227
+
228
+ ### SSH Key Configuration
229
+
230
+ Before using Prime pods:
231
+ 1. Generate SSH keys at: https://app.primeintellect.ai/dashboard/profile
232
+ 2. Download and save as: `~/.ssh/prime_key.pem`
233
+ 3. Set permissions: `chmod 400 ~/.ssh/prime_key.pem`
234
+ 4. Configure Prime CLI: `prime config set-ssh-key-path ~/.ssh/prime_key.pem`
235
+
236
+
237
+ ## Implementation Notes
238
+
239
+ The RL commands are built on top of:
240
+ - `hud-vf-gym`: Generic adapter for HUD environments
241
+ - `verifiers`: RL training framework
242
+ - HuggingFace datasets: Task storage and distribution
243
+ - Prime Intellect infrastructure: GPU provisioning for remote training
hud/cli/rl/__init__.py ADDED
@@ -0,0 +1,82 @@
1
+ """HUD RL - Commands for reinforcement learning with HUD environments."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path # noqa: TC003
6
+
7
+ import typer
8
+
9
+ from hud.utils.design import HUDDesign
10
+
11
# Typer application object for the "rl" command group. The callback and the
# `init` command defined below register themselves on it.
rl_app = typer.Typer(name="rl", help="🤖 Reinforcement learning commands for HUD environments", rich_markup_mode="rich")

# Module-level HUDDesign instance.
design = HUDDesign()
19
+
20
+
21
@rl_app.callback(invoke_without_command=True)
def rl_main(
    ctx: typer.Context,
    model: str = typer.Option("Qwen/Qwen2.5-3B-Instruct", "--model", "-m", help="Model to train"),
    dataset: str | None = typer.Option(
        None, "--dataset", "-d", help="Override dataset from lock file"
    ),
    config: Path | None = typer.Option(None, "--config", "-c", help="Config YAML path"),  # noqa: B008
    gpus: str = typer.Option("2xA100", "--gpus", help="GPU configuration (e.g., 2xA100, 4xH100)"),
    provider: str = typer.Option("prime", "--provider", help="Infrastructure provider"),
    output_dir: Path = typer.Option("./checkpoints", "--output", "-o", help="Output directory"),  # noqa: B008
) -> None:
    """🤖 Train RL models on HUD environments.

    Runs training on remote GPU infrastructure with automatic setup.
    The command will:
    1. Check for required files (config, dataset)
    2. Offer to generate missing files
    3. Push environment to registry if needed
    4. Start remote training on Prime Intellect

    Examples:
    hud rl # Interactive mode with prompts
    hud rl --model gpt2 # Train with specific model
    hud rl --gpus 4xH100 # Use different GPU configuration
    hud rl init my-env:latest # Generate config for environment
    """
    # The callback also fires when a subcommand (e.g. `init`) was requested;
    # in that case the subcommand does the work and there is nothing to do here.
    if ctx.invoked_subcommand is not None:
        return

    # Imported at call time, so the training module is only loaded when
    # training is actually started.
    from .train import train_command_wrapper

    training_options = {
        "model": model,
        "dataset": dataset,
        "config": config,
        "gpus": gpus,
        "provider": provider,
        "output_dir": output_dir,
    }
    train_command_wrapper(**training_options)
60
+
61
+
62
@rl_app.command()
def init(
    directory: str = typer.Argument(".", help="Environment directory or Docker image"),
    output: Path | None = typer.Option(None, "--output", "-o", help="Output config file path"),  # noqa: B008
    force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing config"),
    build: bool = typer.Option(False, "--build", "-b", help="Build environment if no lock file"),
) -> None:
    """🔧 Generate hud-vf-gym config from environment.

    Generates a YAML configuration file compatible with the hud-vf-gym adapter
    from either a directory with hud.lock.yaml or a Docker image.

    Examples:
    hud rl init # Use current directory
    hud rl init environments/test # Use specific directory
    hud rl init my-env:latest # Use Docker image directly
    hud rl init . -o configs/2048.yaml --build
    """
    # `output` defaults to None (no path given), so it is annotated as
    # `Path | None`, matching `config` in `rl_main` and the wrapper's signature.
    # The wrapper is imported at call time, so the .init module is only loaded
    # when this command actually runs.
    from .init import init_command_wrapper

    init_command_wrapper(directory, output, force, build)
hud/cli/rl/init.py ADDED
@@ -0,0 +1,370 @@
1
+ """Initialize RL configuration from environment analysis."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import typer
10
+ import yaml
11
+
12
+ from hud.clients import MCPClient
13
+ from hud.utils.design import HUDDesign
14
+
15
# Module-wide HUDDesign instance; the functions below call it for headers,
# status/error/hint messages and interactive selection.
design = HUDDesign()
16
+
17
+
18
def init_command_wrapper(directory: str, output: Path | None, force: bool, build: bool) -> None:
    """Resolve interactive choices synchronously, then run `init_command`.

    When `directory` is an existing directory that has no `hud.lock.yaml`,
    the environment is either built (if `build` is set or the user picks that
    option), replaced by a Docker image name typed by the user, or the command
    is aborted. All prompting happens here, before the event loop is started.

    Args:
        directory: Environment directory, or a Docker image reference.
        output: Destination config path, or None to let the generator choose.
        force: Overwrite an existing config file.
        build: Build the environment without asking when no lock file exists.

    Raises:
        typer.Exit: With code 1 when the user selects "Cancel".
    """
    design.header("RL Config Generator", icon="🔧")

    def run_build() -> None:
        # After a build the lock file is expected to exist.
        design.info("Building environment...")
        from hud.cli.build import build_command

        build_command(str(directory), None, False, False, {})

    env_path = Path(directory)
    lock_missing = (
        env_path.exists() and env_path.is_dir() and not (env_path / "hud.lock.yaml").exists()
    )

    if lock_missing and build:
        # Auto-build was requested on the command line.
        run_build()
    elif lock_missing:
        # Fall back to an image named in pyproject.toml or auto-generated.
        from hud.cli.utils.environment import get_image_name, image_exists

        image, source = get_image_name(directory)
        cached_image_usable = source == "cache" and image_exists(image)

        if not cached_image_usable:
            design.warning(f"No hud.lock.yaml found in {directory}")
            choice = design.select(
                "No lock file found. Would you like to:",
                ["Build the environment", "Use Docker image directly", "Cancel"],
            )

            if choice == "Build the environment":
                run_build()
            elif choice == "Use Docker image directly":
                # From here on the typed image name is used in place of the directory.
                directory = typer.prompt("Enter Docker image name")
            else:
                raise typer.Exit(1)

    # Building (if any) already happened above, so the async part never builds.
    asyncio.run(init_command(directory, output, force, False))
68
+
69
+
70
async def init_command(directory: str, output: Path | None, force: bool, build: bool) -> None:
    """Generate a hud-vf-gym config for a directory or a Docker image.

    Three sources are tried, in this order:
    1. `directory` is not an existing directory: it is used as a Docker image
       reference and analyzed directly.
    2. The directory has a `hud.lock.yaml`: image and tools are read from it.
    3. The directory has no lock file: a cached, locally existing image
       reported by `get_image_name` is analyzed.

    Args:
        directory: Environment directory, or a Docker image reference.
        output: Destination config path, or None for the generator's default.
        force: Overwrite an existing config file.
        build: Not read by this function; building is done by the wrapper.

    Raises:
        typer.Exit: With code 1 when the lock file is unreadable or lacks an
            image or tools, or when no lock file and no usable image exist.
    """
    env_path = Path(directory)

    if not (env_path.exists() and env_path.is_dir()):
        # Anything that is not a local directory is treated as an image name.
        await analyze_and_generate(directory, output, force)
        return

    lock_path = env_path / "hud.lock.yaml"

    if not lock_path.exists():
        # No lock file: only a cached image can still be used at this point,
        # because prompting and building belong to the synchronous wrapper.
        from hud.cli.utils.environment import get_image_name, image_exists

        image, source = get_image_name(directory)

        if not (source == "cache" and image_exists(image)):
            design.error("No valid image or lock file found")
            raise typer.Exit(1)

        design.info(f"Using cached image: {image}")
        await analyze_and_generate(image, output, force)
        return

    design.info(f"Found lock file: {lock_path}")
    lock_data = read_lock_file_path(lock_path)

    if not lock_data:
        design.error("Failed to read lock file")
        raise typer.Exit(1)

    image = lock_data.get("image", "")
    tools = lock_data.get("tools", [])

    if not image:
        design.error("No image found in lock file")
        design.hint("Run 'hud build' to create a proper lock file")
        raise typer.Exit(1)

    if not tools:
        design.error("No tools found in lock file")
        design.hint("Lock file may be outdated. Run 'hud build' to regenerate")
        raise typer.Exit(1)

    # The lock file already lists the tools, so no container has to be started.
    await generate_from_lock(image, tools, output, force)
125
+
126
+
127
def read_lock_file_path(lock_path: Path) -> dict[str, Any]:
    """Read a lock file from a specific path.

    Args:
        lock_path: Path of the YAML lock file to load.

    Returns:
        The parsed top-level mapping. An empty dict is returned when the file
        cannot be opened, decoded or parsed, when it is empty, or when its
        top-level value is not a mapping; except for the empty-file case an
        error is reported through `design.error`.
    """
    try:
        # Decode as UTF-8 explicitly instead of relying on the platform locale.
        with open(lock_path, encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except (OSError, ValueError, yaml.YAMLError) as e:
        # ValueError also covers UnicodeDecodeError raised while decoding.
        design.error(f"Failed to read lock file: {e}")
        return {}

    if not data:
        # Empty document (or an empty/falsy top-level value).
        return {}

    if not isinstance(data, dict):
        # Callers use `.get(...)` on the result, so a list or scalar document
        # must not be passed through.
        design.error(
            f"Failed to read lock file: expected a mapping, got {type(data).__name__}"
        )
        return {}

    return data
135
+
136
+
137
async def generate_from_lock(
    image: str, tools: list[dict], output: Path | None, force: bool
) -> None:
    """Generate config from lock file data and write it as YAML.

    Args:
        image: Image reference taken from the lock file.
        tools: Tool entries from the lock file. Each needs a "name";
            "description" and "inputSchema" are optional.
        output: Destination path, or None for `configs/<image name>.yaml`.
        force: Overwrite `output` if it already exists.

    Raises:
        typer.Exit: With code 1 when `output` exists and `force` is not set.
    """
    if output is None:
        # Default to configs/{image_name}.yaml. The digest is stripped first so
        # "org/env@sha256:..." yields "env", the same name `generate_config`
        # derives, instead of "env@sha256".
        image_name = image.split("@")[0].split("/")[-1].split(":")[0]
        output = Path("configs") / f"{image_name}.yaml"

    if output.exists() and not force:
        design.error(f"Config file already exists: {output}")
        design.info("Use --force to overwrite")
        raise typer.Exit(1)

    output.parent.mkdir(parents=True, exist_ok=True)

    # Normalize lock-file entries. Older lock files carry no inputSchema, so an
    # empty object schema is substituted for them.
    full_tools = [
        {
            "name": tool["name"],
            "description": tool.get("description", ""),
            "inputSchema": tool.get(
                "inputSchema", {"type": "object", "properties": {}, "required": []}
            ),
        }
        for tool in tools
    ]

    config = await generate_config(image, full_tools)

    with open(output, "w", encoding="utf-8") as f:  # noqa: ASYNC230
        yaml.dump(config, f, default_flow_style=False, sort_keys=False)

    design.success(f"Generated config: {output}")

    # Show summary
    design.section_title("📋 Generated Configuration")
    design.info("Source: hud.lock.yaml")
    design.info(f"Image: {image}")
    design.info(f"System prompt: {len(config['system_prompt'])} characters")
    design.info(f"Action mappings: {len(config['action_mappings'])} tools")
    design.info("")
    design.info("Next steps:")
    design.command_example("hud hf tasks.json --name my-tasks", "Create dataset")
    design.command_example(f"hud rl --config {output}", "Start training")
193
+
194
+
195
async def analyze_and_generate(image: str, output: Path | None, force: bool) -> None:
    """Analyze a Docker image over MCP and write the generated config as YAML.

    The image is started with `docker run --rm -i <image>` through an
    `MCPClient`, its tools are read from `analyze_environment()`, and the
    resulting config is written to `output`.

    Args:
        image: Docker image reference to analyze.
        output: Destination path, or None for `configs/<image name>.yaml`.
        force: Overwrite `output` if it already exists.

    Raises:
        typer.Exit: With code 1 when `output` exists and `force` is not set,
            or when client start-up, analysis, generation or writing fails.
    """
    if output is None:
        # Default to configs/{image_name}.yaml. The digest is stripped first so
        # "org/env@sha256:..." yields "env", the same name `generate_config`
        # derives, instead of "env@sha256".
        image_name = image.split("@")[0].split("/")[-1].split(":")[0]
        output = Path("configs") / f"{image_name}.yaml"

    if output.exists() and not force:
        design.error(f"Config file already exists: {output}")
        design.info("Use --force to overwrite")
        raise typer.Exit(1)

    output.parent.mkdir(parents=True, exist_ok=True)

    design.info(f"Analyzing environment: {image}")

    try:
        # MCP config that launches the image as a local stdio server.
        mcp_config = {"local": {"command": "docker", "args": ["run", "--rm", "-i", image]}}

        client = MCPClient(mcp_config=mcp_config, auto_trace=False)
        await client.initialize()

        try:
            analysis = await client.analyze_environment()
            tools = analysis.get("tools", [])

            config = await generate_config(image, tools)

            with open(output, "w", encoding="utf-8") as f:  # noqa: ASYNC230
                yaml.dump(config, f, default_flow_style=False, sort_keys=False)

            design.success(f"Generated config: {output}")

            # Show summary
            design.section_title("📋 Generated Configuration")
            design.info(f"System prompt: {len(config['system_prompt'])} characters")
            design.info(f"Action mappings: {len(config['action_mappings'])} tools")
            design.info("")
            design.info("Next steps:")
            design.command_example("hud hf tasks.json --name my-tasks", "Create dataset")
            design.command_example(f"hud rl --config {output}", "Start training")

        finally:
            # Shut the client down whether or not generation succeeded.
            await client.shutdown()

    except Exception as e:
        # Top-level boundary for this command: report and exit, keeping the cause.
        design.error(f"Failed to analyze environment: {e}")
        design.hint("Make sure the Docker image exists and contains a valid MCP server")
        raise typer.Exit(1) from e
252
+
253
+
254
def _schema_type_label(spec: Any) -> str:
    """Return a short type label for one JSON-Schema property definition."""
    if not isinstance(spec, dict):
        return "any"
    declared = spec.get("type", "any")
    if isinstance(declared, list):
        # JSON Schema allows a list of types, e.g. ["string", "null"].
        return " | ".join(str(item) for item in declared)
    return str(declared)


async def generate_config(image: str, tools: list[dict[str, Any]]) -> dict[str, Any]:
    """Generate hud-vf-gym configuration from tool analysis.

    Args:
        image: Image reference; an "@digest" suffix is dropped for display.
        tools: Tool entries. Each needs a "name"; "description" and
            "inputSchema" are optional. An "inputSchema" that is not a mapping
            (for example None) is treated as missing.

    Returns:
        A dict with the keys "# Generated by hud rl init", "job",
        "system_prompt", "parser", "action_mappings", "rubric" and "defaults".
        Tools named "setup" and "evaluate" are excluded, and a "done" action
        is added when no tool has that name.

    Note:
        The "# Generated by hud rl init" and "# TODO" entries are ordinary
        mapping keys, so they are emitted as keys (not comments) when the
        result is dumped to YAML.
    """
    # Clean up image name for display
    display_name = image.split("@")[0]  # Remove SHA hash
    env_name = display_name.split("/")[-1].split(":")[0]  # Extract just the env name

    # Filter out setup/evaluate tools
    interaction_tools = [t for t in tools if t["name"] not in ("setup", "evaluate")]

    # Generate system prompt
    tool_descriptions = []
    for tool in interaction_tools:
        schema = tool.get("inputSchema")
        properties = schema.get("properties") if isinstance(schema, dict) else None

        if properties:
            required = schema.get("required") or []
            # Optional parameters are marked with a trailing "?".
            param_str = ", ".join(
                f"{name}: {_schema_type_label(spec)}"
                if name in required
                else f"{name}?: {_schema_type_label(spec)}"
                for name, spec in properties.items()
            )
        else:
            # No parameter information available for this tool
            param_str = "..."

        # Fall back for missing, empty and None descriptions alike.
        desc = tool.get("description") or "No description"

        tool_descriptions.append(
            f"- {tool['name']}({param_str}): {desc}\n Usage: <tool>{tool['name']}(...)</tool>"
        )

    # Add note if any tools are missing schema info
    if any(not isinstance(t.get("inputSchema"), dict) for t in interaction_tools):
        tool_descriptions.append(
            "\nNote: Some tools are missing parameter information. Update manually if needed."
        )

    tools_block = "\n".join(tool_descriptions)
    system_prompt = f"""You are an AI agent in a HUD environment.

You have access to the following tools:

{tools_block}

Always use the exact XML format shown above for tool calls.
Think step by step about what you need to do."""

    # Generate action mappings
    action_mappings: dict[str, Any] = {}

    for tool in interaction_tools:
        schema = tool.get("inputSchema")

        if isinstance(schema, dict):
            # We have schema info (even if no parameters)
            params = schema.get("properties") or {}
            required = schema.get("required") or []

            # Simple 1:1 mapping by default
            mapping: dict[str, Any] = {
                "_tool": tool["name"],
                "_parser": {
                    "positional": list(required)  # Use required params as positional
                },
            }

            for param_name in params:
                mapping[param_name] = {"from_arg": param_name}
        else:
            # No schema information at all
            mapping = {
                "_tool": tool["name"],
                "_parser": {
                    "positional": []  # No positional args without schema
                },
                "# TODO": "Update with actual parameters",
            }

        action_mappings[tool["name"]] = mapping

    # Add special "done" action if not present
    if "done" not in action_mappings:
        action_mappings["done"] = {
            "_tool": None,  # Special marker for task completion
            "_parser": {"positional": []},
        }

    # Build full config
    return {
        "# Generated by hud rl init": f"for {env_name}",
        "job": {
            "name": f"RL Training - {env_name}",
            "metadata": {
                "environment": display_name,
                "full_image": image,
                "generated_by": "hud rl init",
            },
        },
        "system_prompt": system_prompt,
        "parser": {"use_thinking": True, "xml_weight": 0.6, "action_weight": 0.4},
        "action_mappings": action_mappings,
        "rubric": {
            "weights": {"task_completion": 0.8, "tool_execution": 0.1, "format_compliance": 0.1}
        },
        "defaults": {"max_turns": 100},
    }