hud-python 0.4.28__py3-none-any.whl → 0.4.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +2 -1
- hud/agents/base.py +73 -45
- hud/agents/claude.py +8 -4
- hud/agents/openai_chat_generic.py +65 -40
- hud/agents/tests/test_base.py +0 -4
- hud/agents/tests/test_openai.py +1 -1
- hud/cli/__init__.py +182 -52
- hud/cli/dev.py +8 -9
- hud/cli/eval.py +317 -119
- hud/cli/flows/__init__.py +0 -0
- hud/cli/flows/tasks.py +0 -0
- hud/cli/get.py +160 -0
- hud/cli/rl/__init__.py +563 -71
- hud/cli/rl/config.py +94 -0
- hud/cli/rl/display.py +133 -0
- hud/cli/rl/gpu.py +63 -0
- hud/cli/rl/gpu_utils.py +318 -0
- hud/cli/rl/presets.py +96 -0
- hud/cli/rl/remote_runner.py +348 -0
- hud/cli/rl/rl_api.py +150 -0
- hud/cli/rl/vllm.py +177 -0
- hud/cli/tests/test_analyze_metadata.py +0 -1
- hud/cli/utils/tasks.py +26 -0
- hud/clients/base.py +21 -23
- hud/clients/mcp_use.py +36 -44
- hud/clients/tests/test_mcp_use_retry.py +10 -10
- hud/datasets/__init__.py +4 -3
- hud/datasets/{execution/parallel.py → parallel.py} +1 -1
- hud/datasets/{execution/runner.py → runner.py} +1 -1
- hud/datasets/utils.py +1 -1
- hud/native/tests/test_native_init.py +1 -1
- hud/otel/config.py +1 -1
- hud/otel/instrumentation.py +35 -0
- hud/rl/README.md +31 -0
- hud/rl/__init__.py +1 -0
- hud/rl/actor.py +174 -0
- hud/rl/buffer.py +371 -0
- hud/rl/chat_template.jinja +101 -0
- hud/rl/config.py +184 -0
- hud/rl/distributed.py +95 -0
- hud/rl/learner.py +586 -0
- hud/rl/tests/__init__.py +1 -0
- hud/rl/tests/test_learner.py +171 -0
- hud/rl/train.py +354 -0
- hud/rl/types.py +101 -0
- hud/rl/utils/start_vllm_server.sh +30 -0
- hud/rl/utils.py +524 -0
- hud/rl/vllm_adapter.py +125 -0
- hud/settings.py +6 -0
- hud/telemetry/__init__.py +2 -1
- hud/telemetry/job.py +46 -3
- hud/telemetry/tests/test_trace.py +3 -3
- hud/telemetry/trace.py +85 -13
- hud/tools/tests/test_computer.py +3 -3
- hud/tools/tests/test_computer_actions.py +1 -1
- hud/types.py +123 -2
- hud/utils/group_eval.py +223 -0
- hud/utils/hud_console.py +113 -13
- hud/utils/tasks.py +119 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/METADATA +20 -2
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/RECORD +66 -46
- hud/cli/hf.py +0 -406
- hud/cli/rl/README.md +0 -243
- hud/cli/rl/init.py +0 -370
- hud/cli/rl/pod.py +0 -501
- hud/cli/rl/ssh.py +0 -322
- hud/cli/rl/train.py +0 -562
- hud/cli/rl/utils.py +0 -165
- hud/datasets/execution/__init__.py +0 -13
- hud/datasets/task.py +0 -116
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/WHEEL +0 -0
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/licenses/LICENSE +0 -0
hud/cli/rl/utils.py
DELETED
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
"""Shared utilities for RL commands."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import logging
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
from typing import Any
|
|
8
|
-
|
|
9
|
-
import yaml
|
|
10
|
-
|
|
11
|
-
from hud.utils.hud_console import HUDConsole
|
|
12
|
-
|
|
13
|
-
hud_console = HUDConsole()
|
|
14
|
-
|
|
15
|
-
logger = logging.getLogger(__name__)
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def read_lock_file() -> dict[str, Any]:
|
|
19
|
-
"""Read and parse hud.lock.yaml file."""
|
|
20
|
-
lock_file = Path("hud.lock.yaml")
|
|
21
|
-
if not lock_file.exists():
|
|
22
|
-
return {}
|
|
23
|
-
|
|
24
|
-
try:
|
|
25
|
-
with open(lock_file) as f:
|
|
26
|
-
return yaml.safe_load(f) or {}
|
|
27
|
-
except Exception as e:
|
|
28
|
-
hud_console.warning(f"Could not read hud.lock.yaml: {e}")
|
|
29
|
-
return {}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def write_lock_file(data: dict[str, Any]) -> bool:
|
|
33
|
-
"""Write data to hud.lock.yaml file."""
|
|
34
|
-
lock_file = Path("hud.lock.yaml")
|
|
35
|
-
|
|
36
|
-
try:
|
|
37
|
-
with open(lock_file, "w") as f:
|
|
38
|
-
yaml.dump(data, f, default_flow_style=False, sort_keys=False)
|
|
39
|
-
return True
|
|
40
|
-
except Exception as e:
|
|
41
|
-
hud_console.warning(f"Could not write hud.lock.yaml: {e}")
|
|
42
|
-
return False
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def get_mcp_config_from_lock() -> dict[str, Any] | None:
|
|
46
|
-
"""Get MCP configuration from lock file."""
|
|
47
|
-
lock_data = read_lock_file()
|
|
48
|
-
|
|
49
|
-
# Check if there's an image reference
|
|
50
|
-
image = lock_data.get("image")
|
|
51
|
-
if image:
|
|
52
|
-
return {
|
|
53
|
-
"hud": {
|
|
54
|
-
"url": "https://mcp.hud.so/v3/mcp",
|
|
55
|
-
"headers": {"Authorization": "Bearer $HUD_API_KEY", "Mcp-Image": image},
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
return None
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def get_primary_dataset() -> str | None:
|
|
63
|
-
"""Get primary dataset name from lock file."""
|
|
64
|
-
lock_data = read_lock_file()
|
|
65
|
-
return lock_data.get("primary_dataset", {}).get("name")
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def get_image_from_lock() -> str | None:
|
|
69
|
-
"""Get image name from lock file."""
|
|
70
|
-
lock_data = read_lock_file()
|
|
71
|
-
return lock_data.get("image")
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def detect_image_name() -> str | None:
|
|
75
|
-
"""Try to detect image name from various sources."""
|
|
76
|
-
# First check lock file
|
|
77
|
-
image = get_image_from_lock()
|
|
78
|
-
if image:
|
|
79
|
-
return image
|
|
80
|
-
|
|
81
|
-
# Check pyproject.toml
|
|
82
|
-
pyproject = Path("pyproject.toml")
|
|
83
|
-
if pyproject.exists():
|
|
84
|
-
try:
|
|
85
|
-
import tomllib
|
|
86
|
-
|
|
87
|
-
with open(pyproject, "rb") as f:
|
|
88
|
-
data = tomllib.load(f)
|
|
89
|
-
|
|
90
|
-
# Check for hud.image_name
|
|
91
|
-
image = data.get("tool", {}).get("hud", {}).get("image_name")
|
|
92
|
-
if image:
|
|
93
|
-
return image
|
|
94
|
-
|
|
95
|
-
# Use project name
|
|
96
|
-
name = data.get("project", {}).get("name")
|
|
97
|
-
if name:
|
|
98
|
-
return f"{name}:latest"
|
|
99
|
-
except Exception:
|
|
100
|
-
logger.warning("Failed to load pyproject.toml")
|
|
101
|
-
|
|
102
|
-
# Use directory name as last resort
|
|
103
|
-
return f"{Path.cwd().name}:latest"
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def validate_dataset_name(name: str) -> bool:
|
|
107
|
-
"""Validate HuggingFace dataset name format."""
|
|
108
|
-
if not name:
|
|
109
|
-
return False
|
|
110
|
-
|
|
111
|
-
if "/" not in name:
|
|
112
|
-
hud_console.error(f"Invalid dataset name: {name}")
|
|
113
|
-
hud_console.info("Dataset name should be in format: org/dataset")
|
|
114
|
-
return False
|
|
115
|
-
|
|
116
|
-
parts = name.split("/")
|
|
117
|
-
if len(parts) != 2:
|
|
118
|
-
hud_console.error(f"Invalid dataset name: {name}")
|
|
119
|
-
return False
|
|
120
|
-
|
|
121
|
-
org, dataset = parts
|
|
122
|
-
if not org or not dataset:
|
|
123
|
-
hud_console.error(f"Invalid dataset name: {name}")
|
|
124
|
-
return False
|
|
125
|
-
|
|
126
|
-
# Check for valid characters (alphanumeric, dash, underscore)
|
|
127
|
-
import re
|
|
128
|
-
|
|
129
|
-
if not re.match(r"^[a-zA-Z0-9_-]+$", org) or not re.match(r"^[a-zA-Z0-9_-]+$", dataset):
|
|
130
|
-
hud_console.error(f"Invalid characters in dataset name: {name}")
|
|
131
|
-
hud_console.info("Use only letters, numbers, dashes, and underscores")
|
|
132
|
-
return False
|
|
133
|
-
|
|
134
|
-
return True
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def create_tasks_template() -> list[dict[str, Any]]:
|
|
138
|
-
"""Create a template for tasks.json file."""
|
|
139
|
-
return [
|
|
140
|
-
{
|
|
141
|
-
"id": "example-task-001",
|
|
142
|
-
"prompt": "Complete the first TODO item in the list",
|
|
143
|
-
"mcp_config": {
|
|
144
|
-
"# TODO": "Add your MCP configuration here",
|
|
145
|
-
"# Example for remote": {
|
|
146
|
-
"hud": {
|
|
147
|
-
"url": "https://mcp.hud.so/v3/mcp",
|
|
148
|
-
"headers": {
|
|
149
|
-
"Authorization": "Bearer $HUD_API_KEY",
|
|
150
|
-
"Mcp-Image": "your-org/your-env:latest",
|
|
151
|
-
},
|
|
152
|
-
}
|
|
153
|
-
},
|
|
154
|
-
"# Example for local": {
|
|
155
|
-
"local": {"command": "docker", "args": ["run", "--rm", "-i", "your-env:latest"]}
|
|
156
|
-
},
|
|
157
|
-
},
|
|
158
|
-
"setup_tool": {"name": "setup", "arguments": {"name": "todo_seed", "num_items": 5}},
|
|
159
|
-
"evaluate_tool": {
|
|
160
|
-
"name": "evaluate",
|
|
161
|
-
"arguments": {"name": "todo_completed", "expected_count": 1},
|
|
162
|
-
},
|
|
163
|
-
"metadata": {"difficulty": "easy", "category": "task_completion"},
|
|
164
|
-
}
|
|
165
|
-
]
|
|
hud/datasets/execution/__init__.py
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
"""Dataset execution module."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from .parallel import calculate_optimal_workers, run_dataset_parallel, run_dataset_parallel_manual
|
|
6
|
-
from .runner import run_dataset
|
|
7
|
-
|
|
8
|
-
__all__ = [
|
|
9
|
-
"calculate_optimal_workers",
|
|
10
|
-
"run_dataset",
|
|
11
|
-
"run_dataset_parallel",
|
|
12
|
-
"run_dataset_parallel_manual",
|
|
13
|
-
]
|
hud/datasets/task.py
DELETED
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
"""Task model for HUD datasets."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import json
|
|
6
|
-
import logging
|
|
7
|
-
from collections import defaultdict
|
|
8
|
-
from string import Template
|
|
9
|
-
from typing import Any
|
|
10
|
-
|
|
11
|
-
from pydantic import BaseModel, Field, field_validator
|
|
12
|
-
|
|
13
|
-
from hud.settings import settings
|
|
14
|
-
from hud.types import MCPToolCall
|
|
15
|
-
|
|
16
|
-
logger = logging.getLogger(__name__)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class Task(BaseModel):
|
|
20
|
-
"""
|
|
21
|
-
A task configuration that can be used to create a task.
|
|
22
|
-
|
|
23
|
-
The mcp_config field supports environment variable substitution using
|
|
24
|
-
template placeholders in the format ${VAR_NAME} or ${VAR_NAME:default_value}.
|
|
25
|
-
|
|
26
|
-
Example:
|
|
27
|
-
mcp_config: {
|
|
28
|
-
"hud": {
|
|
29
|
-
"url": "${HUD_MCP_URL:https://mcp.hud.so/v3/mcp}",
|
|
30
|
-
"headers": {
|
|
31
|
-
"Authorization": "Bearer ${HUD_API_KEY}",
|
|
32
|
-
"Mcp-Image": "your-mcp-image"
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
"""
|
|
37
|
-
|
|
38
|
-
id: str | None = None
|
|
39
|
-
prompt: str
|
|
40
|
-
mcp_config: dict[str, Any]
|
|
41
|
-
setup_tool: MCPToolCall | list[MCPToolCall] | None = None
|
|
42
|
-
evaluate_tool: MCPToolCall | list[MCPToolCall] | None = None
|
|
43
|
-
system_prompt: str | None = None
|
|
44
|
-
metadata: dict[str, Any] = Field(default_factory=dict)
|
|
45
|
-
|
|
46
|
-
@field_validator("mcp_config", "metadata", mode="before")
|
|
47
|
-
@classmethod
|
|
48
|
-
def parse_json_strings(cls, v: Any) -> Any:
|
|
49
|
-
"""Parse JSON strings into dictionaries."""
|
|
50
|
-
if isinstance(v, str):
|
|
51
|
-
try:
|
|
52
|
-
return json.loads(v)
|
|
53
|
-
except json.JSONDecodeError as e:
|
|
54
|
-
from hud.shared.exceptions import HudConfigError
|
|
55
|
-
|
|
56
|
-
raise HudConfigError(f"Invalid JSON string: {e}") from e
|
|
57
|
-
return v
|
|
58
|
-
|
|
59
|
-
@field_validator("setup_tool", "evaluate_tool", mode="before")
|
|
60
|
-
@classmethod
|
|
61
|
-
def convert_dict_to_tool_call(cls, v: Any) -> Any:
|
|
62
|
-
"""Convert dict to MCPToolCall instance, parsing JSON strings first."""
|
|
63
|
-
if v is None:
|
|
64
|
-
return None
|
|
65
|
-
|
|
66
|
-
# Parse JSON string if needed
|
|
67
|
-
if isinstance(v, str):
|
|
68
|
-
try:
|
|
69
|
-
v = json.loads(v)
|
|
70
|
-
except json.JSONDecodeError as e:
|
|
71
|
-
from hud.shared.exceptions import HudConfigError
|
|
72
|
-
|
|
73
|
-
raise HudConfigError(f"Invalid JSON string: {e}") from e
|
|
74
|
-
|
|
75
|
-
if isinstance(v, dict):
|
|
76
|
-
return MCPToolCall(**v)
|
|
77
|
-
if isinstance(v, list):
|
|
78
|
-
return [MCPToolCall(**item) if isinstance(item, dict) else item for item in v]
|
|
79
|
-
return v
|
|
80
|
-
|
|
81
|
-
@field_validator("mcp_config", mode="before")
|
|
82
|
-
@classmethod
|
|
83
|
-
def resolve_env_vars(cls, v: dict[str, Any]) -> dict[str, Any]:
|
|
84
|
-
"""
|
|
85
|
-
Automatically resolve environment variables in mcp_config using Template.
|
|
86
|
-
|
|
87
|
-
Supports ${VAR_NAME} syntax with variable substitution from
|
|
88
|
-
System environment variables (including HUD_API_KEY, etc.)
|
|
89
|
-
|
|
90
|
-
Missing variables resolve to empty strings.
|
|
91
|
-
"""
|
|
92
|
-
import os
|
|
93
|
-
|
|
94
|
-
# Start with current environment variables
|
|
95
|
-
mapping = dict(os.environ)
|
|
96
|
-
mapping.update(settings.model_dump())
|
|
97
|
-
|
|
98
|
-
if settings.api_key:
|
|
99
|
-
mapping["HUD_API_KEY"] = settings.api_key
|
|
100
|
-
else:
|
|
101
|
-
logger.error("HUD_API_KEY is not set, tracing and remote training will not work")
|
|
102
|
-
|
|
103
|
-
def substitute_in_value(obj: Any) -> Any:
|
|
104
|
-
"""Recursively substitute variables in nested structures."""
|
|
105
|
-
if isinstance(obj, str):
|
|
106
|
-
# Use Template's substitute with defaultdict - missing vars become empty strings
|
|
107
|
-
safe_mapping = defaultdict(str, mapping)
|
|
108
|
-
return Template(obj).substitute(safe_mapping)
|
|
109
|
-
elif isinstance(obj, dict):
|
|
110
|
-
return {k: substitute_in_value(v) for k, v in obj.items()}
|
|
111
|
-
elif isinstance(obj, list):
|
|
112
|
-
return [substitute_in_value(item) for item in obj]
|
|
113
|
-
else:
|
|
114
|
-
return obj
|
|
115
|
-
|
|
116
|
-
return substitute_in_value(v)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|