hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +11 -5
- hud/agents/base.py +220 -500
- hud/agents/claude.py +200 -240
- hud/agents/gemini.py +275 -0
- hud/agents/gemini_cua.py +335 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +41 -36
- hud/agents/openai.py +291 -292
- hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
- hud/agents/operator.py +211 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +379 -210
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +376 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/cli/__init__.py +461 -545
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +664 -110
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +882 -734
- hud/cli/eval.py +782 -668
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/push.py +29 -11
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +108 -6
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +69 -0
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +40 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +327 -0
- hud/datasets/runner.py +192 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +50 -0
- hud/environment/connection.py +206 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +109 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +694 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +112 -0
- hud/environment/scenarios.py +493 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +218 -0
- hud/environment/tests/test_environment.py +161 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +201 -0
- hud/environment/tests/test_scenarios.py +280 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +674 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +185 -0
- hud/eval/manager.py +466 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +340 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +145 -0
- hud/eval/types.py +63 -0
- hud/eval/utils.py +183 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +151 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +158 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +16 -2
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +4 -0
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +167 -57
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +61 -3
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.1.dist-info/METADATA +264 -0
- hud_python-0.5.1.dist-info/RECORD +299 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/cli/init.py
CHANGED
|
@@ -14,24 +14,29 @@ import typer
|
|
|
14
14
|
|
|
15
15
|
from hud.utils.hud_console import HUDConsole
|
|
16
16
|
|
|
17
|
-
# Presets mapping to
|
|
17
|
+
# Presets mapping to public GitHub repositories under hud-evals org
|
|
18
18
|
GITHUB_OWNER = "hud-evals"
|
|
19
|
-
GITHUB_REPO = "hud-python"
|
|
20
19
|
GITHUB_BRANCH = "main"
|
|
21
20
|
|
|
22
21
|
PRESET_MAP: dict[str, str | None] = {
|
|
23
|
-
"blank": "blank",
|
|
24
|
-
"deep-research": "deepresearch",
|
|
25
|
-
"browser": "browser",
|
|
22
|
+
"blank": "hud-blank",
|
|
23
|
+
"deep-research": "hud-deepresearch",
|
|
24
|
+
"browser": "hud-browser",
|
|
25
|
+
"rubrics": "hud-rubrics",
|
|
26
|
+
"verilog-coding-template": "verilog-coding-template",
|
|
27
|
+
"data-science-template": "data-science-template",
|
|
26
28
|
}
|
|
27
29
|
|
|
28
30
|
SKIP_DIR_NAMES = {"node_modules", "__pycache__", "dist", "build", ".next", ".git"}
|
|
29
31
|
|
|
30
32
|
# Files that need placeholder replacement
|
|
31
33
|
PLACEHOLDER_FILES = {
|
|
32
|
-
"pyproject.toml",
|
|
34
|
+
"server/pyproject.toml",
|
|
35
|
+
"environment/pyproject.toml",
|
|
36
|
+
"server/main.py",
|
|
37
|
+
"server/README.md",
|
|
38
|
+
"environment/README.md",
|
|
33
39
|
"tasks.json",
|
|
34
|
-
"src/controller/server.py",
|
|
35
40
|
"test_env.ipynb",
|
|
36
41
|
"README.md",
|
|
37
42
|
}
|
|
@@ -48,7 +53,7 @@ def _replace_placeholders(target_dir: Path, env_name: str) -> list[str]:
|
|
|
48
53
|
List of files that were modified
|
|
49
54
|
"""
|
|
50
55
|
modified_files = []
|
|
51
|
-
placeholder = "
|
|
56
|
+
placeholder = "blank" # Placeholder used in blank environment template
|
|
52
57
|
|
|
53
58
|
# Normalize environment name for use in code/configs
|
|
54
59
|
# Replace spaces and special chars with underscores for Python identifiers
|
|
@@ -86,8 +91,11 @@ def _prompt_for_preset() -> str:
|
|
|
86
91
|
try:
|
|
87
92
|
choices = [
|
|
88
93
|
{"name": "blank", "message": "blank"},
|
|
89
|
-
{"name": "deep-research", "message": "deep-research"},
|
|
90
94
|
{"name": "browser", "message": "browser"},
|
|
95
|
+
{"name": "deep-research", "message": "deep-research"},
|
|
96
|
+
{"name": "rubrics", "message": "rubrics"},
|
|
97
|
+
{"name": "verilog-coding-template", "message": "verilog-coding-template"},
|
|
98
|
+
{"name": "data-science-template", "message": "data-science-template"},
|
|
91
99
|
]
|
|
92
100
|
display_choices = [c["message"] for c in choices]
|
|
93
101
|
selected = questionary.select(
|
|
@@ -103,10 +111,10 @@ def _prompt_for_preset() -> str:
|
|
|
103
111
|
return "blank"
|
|
104
112
|
|
|
105
113
|
|
|
106
|
-
def
|
|
107
|
-
owner: str, repo: str, ref: str,
|
|
114
|
+
def _download_tarball_repo(
|
|
115
|
+
owner: str, repo: str, ref: str, dest_dir: Path, files_created: list[str]
|
|
108
116
|
) -> None:
|
|
109
|
-
"""Download a GitHub tarball and extract
|
|
117
|
+
"""Download a GitHub tarball and extract the entire repository."""
|
|
110
118
|
tarball_url = f"https://codeload.github.com/{owner}/{repo}/tar.gz/{ref}"
|
|
111
119
|
|
|
112
120
|
token = os.getenv("GITHUB_TOKEN")
|
|
@@ -135,16 +143,17 @@ def _download_tarball_subdir(
|
|
|
135
143
|
if not members:
|
|
136
144
|
return
|
|
137
145
|
top = members[0].name.split("/", 1)[0]
|
|
138
|
-
target_prefix = f"{top}/environments/{subdir.strip('/')}"
|
|
139
146
|
|
|
140
147
|
for member in members:
|
|
141
148
|
name = member.name
|
|
142
|
-
if
|
|
149
|
+
if name == top:
|
|
150
|
+
continue
|
|
151
|
+
|
|
152
|
+
if not name.startswith(top + "/"):
|
|
143
153
|
continue
|
|
144
154
|
|
|
145
|
-
rel_path = name[len(
|
|
155
|
+
rel_path = name[len(top) + 1 :]
|
|
146
156
|
if not rel_path:
|
|
147
|
-
dest_dir.mkdir(parents=True, exist_ok=True)
|
|
148
157
|
continue
|
|
149
158
|
|
|
150
159
|
out_path = (dest_dir / rel_path).resolve()
|
|
@@ -177,21 +186,21 @@ def create_environment(
|
|
|
177
186
|
|
|
178
187
|
hud_console = HUDConsole()
|
|
179
188
|
|
|
180
|
-
# Determine environment name/target directory
|
|
181
|
-
if name is None:
|
|
182
|
-
current_dir = Path.cwd()
|
|
183
|
-
name = current_dir.name
|
|
184
|
-
target_dir = current_dir
|
|
185
|
-
hud_console.info(f"Using current directory name: {name}")
|
|
186
|
-
else:
|
|
187
|
-
target_dir = Path(directory) / name
|
|
188
|
-
|
|
189
189
|
# Choose preset
|
|
190
190
|
preset_normalized = (preset or "").strip().lower() if preset else _prompt_for_preset()
|
|
191
|
+
|
|
192
|
+
# If no name is provided, use the preset name as the environment name
|
|
193
|
+
if name is None:
|
|
194
|
+
name = preset_normalized
|
|
195
|
+
hud_console.info(f"Using preset name as environment name: {name}")
|
|
196
|
+
|
|
197
|
+
# Always create a new directory based on the name
|
|
198
|
+
target_dir = Path.cwd() / name if directory == "." else Path(directory) / name
|
|
199
|
+
|
|
191
200
|
if preset_normalized not in PRESET_MAP:
|
|
201
|
+
available = ", ".join(sorted(PRESET_MAP.keys()))
|
|
192
202
|
hud_console.warning(
|
|
193
|
-
f"Unknown preset '{preset_normalized}', defaulting to 'blank' "
|
|
194
|
-
"(available: blank, deep-research, browser)"
|
|
203
|
+
f"Unknown preset '{preset_normalized}', defaulting to 'blank' (available: {available})"
|
|
195
204
|
)
|
|
196
205
|
preset_normalized = "blank"
|
|
197
206
|
|
|
@@ -205,17 +214,14 @@ def create_environment(
|
|
|
205
214
|
hud_console.warning(f"Overwriting existing files in {target_dir}")
|
|
206
215
|
|
|
207
216
|
# Download preset from GitHub
|
|
208
|
-
|
|
209
|
-
if
|
|
210
|
-
hud_console.error("Internal error: preset mapping missing
|
|
217
|
+
repo_name = PRESET_MAP[preset_normalized]
|
|
218
|
+
if repo_name is None:
|
|
219
|
+
hud_console.error("Internal error: preset mapping missing repo name")
|
|
211
220
|
raise typer.Exit(1)
|
|
212
221
|
|
|
213
222
|
hud_console.header(f"Initializing HUD Environment: {name} (preset: {preset_normalized})")
|
|
214
|
-
hud_console.section_title("Downloading template from
|
|
215
|
-
source_url =
|
|
216
|
-
f"https://github.com/{GITHUB_OWNER}/{GITHUB_REPO}/tree/"
|
|
217
|
-
f"{GITHUB_BRANCH}/environments/{env_folder}"
|
|
218
|
-
)
|
|
223
|
+
hud_console.section_title("Downloading template from GitHub")
|
|
224
|
+
source_url = f"https://github.com/{GITHUB_OWNER}/{repo_name}"
|
|
219
225
|
hud_console.info("Source: " + source_url)
|
|
220
226
|
|
|
221
227
|
target_dir.mkdir(parents=True, exist_ok=True)
|
|
@@ -223,11 +229,10 @@ def create_environment(
|
|
|
223
229
|
started = time.time()
|
|
224
230
|
files_created_dl: list[str] = []
|
|
225
231
|
try:
|
|
226
|
-
|
|
232
|
+
_download_tarball_repo(
|
|
227
233
|
owner=GITHUB_OWNER,
|
|
228
|
-
repo=
|
|
234
|
+
repo=repo_name,
|
|
229
235
|
ref=GITHUB_BRANCH,
|
|
230
|
-
subdir=env_folder,
|
|
231
236
|
dest_dir=target_dir,
|
|
232
237
|
files_created=files_created_dl,
|
|
233
238
|
)
|
|
@@ -240,31 +245,28 @@ def create_environment(
|
|
|
240
245
|
f"Downloaded {len(files_created_dl)} files in {duration_ms} ms into {target_dir}"
|
|
241
246
|
)
|
|
242
247
|
|
|
243
|
-
# Replace placeholders in template files
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
248
|
+
# Replace placeholders in template files (only for blank preset)
|
|
249
|
+
if preset_normalized == "blank":
|
|
250
|
+
hud_console.section_title("Customizing template files")
|
|
251
|
+
modified_files = _replace_placeholders(target_dir, name)
|
|
252
|
+
if modified_files:
|
|
253
|
+
hud_console.success(f"Replaced placeholders in {len(modified_files)} files:")
|
|
254
|
+
for file in modified_files[:5]: # Show first 5 files
|
|
255
|
+
hud_console.status_item(file, "updated")
|
|
256
|
+
if len(modified_files) > 5:
|
|
257
|
+
hud_console.info(f"... and {len(modified_files) - 5} more files")
|
|
258
|
+
else:
|
|
259
|
+
hud_console.info("No placeholder replacements needed")
|
|
254
260
|
|
|
255
261
|
hud_console.section_title("Top-level files and folders")
|
|
256
262
|
for entry in sorted(os.listdir(target_dir)):
|
|
257
263
|
hud_console.status_item(entry, "added")
|
|
258
264
|
|
|
259
265
|
hud_console.section_title("Next steps")
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
hud_console.command_example(f"cd {target_dir}")
|
|
266
|
-
hud_console.info("\n2. Start development server (with MCP inspector):")
|
|
267
|
-
hud_console.command_example("hud dev --inspector")
|
|
268
|
-
|
|
266
|
+
# Since we now almost always create a new directory, show cd command
|
|
267
|
+
hud_console.info("1. Enter the directory:")
|
|
268
|
+
hud_console.command_example(f"cd {target_dir.name}")
|
|
269
|
+
hud_console.info("\n2. Start development server (with MCP inspector):")
|
|
270
|
+
hud_console.command_example("hud dev --inspector")
|
|
269
271
|
hud_console.info("\n3. Review the README in this preset for specific instructions.")
|
|
270
272
|
hud_console.info("\n4. Customize as needed.")
|
hud/cli/push.py
CHANGED
|
@@ -152,7 +152,7 @@ def push_environment(
|
|
|
152
152
|
hud_console.error("No HUD API key found")
|
|
153
153
|
hud_console.warning("A HUD API key is required to push environments.")
|
|
154
154
|
hud_console.info("\nTo get started:")
|
|
155
|
-
hud_console.info("1. Get your API key at: https://hud.
|
|
155
|
+
hud_console.info("1. Get your API key at: https://hud.ai/settings")
|
|
156
156
|
hud_console.info("Set it in your environment or run: hud set HUD_API_KEY=your-key-here")
|
|
157
157
|
hud_console.command_example("hud push", "Try again")
|
|
158
158
|
hud_console.info("")
|
|
@@ -163,10 +163,7 @@ def push_environment(
|
|
|
163
163
|
lock_data = yaml.safe_load(f)
|
|
164
164
|
|
|
165
165
|
# Handle both old and new lock file formats
|
|
166
|
-
local_image = lock_data.get("image", "")
|
|
167
|
-
if not local_image and "build" in lock_data:
|
|
168
|
-
# New format might have image elsewhere
|
|
169
|
-
local_image = lock_data.get("image", "")
|
|
166
|
+
local_image = lock_data.get("images", {}).get("local") or lock_data.get("image", "")
|
|
170
167
|
|
|
171
168
|
# Get internal version from lock file
|
|
172
169
|
internal_version = lock_data.get("build", {}).get("version", None)
|
|
@@ -293,7 +290,7 @@ def push_environment(
|
|
|
293
290
|
# Push the image
|
|
294
291
|
hud_console.progress_message(f"Pushing {image} to registry...")
|
|
295
292
|
|
|
296
|
-
# Show push output
|
|
293
|
+
# Show push output (filtered for cleaner display)
|
|
297
294
|
process = subprocess.Popen( # noqa: S603
|
|
298
295
|
["docker", "push", image], # noqa: S607
|
|
299
296
|
stdout=subprocess.PIPE,
|
|
@@ -303,8 +300,27 @@ def push_environment(
|
|
|
303
300
|
errors="replace",
|
|
304
301
|
)
|
|
305
302
|
|
|
303
|
+
# Filter output to only show meaningful progress
|
|
304
|
+
layers_pushed = 0
|
|
306
305
|
for line in process.stdout or []:
|
|
307
|
-
|
|
306
|
+
line = line.rstrip()
|
|
307
|
+
# Only show: digest, pushed, mounted, or error lines
|
|
308
|
+
if any(
|
|
309
|
+
keyword in line.lower()
|
|
310
|
+
for keyword in ["digest:", "pushed", "mounted", "error", "denied"]
|
|
311
|
+
):
|
|
312
|
+
if "pushed" in line.lower():
|
|
313
|
+
layers_pushed += 1
|
|
314
|
+
if (
|
|
315
|
+
verbose
|
|
316
|
+
or "error" in line.lower()
|
|
317
|
+
or "denied" in line.lower()
|
|
318
|
+
or "digest:" in line.lower()
|
|
319
|
+
):
|
|
320
|
+
hud_console.info(line)
|
|
321
|
+
|
|
322
|
+
if layers_pushed > 0 and not verbose:
|
|
323
|
+
hud_console.info(f"Pushed {layers_pushed} layer(s)")
|
|
308
324
|
|
|
309
325
|
process.wait()
|
|
310
326
|
|
|
@@ -331,8 +347,10 @@ def push_environment(
|
|
|
331
347
|
hud_console.section_title("Pushed Image")
|
|
332
348
|
hud_console.status_item("Registry", pushed_digest, primary=True)
|
|
333
349
|
|
|
334
|
-
# Update the lock file with
|
|
335
|
-
|
|
350
|
+
# Update the lock file with pushed image reference
|
|
351
|
+
if "images" not in lock_data:
|
|
352
|
+
lock_data["images"] = {}
|
|
353
|
+
lock_data["images"]["pushed"] = image
|
|
336
354
|
|
|
337
355
|
# Add push information
|
|
338
356
|
from datetime import UTC, datetime
|
|
@@ -348,7 +366,7 @@ def push_environment(
|
|
|
348
366
|
with open(lock_path, "w") as f:
|
|
349
367
|
yaml.dump(lock_data, f, default_flow_style=False, sort_keys=False)
|
|
350
368
|
|
|
351
|
-
hud_console.success("Updated lock file with
|
|
369
|
+
hud_console.success("Updated lock file with pushed image reference")
|
|
352
370
|
|
|
353
371
|
# Upload lock file to HUD registry
|
|
354
372
|
try:
|
|
@@ -422,7 +440,7 @@ def push_environment(
|
|
|
422
440
|
elif response.status_code == 401:
|
|
423
441
|
hud_console.error("Authentication failed")
|
|
424
442
|
hud_console.info("Check your HUD_API_KEY is valid")
|
|
425
|
-
hud_console.info("Get a new key at: https://hud.
|
|
443
|
+
hud_console.info("Get a new key at: https://hud.ai/settings")
|
|
426
444
|
hud_console.info("Set it in your environment or run: hud set HUD_API_KEY=your-key-here")
|
|
427
445
|
elif response.status_code == 403:
|
|
428
446
|
hud_console.error("Permission denied")
|
hud/cli/rft.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
import typer
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
from rich.table import Table
|
|
10
|
+
|
|
11
|
+
from hud.datasets import load_tasks
|
|
12
|
+
from hud.settings import settings
|
|
13
|
+
from hud.utils.hud_console import HUDConsole
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
console = Console()
|
|
17
|
+
hud_console = HUDConsole()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _patch_mcp_urls_to_staging(tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
21
|
+
"""Recursively patch all mcp.hud.so URLs to https://orcstaging.hud.so in task configs."""
|
|
22
|
+
|
|
23
|
+
def patch_value(obj: Any) -> Any:
|
|
24
|
+
if isinstance(obj, dict):
|
|
25
|
+
return {k: patch_value(v) for k, v in obj.items()}
|
|
26
|
+
elif isinstance(obj, list):
|
|
27
|
+
return [patch_value(item) for item in obj]
|
|
28
|
+
elif isinstance(obj, str):
|
|
29
|
+
# Replace any occurrence of mcp.hud.so with orcstaging.hud.so
|
|
30
|
+
# Handle various URL formats
|
|
31
|
+
if "mcp.hud.so" in obj:
|
|
32
|
+
# Replace the domain while preserving the protocol and path
|
|
33
|
+
return obj.replace("mcp.hud.so", "orcstaging.hud.so")
|
|
34
|
+
elif "mcp.hud.ai" in obj:
|
|
35
|
+
# Also handle mcp.hud.ai URLs
|
|
36
|
+
return obj.replace("mcp.hud.ai", "orcstaging.hud.so")
|
|
37
|
+
return obj
|
|
38
|
+
else:
|
|
39
|
+
return obj
|
|
40
|
+
|
|
41
|
+
return [patch_value(task) for task in tasks]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _fetch_models() -> list[dict[str, Any]]:
|
|
45
|
+
"""Fetch trainable models from the HUD API for the user's team."""
|
|
46
|
+
url = f"{settings.hud_api_url}/models/"
|
|
47
|
+
headers = {
|
|
48
|
+
"Authorization": f"Bearer {settings.api_key}",
|
|
49
|
+
"x-api-key": settings.api_key or "",
|
|
50
|
+
}
|
|
51
|
+
params = {"team_only": "true", "limit": 200}
|
|
52
|
+
|
|
53
|
+
try:
|
|
54
|
+
with httpx.Client(timeout=30.0) as client:
|
|
55
|
+
resp = client.get(url, headers=headers, params=params)
|
|
56
|
+
resp.raise_for_status()
|
|
57
|
+
data = resp.json()
|
|
58
|
+
return data.get("models", [])
|
|
59
|
+
except httpx.HTTPStatusError as e:
|
|
60
|
+
hud_console.error(f"Failed to fetch models: {e.response.status_code}")
|
|
61
|
+
if e.response.status_code == 401:
|
|
62
|
+
hud_console.hint("Check that your HUD_API_KEY is valid")
|
|
63
|
+
raise typer.Exit(1) from e
|
|
64
|
+
except httpx.RequestError as e:
|
|
65
|
+
hud_console.error(f"Connection error while fetching models: {e}")
|
|
66
|
+
raise typer.Exit(1) from e
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _select_model(models: list[dict[str, Any]]) -> dict[str, Any]:
|
|
70
|
+
"""Display models and let user select one for training."""
|
|
71
|
+
# Filter to only trainable models that are ready
|
|
72
|
+
trainable_models = [
|
|
73
|
+
m
|
|
74
|
+
for m in models
|
|
75
|
+
if m.get("is_trainable", False)
|
|
76
|
+
and m.get("status") == "ready"
|
|
77
|
+
and not m.get("public", False)
|
|
78
|
+
and m.get("model_name") is not None
|
|
79
|
+
]
|
|
80
|
+
|
|
81
|
+
if not trainable_models:
|
|
82
|
+
hud_console.error("No trainable models found in your team.")
|
|
83
|
+
hud_console.hint("Fork a trainable model at https://api.hud.so/models to start training.")
|
|
84
|
+
raise typer.Exit(1)
|
|
85
|
+
|
|
86
|
+
# Display models in a table
|
|
87
|
+
hud_console.section_title("Available Trainable Models")
|
|
88
|
+
table = Table(show_header=True, header_style="bold")
|
|
89
|
+
table.add_column("#", style="dim", width=4)
|
|
90
|
+
table.add_column("Name", style="bold")
|
|
91
|
+
table.add_column("Status")
|
|
92
|
+
table.add_column("Provider")
|
|
93
|
+
|
|
94
|
+
for i, model in enumerate(trainable_models, 1):
|
|
95
|
+
provider_name = (
|
|
96
|
+
model.get("provider", {}).get("name", "unknown") if model.get("provider") else "unknown"
|
|
97
|
+
)
|
|
98
|
+
table.add_row(
|
|
99
|
+
str(i),
|
|
100
|
+
model.get("name", "unnamed"),
|
|
101
|
+
model.get("status", "unknown"),
|
|
102
|
+
provider_name,
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
hud_console.console.print(table)
|
|
106
|
+
hud_console.print("")
|
|
107
|
+
|
|
108
|
+
# Build choices for selection
|
|
109
|
+
choices = [
|
|
110
|
+
{"name": f"{m.get('name', 'unnamed')} ({m.get('base_model', 'unknown')})", "value": m}
|
|
111
|
+
for m in trainable_models
|
|
112
|
+
]
|
|
113
|
+
|
|
114
|
+
selected: dict[str, Any] = hud_console.select("Select a model to train:", choices) # type: ignore[assignment]
|
|
115
|
+
return selected
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def rft_command(
|
|
119
|
+
tasks_file: str,
|
|
120
|
+
reasoning_effort: str = "medium",
|
|
121
|
+
verbose: bool = False,
|
|
122
|
+
yes: bool = False,
|
|
123
|
+
model_id: str | None = None,
|
|
124
|
+
) -> None:
|
|
125
|
+
"""
|
|
126
|
+
Run Reinforcement Fine-Tuning (RFT) via the HUD RL service.
|
|
127
|
+
"""
|
|
128
|
+
hud_console.header("HUD RFT (Reinforcement Fine-Tuning)")
|
|
129
|
+
|
|
130
|
+
# Preflight check: API key
|
|
131
|
+
if not settings.api_key:
|
|
132
|
+
hud_console.error("HUD_API_KEY not found in environment.")
|
|
133
|
+
hud_console.info("Run 'hud set HUD_API_KEY=...' or export it.")
|
|
134
|
+
raise typer.Exit(1)
|
|
135
|
+
|
|
136
|
+
# Model selection
|
|
137
|
+
selected_model_id: str
|
|
138
|
+
if model_id:
|
|
139
|
+
# Use provided model_id directly
|
|
140
|
+
selected_model_id = model_id
|
|
141
|
+
hud_console.info(f"Using provided model ID: {selected_model_id}")
|
|
142
|
+
else:
|
|
143
|
+
# Fetch and let user select a model
|
|
144
|
+
hud_console.section_title("Fetching available models")
|
|
145
|
+
hud_console.info("Loading models from your team...")
|
|
146
|
+
models = _fetch_models()
|
|
147
|
+
|
|
148
|
+
if yes:
|
|
149
|
+
# Auto-select first trainable model in non-interactive mode
|
|
150
|
+
trainable_models = [
|
|
151
|
+
m
|
|
152
|
+
for m in models
|
|
153
|
+
if m.get("is_trainable", False)
|
|
154
|
+
and m.get("status") == "ready"
|
|
155
|
+
and not m.get("public", False)
|
|
156
|
+
and m.get("model_name") is not None
|
|
157
|
+
]
|
|
158
|
+
if not trainable_models:
|
|
159
|
+
hud_console.error("No trainable models found in your team.")
|
|
160
|
+
hud_console.hint(
|
|
161
|
+
"Fork a trainable model at https://api.hud.so/models to start training."
|
|
162
|
+
)
|
|
163
|
+
raise typer.Exit(1)
|
|
164
|
+
selected_model = trainable_models[0]
|
|
165
|
+
hud_console.info(
|
|
166
|
+
f"Auto-selected first trainable model (--yes mode): "
|
|
167
|
+
f"{selected_model.get('name', 'unnamed')}"
|
|
168
|
+
)
|
|
169
|
+
else:
|
|
170
|
+
selected_model = _select_model(models)
|
|
171
|
+
|
|
172
|
+
selected_model_id = selected_model["id"]
|
|
173
|
+
hud_console.success(
|
|
174
|
+
f"Selected model: {selected_model.get('name', 'unnamed')} (ID: {selected_model_id})"
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
# Preflight check: Convert tasks to remote if needed
|
|
178
|
+
hud_console.section_title("Preparing tasks for remote training")
|
|
179
|
+
try:
|
|
180
|
+
from hud.cli.flows.tasks import convert_tasks_to_remote
|
|
181
|
+
|
|
182
|
+
hud_console.info("Checking task configuration...")
|
|
183
|
+
tasks_file = convert_tasks_to_remote(tasks_file)
|
|
184
|
+
hud_console.success("Tasks are ready for remote training")
|
|
185
|
+
except typer.Exit:
|
|
186
|
+
raise
|
|
187
|
+
except Exception as e:
|
|
188
|
+
hud_console.error(f"Tasks file is not valid for remote training: {e!s}")
|
|
189
|
+
hud_console.hint("Either ensure the tasks file has remote urls")
|
|
190
|
+
hud_console.hint("Or run 'hud rft' within an environment directory")
|
|
191
|
+
raise typer.Exit(1) from e
|
|
192
|
+
|
|
193
|
+
# Load and validate tasks
|
|
194
|
+
try:
|
|
195
|
+
# Load tasks as raw dicts for patching and serialization
|
|
196
|
+
tasks: list[dict[str, Any]] = load_tasks(tasks_file, raw=True) # type: ignore[assignment]
|
|
197
|
+
if not tasks:
|
|
198
|
+
hud_console.error(f"No tasks found in {tasks_file}")
|
|
199
|
+
raise typer.Exit(1)
|
|
200
|
+
|
|
201
|
+
# Preflight check: Minimum task count
|
|
202
|
+
task_count = len(tasks)
|
|
203
|
+
if task_count < 10:
|
|
204
|
+
hud_console.error(
|
|
205
|
+
f"Insufficient tasks for RFT training: found {task_count}, need at least 10"
|
|
206
|
+
)
|
|
207
|
+
hud_console.hint("RFT requires a minimum of 10 tasks for effective training")
|
|
208
|
+
raise typer.Exit(1)
|
|
209
|
+
|
|
210
|
+
hud_console.info(f"Loaded {task_count} tasks from {tasks_file}")
|
|
211
|
+
|
|
212
|
+
# Preflight check: Vision support
|
|
213
|
+
hud_console.section_title("Vision Support Check")
|
|
214
|
+
hud_console.warning(
|
|
215
|
+
"RFT does not currently support environments that require vision capabilities."
|
|
216
|
+
)
|
|
217
|
+
hud_console.info(
|
|
218
|
+
"Vision support includes: screenshots, image analysis, visual UI interaction, etc."
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
if not yes:
|
|
222
|
+
if hud_console.confirm("Does your environment require vision support?", default=False):
|
|
223
|
+
hud_console.error("RFT does not support vision-based environments at this time.")
|
|
224
|
+
hud_console.hint(
|
|
225
|
+
"Please use environments that rely on text-based interactions only."
|
|
226
|
+
)
|
|
227
|
+
raise typer.Exit(1)
|
|
228
|
+
else:
|
|
229
|
+
hud_console.info("Skipping vision support check (--yes mode)")
|
|
230
|
+
|
|
231
|
+
# Patch all mcp.hud.so URLs to orcstaging.hud.so
|
|
232
|
+
hud_console.info("Patching MCP URLs for staging environment...")
|
|
233
|
+
tasks = _patch_mcp_urls_to_staging(tasks)
|
|
234
|
+
|
|
235
|
+
# Show task preview
|
|
236
|
+
if tasks:
|
|
237
|
+
if yes:
|
|
238
|
+
# Skip interactive preview in auto-accept mode
|
|
239
|
+
hud_console.info("Skipping task preview in auto-accept mode (--yes)")
|
|
240
|
+
else:
|
|
241
|
+
try:
|
|
242
|
+
from hud.cli.utils.viewer import show_json_interactive
|
|
243
|
+
|
|
244
|
+
hud_console.section_title("Task Preview")
|
|
245
|
+
show_json_interactive(
|
|
246
|
+
tasks[0], title="Example Task from Dataset", initial_expanded=False
|
|
247
|
+
)
|
|
248
|
+
hud_console.info("This is how your task will be sent to the RFT service.")
|
|
249
|
+
|
|
250
|
+
# Ask for confirmation
|
|
251
|
+
if not hud_console.confirm(
|
|
252
|
+
"\nProceed with RFT training on this dataset?", default=True
|
|
253
|
+
):
|
|
254
|
+
hud_console.error("RFT training cancelled")
|
|
255
|
+
raise typer.Exit(0)
|
|
256
|
+
except typer.Exit:
|
|
257
|
+
raise # Re-raise typer.Exit to properly exit on cancellation
|
|
258
|
+
except Exception as e:
|
|
259
|
+
hud_console.warning(f"Could not display task preview: {e}")
|
|
260
|
+
|
|
261
|
+
except typer.Exit:
|
|
262
|
+
raise # Re-raise typer.Exit to properly exit
|
|
263
|
+
except Exception as e:
|
|
264
|
+
hud_console.error(f"Failed to load tasks file: {e}")
|
|
265
|
+
raise typer.Exit(1) from e
|
|
266
|
+
|
|
267
|
+
# Prepare payload
|
|
268
|
+
payload = {
|
|
269
|
+
"model_id": selected_model_id,
|
|
270
|
+
"dataset": {"tasks": tasks},
|
|
271
|
+
"config": {"parameters": {"reasoning_effort": reasoning_effort}},
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
# Send request to service
|
|
275
|
+
hud_console.section_title("Submitting RFT job")
|
|
276
|
+
|
|
277
|
+
base_url = settings.hud_rl_url
|
|
278
|
+
url = f"{base_url}/training/jobs"
|
|
279
|
+
|
|
280
|
+
headers = {"Authorization": f"Bearer {settings.api_key}", "Content-Type": "application/json"}
|
|
281
|
+
|
|
282
|
+
hud_console.info(
|
|
283
|
+
f"Submitting job to {url}... (this may take a few minutes to run all safety checks)"
|
|
284
|
+
)
|
|
285
|
+
|
|
286
|
+
try:
|
|
287
|
+
with httpx.Client(timeout=300.0) as client:
|
|
288
|
+
resp = client.post(url, json=payload, headers=headers)
|
|
289
|
+
|
|
290
|
+
if resp.status_code >= 400:
|
|
291
|
+
try:
|
|
292
|
+
detail = resp.json()
|
|
293
|
+
except Exception as e:
|
|
294
|
+
detail = f"{resp.text} - {e}"
|
|
295
|
+
hud_console.error(f"Request failed ({resp.status_code}): {detail}")
|
|
296
|
+
raise typer.Exit(1)
|
|
297
|
+
|
|
298
|
+
data = resp.json()
|
|
299
|
+
job_id = data.get("job_id")
|
|
300
|
+
model_id = data.get("model", {}).get("id")
|
|
301
|
+
|
|
302
|
+
hud_console.success(f"Job launched successfully! ID: {job_id}")
|
|
303
|
+
hud_console.info(f"Model ID: {model_id}")
|
|
304
|
+
|
|
305
|
+
# Provide helpful next steps
|
|
306
|
+
hud_console.info(f"To check job status, run: hud rft status {model_id}")
|
|
307
|
+
|
|
308
|
+
except httpx.RequestError as e:
|
|
309
|
+
hud_console.error(f"Connection error: {e}")
|
|
310
|
+
hud_console.info("Is the RL service running?")
|
|
311
|
+
raise typer.Exit(1) from e
|