hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +70 -5
- hud/agents/base.py +238 -500
- hud/agents/claude.py +236 -247
- hud/agents/gateway.py +42 -0
- hud/agents/gemini.py +264 -0
- hud/agents/gemini_cua.py +324 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +48 -36
- hud/agents/openai.py +282 -296
- hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
- hud/agents/operator.py +199 -0
- hud/agents/resolver.py +70 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +381 -214
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +377 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_resolver.py +192 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/agents/types.py +148 -0
- hud/cli/__init__.py +493 -546
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +699 -113
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +889 -732
- hud/cli/eval.py +793 -667
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/pull.py +1 -1
- hud/cli/push.py +38 -13
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +110 -8
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push.py +1 -1
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +70 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +45 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +326 -0
- hud/datasets/runner.py +198 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +52 -0
- hud/environment/connection.py +258 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +137 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +835 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +263 -0
- hud/environment/scenarios.py +620 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +205 -0
- hud/environment/tests/test_environment.py +593 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +242 -0
- hud/environment/tests/test_scenarios.py +1086 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +727 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +187 -0
- hud/eval/manager.py +533 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +372 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +291 -0
- hud/eval/types.py +65 -0
- hud/eval/utils.py +194 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +308 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +165 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +18 -2
- hud/tools/agent.py +223 -0
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +36 -3
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_agent_tool.py +355 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +194 -56
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +89 -18
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.13.dist-info/METADATA +264 -0
- hud_python-0.5.13.dist-info/RECORD +305 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
hud/cli/flows/tasks.py
CHANGED
|
@@ -4,21 +4,17 @@ import json
|
|
|
4
4
|
import logging
|
|
5
5
|
import re
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
9
|
import typer
|
|
10
10
|
import yaml
|
|
11
11
|
|
|
12
12
|
from hud.cli.push import push_environment
|
|
13
13
|
from hud.cli.utils.docker import require_docker_running
|
|
14
|
-
from hud.cli.utils.env_check import
|
|
14
|
+
from hud.cli.utils.env_check import find_environment_dir
|
|
15
15
|
from hud.cli.utils.registry import extract_name_and_tag
|
|
16
|
+
from hud.datasets import load_tasks
|
|
16
17
|
from hud.utils.hud_console import hud_console
|
|
17
|
-
from hud.utils.tasks import load_tasks
|
|
18
|
-
|
|
19
|
-
if TYPE_CHECKING:
|
|
20
|
-
from hud.types import Task
|
|
21
|
-
|
|
22
18
|
|
|
23
19
|
logger = logging.getLogger(__name__)
|
|
24
20
|
|
|
@@ -29,11 +25,11 @@ def _is_remote_url(url: str) -> bool:
|
|
|
29
25
|
return bool(re.match(r"^(https?:\/\/)?(www\.)?[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,}(\/\S*)?$", url))
|
|
30
26
|
|
|
31
27
|
|
|
32
|
-
def _validate_tasks(tasks: list[
|
|
28
|
+
def _validate_tasks(tasks: list[dict[str, Any]]) -> bool:
|
|
33
29
|
"""Validate the tasks file: return True if tasks already reference a remote MCP URL.
|
|
34
30
|
|
|
35
31
|
A task is considered remote if any "url" field anywhere inside mcp_config
|
|
36
|
-
is a valid remote URL (e.g., https://mcp.hud.
|
|
32
|
+
is a valid remote URL (e.g., https://mcp.hud.ai/v3/mcp).
|
|
37
33
|
"""
|
|
38
34
|
|
|
39
35
|
def _has_remote_url(obj: Any) -> bool:
|
|
@@ -50,13 +46,15 @@ def _validate_tasks(tasks: list[Task]) -> bool:
|
|
|
50
46
|
return False
|
|
51
47
|
|
|
52
48
|
for task in tasks:
|
|
53
|
-
cfg = task.mcp_config or {}
|
|
49
|
+
cfg = task.get("mcp_config") or {}
|
|
54
50
|
if not _has_remote_url(cfg):
|
|
55
51
|
return False
|
|
56
52
|
return True
|
|
57
53
|
|
|
58
54
|
|
|
59
|
-
def _ensure_pushed(
|
|
55
|
+
def _ensure_pushed(
|
|
56
|
+
env_dir: Path, lock_data: dict[str, Any], check_docker: bool = True
|
|
57
|
+
) -> dict[str, Any]:
|
|
60
58
|
"""Ensure the environment is pushed to a registry; return updated lock data."""
|
|
61
59
|
pushed = bool(lock_data.get("push"))
|
|
62
60
|
if not pushed:
|
|
@@ -64,7 +62,8 @@ def _ensure_pushed(env_dir: Path, lock_data: dict[str, Any]) -> dict[str, Any]:
|
|
|
64
62
|
if not hud_console.confirm("Push to a registry now (runs 'hud push')?", default=True):
|
|
65
63
|
raise typer.Exit(1)
|
|
66
64
|
# Check Docker availability before attempting a push
|
|
67
|
-
|
|
65
|
+
if check_docker:
|
|
66
|
+
require_docker_running()
|
|
68
67
|
|
|
69
68
|
# If Docker or login is not configured, the push function will fail and halt.
|
|
70
69
|
push_environment(str(env_dir), yes=True)
|
|
@@ -78,29 +77,41 @@ def _ensure_pushed(env_dir: Path, lock_data: dict[str, Any]) -> dict[str, Any]:
|
|
|
78
77
|
|
|
79
78
|
|
|
80
79
|
def _derive_remote_image(lock_data: dict[str, Any]) -> str:
|
|
81
|
-
"""Derive org/name:tag from lock file for MCP header.
|
|
80
|
+
"""Derive org/name:tag from lock file for remote MCP header.
|
|
82
81
|
|
|
83
|
-
Preference order:
|
|
84
|
-
1) lock_data["push"]["image_with_tag"]
|
|
85
|
-
2)
|
|
82
|
+
Preference order (new lock first, then legacy):
|
|
83
|
+
1) lock_data["push"]["image_with_tag"] (exact org/name:tag that was pushed)
|
|
84
|
+
2) lock_data["images"]["local"] (base name with internal version)
|
|
85
|
+
3) lock_data["image"] (legacy field; may contain tag or digest)
|
|
86
86
|
"""
|
|
87
|
-
|
|
87
|
+
if not isinstance(lock_data, dict): # Defensive
|
|
88
|
+
raise typer.Exit(1)
|
|
88
89
|
|
|
89
|
-
# 1)
|
|
90
|
-
|
|
90
|
+
# 1) Prefer the exact image that was pushed (org/name:tag)
|
|
91
|
+
push_info = lock_data.get("push") or {}
|
|
92
|
+
pushed_with_tag = str(push_info.get("image_with_tag") or "").strip()
|
|
91
93
|
if pushed_with_tag:
|
|
92
94
|
name, tag = extract_name_and_tag(pushed_with_tag)
|
|
93
95
|
return f"{name}:{tag}"
|
|
94
96
|
|
|
95
|
-
#
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
97
|
+
# 2) Fall back to the local tag recorded in the new lock schema
|
|
98
|
+
images = lock_data.get("images") or {}
|
|
99
|
+
local_image = str(images.get("local") or "").strip()
|
|
100
|
+
if local_image:
|
|
101
|
+
name, tag = extract_name_and_tag(local_image)
|
|
102
|
+
return f"{name}:{tag}"
|
|
101
103
|
|
|
104
|
+
# 3) Legacy top-level image field
|
|
105
|
+
legacy_image = str(lock_data.get("image") or "").strip()
|
|
106
|
+
if legacy_image:
|
|
107
|
+
name, tag = extract_name_and_tag(legacy_image)
|
|
108
|
+
return f"{name}:{tag}"
|
|
109
|
+
|
|
110
|
+
# If none of the above exist, we cannot derive an image
|
|
111
|
+
raise typer.Exit(1)
|
|
102
112
|
|
|
103
|
-
|
|
113
|
+
|
|
114
|
+
def _extract_existing_images(tasks: list[dict[str, Any]]) -> set[str]:
|
|
104
115
|
"""Extract all Mcp-Image references from tasks."""
|
|
105
116
|
images = set()
|
|
106
117
|
|
|
@@ -119,8 +130,9 @@ def _extract_existing_images(tasks: list[Task]) -> set[str]:
|
|
|
119
130
|
_extract_from_obj(item)
|
|
120
131
|
|
|
121
132
|
for task in tasks:
|
|
122
|
-
|
|
123
|
-
|
|
133
|
+
mcp_config = task.get("mcp_config")
|
|
134
|
+
if mcp_config:
|
|
135
|
+
_extract_from_obj(mcp_config)
|
|
124
136
|
|
|
125
137
|
return images
|
|
126
138
|
|
|
@@ -183,6 +195,63 @@ def _extract_dotenv_api_key_vars(env_dir: Path) -> set[str]:
|
|
|
183
195
|
return detected
|
|
184
196
|
|
|
185
197
|
|
|
198
|
+
def _extract_env_vars_from_docker_args(args: list[str]) -> set[str]:
|
|
199
|
+
"""Extract environment variable names from docker run arguments.
|
|
200
|
+
|
|
201
|
+
Parses args like: ["run", "--rm", "-i", "-e", "API_KEY=value", "-e", "TOKEN", "image:tag"]
|
|
202
|
+
Returns set of env var names (not values).
|
|
203
|
+
"""
|
|
204
|
+
env_vars: set[str] = set()
|
|
205
|
+
i = 0
|
|
206
|
+
while i < len(args):
|
|
207
|
+
arg = args[i]
|
|
208
|
+
|
|
209
|
+
# Check for -e or --env flags
|
|
210
|
+
if arg in ("-e", "--env"):
|
|
211
|
+
if i + 1 < len(args):
|
|
212
|
+
env_spec = args[i + 1]
|
|
213
|
+
# Could be "KEY=value" or just "KEY"
|
|
214
|
+
var_name = env_spec.split("=", 1)[0].strip()
|
|
215
|
+
if var_name:
|
|
216
|
+
env_vars.add(var_name)
|
|
217
|
+
i += 2
|
|
218
|
+
continue
|
|
219
|
+
# Check for --env=KEY=value format
|
|
220
|
+
elif arg.startswith("--env="):
|
|
221
|
+
env_spec = arg[6:] # Remove "--env=" prefix
|
|
222
|
+
var_name = env_spec.split("=", 1)[0].strip()
|
|
223
|
+
if var_name:
|
|
224
|
+
env_vars.add(var_name)
|
|
225
|
+
|
|
226
|
+
i += 1
|
|
227
|
+
|
|
228
|
+
env_vars.discard("HUD_API_KEY")
|
|
229
|
+
return env_vars
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _extract_vars_from_task_configs(raw_tasks: list[dict[str, Any]]) -> set[str]:
|
|
233
|
+
"""Extract environment variable names from docker run commands in task mcp_configs."""
|
|
234
|
+
all_env_vars: set[str] = set()
|
|
235
|
+
|
|
236
|
+
for task in raw_tasks:
|
|
237
|
+
mcp_config = task.get("mcp_config", {})
|
|
238
|
+
|
|
239
|
+
# Iterate through all server configs
|
|
240
|
+
for server_config in mcp_config.values():
|
|
241
|
+
if not isinstance(server_config, dict):
|
|
242
|
+
continue
|
|
243
|
+
|
|
244
|
+
command = server_config.get("command", "")
|
|
245
|
+
args = server_config.get("args", [])
|
|
246
|
+
|
|
247
|
+
# Only process docker run commands
|
|
248
|
+
if command == "docker" and "run" in args:
|
|
249
|
+
env_vars = _extract_env_vars_from_docker_args(args)
|
|
250
|
+
all_env_vars.update(env_vars)
|
|
251
|
+
|
|
252
|
+
return all_env_vars
|
|
253
|
+
|
|
254
|
+
|
|
186
255
|
def convert_tasks_to_remote(tasks_file: str) -> str:
|
|
187
256
|
"""Convert a local tasks file to remote MCP tasks and return new filename.
|
|
188
257
|
|
|
@@ -190,17 +259,18 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
|
|
|
190
259
|
1) Find env dir; ensure built (hud.lock.yaml), otherwise build
|
|
191
260
|
2) Ensure pushed to registry, otherwise push
|
|
192
261
|
3) Check for outdated images in existing task configurations
|
|
193
|
-
4) Create remote_[tasks].json with mcp_config pointing to mcp.hud.
|
|
262
|
+
4) Create remote_[tasks].json with mcp_config pointing to mcp.hud.ai and Mcp-Image
|
|
194
263
|
5) Return the new tasks file path
|
|
195
264
|
"""
|
|
196
265
|
tasks_path = Path(tasks_file).resolve()
|
|
197
266
|
|
|
198
|
-
# Load
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
# Load raw tasks to preserve placeholders when writing back to disk
|
|
267
|
+
# Load raw tasks - we work with dicts directly to preserve placeholders
|
|
268
|
+
# when writing back to disk (e.g., ${HUD_API_KEY})
|
|
202
269
|
raw_tasks: list[dict[str, Any]] = load_tasks(str(tasks_path), raw=True) # type: ignore[assignment]
|
|
203
270
|
|
|
271
|
+
# Use the same raw tasks for validation (they have mcp_config structure)
|
|
272
|
+
tasks = raw_tasks
|
|
273
|
+
|
|
204
274
|
# Ensure HUD_API_KEY is available: prefer process env, else load from env_dir/.env
|
|
205
275
|
from hud.settings import settings
|
|
206
276
|
|
|
@@ -224,9 +294,24 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
|
|
|
224
294
|
hud_console.hint("Ensure you're in or near your environment folder before running 'hud rl'")
|
|
225
295
|
raise typer.Exit(1)
|
|
226
296
|
|
|
227
|
-
#
|
|
228
|
-
|
|
229
|
-
|
|
297
|
+
# For convert command, we don't need Docker running - just check for lock file
|
|
298
|
+
# This avoids showing Docker-related messages during conversion
|
|
299
|
+
lock_path = env_dir / "hud.lock.yaml"
|
|
300
|
+
if not lock_path.exists():
|
|
301
|
+
hud_console.error("No hud.lock.yaml found. The environment needs to be built first.")
|
|
302
|
+
hud_console.info("Run 'hud build' in the environment directory to build it.")
|
|
303
|
+
raise typer.Exit(1)
|
|
304
|
+
|
|
305
|
+
# Load lock data directly
|
|
306
|
+
try:
|
|
307
|
+
with open(lock_path) as f:
|
|
308
|
+
lock_data: dict[str, Any] = yaml.safe_load(f) or {}
|
|
309
|
+
except Exception as e:
|
|
310
|
+
hud_console.error(f"Failed to read hud.lock.yaml: {e}")
|
|
311
|
+
raise typer.Exit(1) from e
|
|
312
|
+
|
|
313
|
+
# Check if pushed - don't check Docker for convert command
|
|
314
|
+
lock_data = _ensure_pushed(env_dir, lock_data, check_docker=False)
|
|
230
315
|
|
|
231
316
|
# Derive remote image name org/name:tag
|
|
232
317
|
remote_image = _derive_remote_image(lock_data)
|
|
@@ -297,20 +382,35 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
|
|
|
297
382
|
hud_console.success(f"Updated {tasks_path.name} with latest image: {remote_image}")
|
|
298
383
|
return str(tasks_path)
|
|
299
384
|
|
|
300
|
-
# Extract
|
|
385
|
+
# Extract environment variables from multiple sources:
|
|
386
|
+
# 1. Lock file (authoritative for required env vars)
|
|
301
387
|
provided_keys = _extract_api_key_vars(lock_data)
|
|
388
|
+
|
|
389
|
+
# 2. Task configs (docker run -e flags)
|
|
390
|
+
task_env_vars = _extract_vars_from_task_configs(raw_tasks)
|
|
391
|
+
|
|
392
|
+
# 3. .env file (detect API-like vars)
|
|
302
393
|
dotenv_keys = _extract_dotenv_api_key_vars(env_dir)
|
|
303
394
|
|
|
304
|
-
#
|
|
305
|
-
|
|
395
|
+
# Combine: lock file vars + task config vars, then check for missing from .env
|
|
396
|
+
all_detected = provided_keys | task_env_vars
|
|
397
|
+
|
|
398
|
+
# If .env contains API-like vars not yet included, offer to add them
|
|
399
|
+
missing = sorted(dotenv_keys - all_detected)
|
|
306
400
|
if missing:
|
|
307
401
|
names_preview = ", ".join(missing)
|
|
308
402
|
prompt = (
|
|
309
403
|
f"Detected env vars in .env that look like API keys: {names_preview}.\n"
|
|
310
404
|
"Include them as remote headers (values will be ${VAR} placeholders)?"
|
|
311
405
|
)
|
|
312
|
-
if hud_console.confirm(prompt, default=True):
|
|
313
|
-
|
|
406
|
+
if not hud_console.confirm(prompt, default=True):
|
|
407
|
+
# User cancelled - exit without creating the file
|
|
408
|
+
hud_console.info("Conversion cancelled by user")
|
|
409
|
+
raise typer.Exit(0)
|
|
410
|
+
all_detected.update(missing)
|
|
411
|
+
|
|
412
|
+
# Final set of env vars to convert to headers
|
|
413
|
+
provided_keys = all_detected
|
|
314
414
|
|
|
315
415
|
extra_api_key_headers: dict[str, str] = {}
|
|
316
416
|
for var_name in provided_keys:
|
|
@@ -344,10 +444,10 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
|
|
|
344
444
|
tasks_payload: list[dict[str, Any]] = []
|
|
345
445
|
for t in tasks:
|
|
346
446
|
item: dict[str, Any] = {
|
|
347
|
-
"prompt": t.prompt,
|
|
447
|
+
"prompt": t.get("prompt"),
|
|
348
448
|
"mcp_config": {
|
|
349
449
|
"hud": {
|
|
350
|
-
"url":
|
|
450
|
+
"url": settings.hud_mcp_url,
|
|
351
451
|
"headers": {
|
|
352
452
|
"Authorization": "Bearer ${HUD_API_KEY}",
|
|
353
453
|
"Mcp-Image": remote_image,
|
|
@@ -360,18 +460,16 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
|
|
|
360
460
|
item["mcp_config"]["hud"]["headers"].update(extra_api_key_headers)
|
|
361
461
|
|
|
362
462
|
# Optional fields, omit Nones
|
|
363
|
-
if t.setup_tool is not None:
|
|
364
|
-
item["setup_tool"] = _simplify_tool_call(t
|
|
365
|
-
if t.evaluate_tool is not None:
|
|
366
|
-
item["evaluate_tool"] = _simplify_tool_call(t
|
|
367
|
-
if t.
|
|
368
|
-
item["
|
|
369
|
-
if t.
|
|
370
|
-
item["
|
|
371
|
-
if t.
|
|
372
|
-
item["
|
|
373
|
-
if t.id is not None:
|
|
374
|
-
item["id"] = t.id
|
|
463
|
+
if t.get("setup_tool") is not None:
|
|
464
|
+
item["setup_tool"] = _simplify_tool_call(t["setup_tool"])
|
|
465
|
+
if t.get("evaluate_tool") is not None:
|
|
466
|
+
item["evaluate_tool"] = _simplify_tool_call(t["evaluate_tool"])
|
|
467
|
+
if t.get("agent_config") is not None:
|
|
468
|
+
item["agent_config"] = t["agent_config"]
|
|
469
|
+
if t.get("metadata"):
|
|
470
|
+
item["metadata"] = t["metadata"]
|
|
471
|
+
if t.get("id") is not None:
|
|
472
|
+
item["id"] = t["id"]
|
|
375
473
|
|
|
376
474
|
tasks_payload.append(item)
|
|
377
475
|
|
|
@@ -382,6 +480,5 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
|
|
|
382
480
|
f.write("\n")
|
|
383
481
|
|
|
384
482
|
hud_console.success(f"Created remote tasks file: {remote_path.name}")
|
|
385
|
-
hud_console.hint("Proceeding with RL training on the remote environment")
|
|
386
483
|
|
|
387
484
|
return str(remote_path)
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""Templates for hud init command."""
|
|
2
|
+
|
|
3
|
+
DOCKERFILE_HUD = """\
|
|
4
|
+
FROM python:3.11-slim
|
|
5
|
+
|
|
6
|
+
RUN apt-get update && apt-get install -y --no-install-recommends curl \\
|
|
7
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
8
|
+
|
|
9
|
+
WORKDIR /app
|
|
10
|
+
COPY pyproject.toml uv.lock* ./
|
|
11
|
+
RUN pip install uv && uv sync --frozen --no-dev 2>/dev/null || uv sync --no-dev
|
|
12
|
+
COPY . .
|
|
13
|
+
|
|
14
|
+
# Most of the time this command should not change, except if you change your env path
|
|
15
|
+
# or launch some other service before running the environment
|
|
16
|
+
CMD ["uv", "run", "python", "-m", "hud", "dev", "env:env", "--stdio"]
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
# fmt: off
|
|
20
|
+
ENV_PY = '''\
|
|
21
|
+
"""{env_name} - HUD Environment"""
|
|
22
|
+
|
|
23
|
+
import asyncio
|
|
24
|
+
|
|
25
|
+
import hud
|
|
26
|
+
from hud.settings import settings
|
|
27
|
+
from openai import AsyncOpenAI, Omit
|
|
28
|
+
from hud.environment import Environment
|
|
29
|
+
|
|
30
|
+
env = Environment("{env_name}")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# =============================================================================
|
|
34
|
+
# 1. TOOLS - Functions the agent can call
|
|
35
|
+
# =============================================================================
|
|
36
|
+
|
|
37
|
+
@env.tool()
|
|
38
|
+
def count_letter(text: str, letter: str) -> int:
|
|
39
|
+
"""Count occurrences of a letter in text."""
|
|
40
|
+
return text.lower().count(letter.lower())
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# =============================================================================
|
|
44
|
+
# 2. SCRIPTS - Define prompts and evaluation logic
|
|
45
|
+
# =============================================================================
|
|
46
|
+
|
|
47
|
+
@env.scenario("count")
|
|
48
|
+
async def count_script(sentence: str, letter: str, fmt: str = "integer"):
|
|
49
|
+
"""Agent must count a letter. We check if they got it right."""
|
|
50
|
+
# Yield the prompt, receive the agent's final answer
|
|
51
|
+
answer = yield f"How many times does '{{letter}}' appear in: '{{sentence}}'? Format: {{fmt}}."
|
|
52
|
+
|
|
53
|
+
# Score: 1.0 if correct, 0.0 otherwise
|
|
54
|
+
correct = str(sentence.lower().count(letter.lower()))
|
|
55
|
+
yield correct in answer
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# =============================================================================
|
|
59
|
+
# 3. CONNECT EXISTING SERVERS (optional)
|
|
60
|
+
# =============================================================================
|
|
61
|
+
|
|
62
|
+
# --- FastAPI app ---
|
|
63
|
+
# from my_app import app
|
|
64
|
+
# env.connect_fastapi(app)
|
|
65
|
+
|
|
66
|
+
# --- FastMCP / MCPServer ---
|
|
67
|
+
# from my_server import mcp
|
|
68
|
+
# env.connect_server(mcp)
|
|
69
|
+
|
|
70
|
+
# --- OpenAPI spec (URL or file path) ---
|
|
71
|
+
# env.connect_openapi("https://api.example.com/openapi.json")
|
|
72
|
+
|
|
73
|
+
# --- MCP config (stdio or SSE) ---
|
|
74
|
+
# env.connect_mcp_config({{
|
|
75
|
+
# "my-server": {{"command": "uvx", "args": ["some-mcp-server"]}}
|
|
76
|
+
# }})
|
|
77
|
+
|
|
78
|
+
# --- HUD hub (requires deployment, see below) ---
|
|
79
|
+
# env.connect_hub("my-org/my-env", prefix="remote")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# =============================================================================
|
|
83
|
+
# TEST - Run with: python env.py
|
|
84
|
+
# =============================================================================
|
|
85
|
+
|
|
86
|
+
async def test():
|
|
87
|
+
client = AsyncOpenAI(
|
|
88
|
+
base_url=settings.hud_gateway_url,
|
|
89
|
+
api_key=settings.api_key,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# Create a task from the scenario
|
|
93
|
+
task = env("count", sentence="Strawberry world", letter="r")
|
|
94
|
+
|
|
95
|
+
# Test with and without tools
|
|
96
|
+
async with hud.eval(task, variants={{"tools": [True, False]}}) as ctx:
|
|
97
|
+
response = await client.chat.completions.create(
|
|
98
|
+
model="gpt-4o-mini",
|
|
99
|
+
messages=[{{"role": "user", "content": ctx.prompt}}],
|
|
100
|
+
tools=ctx.as_openai_chat_tools() if ctx.variants["tools"] else Omit(),
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
# Handle tool calls if present
|
|
104
|
+
message = response.choices[0].message
|
|
105
|
+
if message.tool_calls:
|
|
106
|
+
result = await ctx.call_tool(message.tool_calls[0])
|
|
107
|
+
answer = str(result["content"])
|
|
108
|
+
else:
|
|
109
|
+
answer = message.content
|
|
110
|
+
|
|
111
|
+
await ctx.submit(answer or "")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
if __name__ == "__main__":
|
|
115
|
+
asyncio.run(test())
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# =============================================================================
|
|
119
|
+
# DEPLOYMENT
|
|
120
|
+
# =============================================================================
|
|
121
|
+
# To deploy this environment on HUD:
|
|
122
|
+
#
|
|
123
|
+
# 1. Push this repo to GitHub
|
|
124
|
+
# 2. Go to hud.ai -> New -> Environment
|
|
125
|
+
# 3. Choose "From GitHub URL" and paste your repo URL
|
|
126
|
+
# 4. This deploys the environment for remote connection
|
|
127
|
+
#
|
|
128
|
+
# Once deployed, connect to it from other environments:
|
|
129
|
+
# env.connect_hub("{env_name}")
|
|
130
|
+
#
|
|
131
|
+
# Remote deployment enables:
|
|
132
|
+
# - Parallelized evaluations (run many agents simultaneously)
|
|
133
|
+
# - Training data collection at scale
|
|
134
|
+
# - Shared environments across team members
|
|
135
|
+
#
|
|
136
|
+
# Note: The test() function above is just for local testing.
|
|
137
|
+
# It's not required for the deployed environment.
|
|
138
|
+
'''
|
|
139
|
+
# fmt: on
|
|
140
|
+
|
|
141
|
+
PYPROJECT_TOML = """\
|
|
142
|
+
[project]
|
|
143
|
+
name = "{name}"
|
|
144
|
+
version = "0.1.0"
|
|
145
|
+
requires-python = ">=3.10"
|
|
146
|
+
dependencies = ["hud-python", "openai"]
|
|
147
|
+
|
|
148
|
+
[build-system]
|
|
149
|
+
requires = ["hatchling"]
|
|
150
|
+
build-backend = "hatchling.build"
|
|
151
|
+
"""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Tests for CLI flows."""
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""Tests for CLI flows dev module."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
import json
|
|
7
|
+
from unittest import mock
|
|
8
|
+
|
|
9
|
+
import pytest
|
|
10
|
+
|
|
11
|
+
from hud.cli.flows.dev import generate_cursor_deeplink
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TestGenerateCursorDeeplink:
    """Checks on the Cursor install deeplink built by ``generate_cursor_deeplink``."""

    def test_generate_deeplink_basic(self):
        """The link uses the Cursor install scheme and carries name and config params."""
        link = generate_cursor_deeplink("my-server", 8000)

        assert link.startswith("cursor://anysphere.cursor-deeplink/mcp/install?")
        assert "name=my-server" in link
        assert "config=" in link

    def test_generate_deeplink_config_content(self):
        """The base64 config payload decodes to JSON pointing at the local MCP URL."""
        link = generate_cursor_deeplink("test-server", 9999)

        # Everything after "config=" is the base64-encoded JSON document.
        encoded = link.split("config=")[1]
        payload = json.loads(base64.b64decode(encoded).decode())

        assert payload["url"] == "http://localhost:9999/mcp"

    def test_generate_deeplink_different_ports(self):
        """Each requested port shows up in the URL of its own decoded config."""
        ports = (8000, 3000)

        # Build every link first, then decode, then assert — same order as a
        # straight-line version of this test would follow.
        links = {port: generate_cursor_deeplink("server", port) for port in ports}
        configs = {
            port: json.loads(base64.b64decode(link.split("config=")[1]))
            for port, link in links.items()
        }

        for port, config in configs.items():
            assert str(port) in config["url"]

    def test_generate_deeplink_special_characters_in_name(self):
        """Hyphens, underscores and dots in the server name appear verbatim in the link."""
        link = generate_cursor_deeplink("my-cool_server.v2", 8000)

        assert "name=my-cool_server.v2" in link
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class TestCreateDynamicTrace:
    """Exercise ``create_dynamic_trace`` with request, git and settings layers mocked.

    ``mock.patch`` decorators are applied bottom-up, so the injected mocks arrive
    in the order settings, git, request.
    """

    @staticmethod
    def _stub_settings(settings_mock):
        """Give the patched settings object a fake API URL and API key."""
        settings_mock.hud_api_url = "https://api.hud.ai"
        settings_mock.api_key = "test-key"

    @pytest.mark.asyncio
    @mock.patch("hud.cli.flows.dev.make_request")
    @mock.patch("hud.cli.utils.git.get_git_info")
    @mock.patch("hud.cli.flows.dev.settings")
    async def test_create_dynamic_trace_success(self, mock_settings, mock_git, mock_request):
        """A successful request yields the trace id and its hud.ai URL."""
        from hud.cli.flows.dev import create_dynamic_trace

        self._stub_settings(mock_settings)
        mock_git.return_value = {"remote_url": "https://github.com/user/repo"}
        mock_request.return_value = {"id": "trace-123"}

        server_config = {"server": {"url": "http://localhost:8000"}}
        created_id, trace_url = await create_dynamic_trace(
            mcp_config=server_config,
            build_status=True,
            environment_name="test-env",
        )

        assert created_id == "trace-123"
        assert trace_url == "https://hud.ai/trace/trace-123"
        mock_request.assert_called_once()

    @pytest.mark.asyncio
    @mock.patch("hud.cli.flows.dev.make_request")
    @mock.patch("hud.cli.utils.git.get_git_info")
    @mock.patch("hud.cli.flows.dev.settings")
    async def test_create_dynamic_trace_no_git(self, mock_settings, mock_git, mock_request):
        """With no git remote available, the request payload carries no ``git_info``."""
        from hud.cli.flows.dev import create_dynamic_trace

        self._stub_settings(mock_settings)
        mock_git.return_value = {}  # No remote_url
        mock_request.return_value = {"id": "trace-456"}

        server_config = {"server": {"url": "http://localhost:8000"}}
        created_id, _ = await create_dynamic_trace(
            mcp_config=server_config,
            build_status=False,
            environment_name="test-env",
        )

        assert created_id == "trace-456"
        # Inspect the keyword arguments of the recorded request call.
        sent_json = mock_request.call_args.kwargs.get("json", {})
        assert "git_info" not in sent_json

    @pytest.mark.asyncio
    @mock.patch("hud.cli.flows.dev.make_request")
    @mock.patch("hud.cli.utils.git.get_git_info")
    @mock.patch("hud.cli.flows.dev.settings")
    async def test_create_dynamic_trace_api_error(self, mock_settings, mock_git, mock_request):
        """If the request raises, both the trace id and the URL come back as ``None``."""
        from hud.cli.flows.dev import create_dynamic_trace

        self._stub_settings(mock_settings)
        mock_git.return_value = {}
        mock_request.side_effect = Exception("API Error")

        created_id, trace_url = await create_dynamic_trace(
            mcp_config={"server": {}},
            build_status=True,
            environment_name="test-env",
        )

        assert created_id is None
        assert trace_url is None
|