hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274) hide show
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +11 -5
  3. hud/agents/base.py +220 -500
  4. hud/agents/claude.py +200 -240
  5. hud/agents/gemini.py +275 -0
  6. hud/agents/gemini_cua.py +335 -0
  7. hud/agents/grounded_openai.py +98 -100
  8. hud/agents/misc/integration_test_agent.py +51 -20
  9. hud/agents/misc/response_agent.py +41 -36
  10. hud/agents/openai.py +291 -292
  11. hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
  12. hud/agents/operator.py +211 -0
  13. hud/agents/tests/conftest.py +133 -0
  14. hud/agents/tests/test_base.py +300 -622
  15. hud/agents/tests/test_base_runtime.py +233 -0
  16. hud/agents/tests/test_claude.py +379 -210
  17. hud/agents/tests/test_client.py +9 -10
  18. hud/agents/tests/test_gemini.py +369 -0
  19. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  20. hud/agents/tests/test_openai.py +376 -140
  21. hud/agents/tests/test_operator.py +362 -0
  22. hud/agents/tests/test_run_eval.py +179 -0
  23. hud/cli/__init__.py +461 -545
  24. hud/cli/analyze.py +43 -5
  25. hud/cli/build.py +664 -110
  26. hud/cli/debug.py +8 -5
  27. hud/cli/dev.py +882 -734
  28. hud/cli/eval.py +782 -668
  29. hud/cli/flows/dev.py +167 -0
  30. hud/cli/flows/init.py +191 -0
  31. hud/cli/flows/tasks.py +153 -56
  32. hud/cli/flows/templates.py +151 -0
  33. hud/cli/flows/tests/__init__.py +1 -0
  34. hud/cli/flows/tests/test_dev.py +126 -0
  35. hud/cli/init.py +60 -58
  36. hud/cli/push.py +29 -11
  37. hud/cli/rft.py +311 -0
  38. hud/cli/rft_status.py +145 -0
  39. hud/cli/tests/test_analyze.py +5 -5
  40. hud/cli/tests/test_analyze_metadata.py +3 -2
  41. hud/cli/tests/test_analyze_module.py +120 -0
  42. hud/cli/tests/test_build.py +108 -6
  43. hud/cli/tests/test_build_failure.py +41 -0
  44. hud/cli/tests/test_build_module.py +50 -0
  45. hud/cli/tests/test_cli_init.py +6 -1
  46. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  47. hud/cli/tests/test_cli_root.py +140 -0
  48. hud/cli/tests/test_convert.py +361 -0
  49. hud/cli/tests/test_debug.py +12 -10
  50. hud/cli/tests/test_dev.py +197 -0
  51. hud/cli/tests/test_eval.py +251 -0
  52. hud/cli/tests/test_eval_bedrock.py +51 -0
  53. hud/cli/tests/test_init.py +124 -0
  54. hud/cli/tests/test_main_module.py +11 -5
  55. hud/cli/tests/test_mcp_server.py +12 -100
  56. hud/cli/tests/test_push_happy.py +74 -0
  57. hud/cli/tests/test_push_wrapper.py +23 -0
  58. hud/cli/tests/test_registry.py +1 -1
  59. hud/cli/tests/test_utils.py +1 -1
  60. hud/cli/{rl → utils}/celebrate.py +14 -12
  61. hud/cli/utils/config.py +18 -1
  62. hud/cli/utils/docker.py +130 -4
  63. hud/cli/utils/env_check.py +9 -9
  64. hud/cli/utils/git.py +136 -0
  65. hud/cli/utils/interactive.py +39 -5
  66. hud/cli/utils/metadata.py +69 -0
  67. hud/cli/utils/runner.py +1 -1
  68. hud/cli/utils/server.py +2 -2
  69. hud/cli/utils/source_hash.py +3 -3
  70. hud/cli/utils/tasks.py +4 -1
  71. hud/cli/utils/tests/__init__.py +0 -0
  72. hud/cli/utils/tests/test_config.py +58 -0
  73. hud/cli/utils/tests/test_docker.py +93 -0
  74. hud/cli/utils/tests/test_docker_hints.py +71 -0
  75. hud/cli/utils/tests/test_env_check.py +74 -0
  76. hud/cli/utils/tests/test_environment.py +42 -0
  77. hud/cli/utils/tests/test_git.py +142 -0
  78. hud/cli/utils/tests/test_interactive_module.py +60 -0
  79. hud/cli/utils/tests/test_local_runner.py +50 -0
  80. hud/cli/utils/tests/test_logging_utils.py +23 -0
  81. hud/cli/utils/tests/test_metadata.py +49 -0
  82. hud/cli/utils/tests/test_package_runner.py +35 -0
  83. hud/cli/utils/tests/test_registry_utils.py +49 -0
  84. hud/cli/utils/tests/test_remote_runner.py +25 -0
  85. hud/cli/utils/tests/test_runner_modules.py +52 -0
  86. hud/cli/utils/tests/test_source_hash.py +36 -0
  87. hud/cli/utils/tests/test_tasks.py +80 -0
  88. hud/cli/utils/version_check.py +258 -0
  89. hud/cli/{rl → utils}/viewer.py +2 -2
  90. hud/clients/README.md +12 -11
  91. hud/clients/__init__.py +4 -3
  92. hud/clients/base.py +166 -26
  93. hud/clients/environment.py +51 -0
  94. hud/clients/fastmcp.py +13 -6
  95. hud/clients/mcp_use.py +40 -15
  96. hud/clients/tests/test_analyze_scenarios.py +206 -0
  97. hud/clients/tests/test_protocol.py +9 -3
  98. hud/datasets/__init__.py +23 -20
  99. hud/datasets/loader.py +327 -0
  100. hud/datasets/runner.py +192 -105
  101. hud/datasets/tests/__init__.py +0 -0
  102. hud/datasets/tests/test_loader.py +221 -0
  103. hud/datasets/tests/test_utils.py +315 -0
  104. hud/datasets/utils.py +270 -90
  105. hud/environment/__init__.py +50 -0
  106. hud/environment/connection.py +206 -0
  107. hud/environment/connectors/__init__.py +33 -0
  108. hud/environment/connectors/base.py +68 -0
  109. hud/environment/connectors/local.py +177 -0
  110. hud/environment/connectors/mcp_config.py +109 -0
  111. hud/environment/connectors/openai.py +101 -0
  112. hud/environment/connectors/remote.py +172 -0
  113. hud/environment/environment.py +694 -0
  114. hud/environment/integrations/__init__.py +45 -0
  115. hud/environment/integrations/adk.py +67 -0
  116. hud/environment/integrations/anthropic.py +196 -0
  117. hud/environment/integrations/gemini.py +92 -0
  118. hud/environment/integrations/langchain.py +82 -0
  119. hud/environment/integrations/llamaindex.py +68 -0
  120. hud/environment/integrations/openai.py +238 -0
  121. hud/environment/mock.py +306 -0
  122. hud/environment/router.py +112 -0
  123. hud/environment/scenarios.py +493 -0
  124. hud/environment/tests/__init__.py +1 -0
  125. hud/environment/tests/test_connection.py +317 -0
  126. hud/environment/tests/test_connectors.py +218 -0
  127. hud/environment/tests/test_environment.py +161 -0
  128. hud/environment/tests/test_integrations.py +257 -0
  129. hud/environment/tests/test_local_connectors.py +201 -0
  130. hud/environment/tests/test_scenarios.py +280 -0
  131. hud/environment/tests/test_tools.py +208 -0
  132. hud/environment/types.py +23 -0
  133. hud/environment/utils/__init__.py +35 -0
  134. hud/environment/utils/formats.py +215 -0
  135. hud/environment/utils/schema.py +171 -0
  136. hud/environment/utils/tool_wrappers.py +113 -0
  137. hud/eval/__init__.py +67 -0
  138. hud/eval/context.py +674 -0
  139. hud/eval/display.py +299 -0
  140. hud/eval/instrument.py +185 -0
  141. hud/eval/manager.py +466 -0
  142. hud/eval/parallel.py +268 -0
  143. hud/eval/task.py +340 -0
  144. hud/eval/tests/__init__.py +1 -0
  145. hud/eval/tests/test_context.py +178 -0
  146. hud/eval/tests/test_eval.py +210 -0
  147. hud/eval/tests/test_manager.py +152 -0
  148. hud/eval/tests/test_parallel.py +168 -0
  149. hud/eval/tests/test_task.py +145 -0
  150. hud/eval/types.py +63 -0
  151. hud/eval/utils.py +183 -0
  152. hud/patches/__init__.py +19 -0
  153. hud/patches/mcp_patches.py +151 -0
  154. hud/patches/warnings.py +54 -0
  155. hud/samples/browser.py +4 -4
  156. hud/server/__init__.py +2 -1
  157. hud/server/low_level.py +2 -1
  158. hud/server/router.py +164 -0
  159. hud/server/server.py +567 -80
  160. hud/server/tests/test_mcp_server_integration.py +11 -11
  161. hud/server/tests/test_mcp_server_more.py +1 -1
  162. hud/server/tests/test_server_extra.py +2 -0
  163. hud/settings.py +45 -3
  164. hud/shared/exceptions.py +36 -10
  165. hud/shared/hints.py +26 -1
  166. hud/shared/requests.py +15 -3
  167. hud/shared/tests/test_exceptions.py +40 -31
  168. hud/shared/tests/test_hints.py +167 -0
  169. hud/telemetry/__init__.py +20 -19
  170. hud/telemetry/exporter.py +201 -0
  171. hud/telemetry/instrument.py +158 -253
  172. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  173. hud/telemetry/tests/test_exporter.py +258 -0
  174. hud/telemetry/tests/test_instrument.py +401 -0
  175. hud/tools/__init__.py +16 -2
  176. hud/tools/apply_patch.py +639 -0
  177. hud/tools/base.py +54 -4
  178. hud/tools/bash.py +2 -2
  179. hud/tools/computer/__init__.py +4 -0
  180. hud/tools/computer/anthropic.py +2 -2
  181. hud/tools/computer/gemini.py +385 -0
  182. hud/tools/computer/hud.py +23 -6
  183. hud/tools/computer/openai.py +20 -21
  184. hud/tools/computer/qwen.py +434 -0
  185. hud/tools/computer/settings.py +37 -0
  186. hud/tools/edit.py +3 -7
  187. hud/tools/executors/base.py +4 -2
  188. hud/tools/executors/pyautogui.py +1 -1
  189. hud/tools/grounding/grounded_tool.py +13 -18
  190. hud/tools/grounding/grounder.py +10 -31
  191. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  192. hud/tools/jupyter.py +330 -0
  193. hud/tools/playwright.py +18 -3
  194. hud/tools/shell.py +308 -0
  195. hud/tools/tests/test_apply_patch.py +718 -0
  196. hud/tools/tests/test_computer.py +4 -9
  197. hud/tools/tests/test_computer_actions.py +24 -2
  198. hud/tools/tests/test_jupyter_tool.py +181 -0
  199. hud/tools/tests/test_shell.py +596 -0
  200. hud/tools/tests/test_submit.py +85 -0
  201. hud/tools/tests/test_types.py +193 -0
  202. hud/tools/types.py +21 -1
  203. hud/types.py +167 -57
  204. hud/utils/__init__.py +2 -0
  205. hud/utils/env.py +67 -0
  206. hud/utils/hud_console.py +61 -3
  207. hud/utils/mcp.py +15 -58
  208. hud/utils/strict_schema.py +162 -0
  209. hud/utils/tests/test_init.py +1 -2
  210. hud/utils/tests/test_mcp.py +1 -28
  211. hud/utils/tests/test_pretty_errors.py +186 -0
  212. hud/utils/tests/test_tool_shorthand.py +154 -0
  213. hud/utils/tests/test_version.py +1 -1
  214. hud/utils/types.py +20 -0
  215. hud/version.py +1 -1
  216. hud_python-0.5.1.dist-info/METADATA +264 -0
  217. hud_python-0.5.1.dist-info/RECORD +299 -0
  218. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
  219. hud/agents/langchain.py +0 -261
  220. hud/agents/lite_llm.py +0 -72
  221. hud/cli/rl/__init__.py +0 -180
  222. hud/cli/rl/config.py +0 -101
  223. hud/cli/rl/display.py +0 -133
  224. hud/cli/rl/gpu.py +0 -63
  225. hud/cli/rl/gpu_utils.py +0 -321
  226. hud/cli/rl/local_runner.py +0 -595
  227. hud/cli/rl/presets.py +0 -96
  228. hud/cli/rl/remote_runner.py +0 -463
  229. hud/cli/rl/rl_api.py +0 -150
  230. hud/cli/rl/vllm.py +0 -177
  231. hud/cli/rl/wait_utils.py +0 -89
  232. hud/datasets/parallel.py +0 -687
  233. hud/misc/__init__.py +0 -1
  234. hud/misc/claude_plays_pokemon.py +0 -292
  235. hud/otel/__init__.py +0 -35
  236. hud/otel/collector.py +0 -142
  237. hud/otel/config.py +0 -181
  238. hud/otel/context.py +0 -570
  239. hud/otel/exporters.py +0 -369
  240. hud/otel/instrumentation.py +0 -135
  241. hud/otel/processors.py +0 -121
  242. hud/otel/tests/__init__.py +0 -1
  243. hud/otel/tests/test_processors.py +0 -197
  244. hud/rl/README.md +0 -30
  245. hud/rl/__init__.py +0 -1
  246. hud/rl/actor.py +0 -176
  247. hud/rl/buffer.py +0 -405
  248. hud/rl/chat_template.jinja +0 -101
  249. hud/rl/config.py +0 -192
  250. hud/rl/distributed.py +0 -132
  251. hud/rl/learner.py +0 -637
  252. hud/rl/tests/__init__.py +0 -1
  253. hud/rl/tests/test_learner.py +0 -186
  254. hud/rl/train.py +0 -382
  255. hud/rl/types.py +0 -101
  256. hud/rl/utils/start_vllm_server.sh +0 -30
  257. hud/rl/utils.py +0 -524
  258. hud/rl/vllm_adapter.py +0 -143
  259. hud/telemetry/job.py +0 -352
  260. hud/telemetry/replay.py +0 -74
  261. hud/telemetry/tests/test_replay.py +0 -40
  262. hud/telemetry/tests/test_trace.py +0 -63
  263. hud/telemetry/trace.py +0 -158
  264. hud/utils/agent_factories.py +0 -86
  265. hud/utils/async_utils.py +0 -65
  266. hud/utils/group_eval.py +0 -223
  267. hud/utils/progress.py +0 -149
  268. hud/utils/tasks.py +0 -127
  269. hud/utils/tests/test_async_utils.py +0 -173
  270. hud/utils/tests/test_progress.py +0 -261
  271. hud_python-0.4.45.dist-info/METADATA +0 -552
  272. hud_python-0.4.45.dist-info/RECORD +0 -228
  273. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
  274. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/cli/flows/tasks.py CHANGED
@@ -4,21 +4,17 @@ import json
4
4
  import logging
5
5
  import re
6
6
  from pathlib import Path
7
- from typing import TYPE_CHECKING, Any
7
+ from typing import Any
8
8
 
9
9
  import typer
10
10
  import yaml
11
11
 
12
12
  from hud.cli.push import push_environment
13
13
  from hud.cli.utils.docker import require_docker_running
14
- from hud.cli.utils.env_check import ensure_built, find_environment_dir
14
+ from hud.cli.utils.env_check import find_environment_dir
15
15
  from hud.cli.utils.registry import extract_name_and_tag
16
+ from hud.datasets import load_tasks
16
17
  from hud.utils.hud_console import hud_console
17
- from hud.utils.tasks import load_tasks
18
-
19
- if TYPE_CHECKING:
20
- from hud.types import Task
21
-
22
18
 
23
19
  logger = logging.getLogger(__name__)
24
20
 
@@ -29,11 +25,11 @@ def _is_remote_url(url: str) -> bool:
29
25
  return bool(re.match(r"^(https?:\/\/)?(www\.)?[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,}(\/\S*)?$", url))
30
26
 
31
27
 
32
- def _validate_tasks(tasks: list[Task]) -> bool:
28
+ def _validate_tasks(tasks: list[dict[str, Any]]) -> bool:
33
29
  """Validate the tasks file: return True if tasks already reference a remote MCP URL.
34
30
 
35
31
  A task is considered remote if any "url" field anywhere inside mcp_config
36
- is a valid remote URL (e.g., https://mcp.hud.so/v3/mcp).
32
+ is a valid remote URL (e.g., https://mcp.hud.ai/v3/mcp).
37
33
  """
38
34
 
39
35
  def _has_remote_url(obj: Any) -> bool:
@@ -50,13 +46,15 @@ def _validate_tasks(tasks: list[Task]) -> bool:
50
46
  return False
51
47
 
52
48
  for task in tasks:
53
- cfg = task.mcp_config or {}
49
+ cfg = task.get("mcp_config") or {}
54
50
  if not _has_remote_url(cfg):
55
51
  return False
56
52
  return True
57
53
 
58
54
 
59
- def _ensure_pushed(env_dir: Path, lock_data: dict[str, Any]) -> dict[str, Any]:
55
+ def _ensure_pushed(
56
+ env_dir: Path, lock_data: dict[str, Any], check_docker: bool = True
57
+ ) -> dict[str, Any]:
60
58
  """Ensure the environment is pushed to a registry; return updated lock data."""
61
59
  pushed = bool(lock_data.get("push"))
62
60
  if not pushed:
@@ -64,7 +62,8 @@ def _ensure_pushed(env_dir: Path, lock_data: dict[str, Any]) -> dict[str, Any]:
64
62
  if not hud_console.confirm("Push to a registry now (runs 'hud push')?", default=True):
65
63
  raise typer.Exit(1)
66
64
  # Check Docker availability before attempting a push
67
- require_docker_running()
65
+ if check_docker:
66
+ require_docker_running()
68
67
 
69
68
  # If Docker or login is not configured, the push function will fail and halt.
70
69
  push_environment(str(env_dir), yes=True)
@@ -78,29 +77,41 @@ def _ensure_pushed(env_dir: Path, lock_data: dict[str, Any]) -> dict[str, Any]:
78
77
 
79
78
 
80
79
  def _derive_remote_image(lock_data: dict[str, Any]) -> str:
81
- """Derive org/name:tag from lock file for MCP header.
80
+ """Derive org/name:tag from lock file for remote MCP header.
82
81
 
83
- Preference order:
84
- 1) lock_data["push"]["image_with_tag"] if present
85
- 2) Derive from lock_data["image"] (may be a digest; falls back to latest)
82
+ Preference order (new lock first, then legacy):
83
+ 1) lock_data["push"]["image_with_tag"] (exact org/name:tag that was pushed)
84
+ 2) lock_data["images"]["local"] (base name with internal version)
85
+ 3) lock_data["image"] (legacy field; may contain tag or digest)
86
86
  """
87
- push_info = lock_data.get("push", {}) if isinstance(lock_data, dict) else {}
87
+ if not isinstance(lock_data, dict): # Defensive
88
+ raise typer.Exit(1)
88
89
 
89
- # 1) Exact image_with_tag if present
90
- pushed_with_tag = str(push_info.get("image_with_tag", "")).strip()
90
+ # 1) Prefer the exact image that was pushed (org/name:tag)
91
+ push_info = lock_data.get("push") or {}
92
+ pushed_with_tag = str(push_info.get("image_with_tag") or "").strip()
91
93
  if pushed_with_tag:
92
94
  name, tag = extract_name_and_tag(pushed_with_tag)
93
95
  return f"{name}:{tag}"
94
96
 
95
- # Base name always comes from lock_data.image to preserve org/repo
96
- image_ref = str(lock_data.get("image", "")).strip()
97
- if not image_ref:
98
- raise typer.Exit(1)
99
- name, tag = extract_name_and_tag(image_ref)
100
- return f"{name}:{tag}"
97
+ # 2) Fall back to the local tag recorded in the new lock schema
98
+ images = lock_data.get("images") or {}
99
+ local_image = str(images.get("local") or "").strip()
100
+ if local_image:
101
+ name, tag = extract_name_and_tag(local_image)
102
+ return f"{name}:{tag}"
101
103
 
104
+ # 3) Legacy top-level image field
105
+ legacy_image = str(lock_data.get("image") or "").strip()
106
+ if legacy_image:
107
+ name, tag = extract_name_and_tag(legacy_image)
108
+ return f"{name}:{tag}"
109
+
110
+ # If none of the above exist, we cannot derive an image
111
+ raise typer.Exit(1)
102
112
 
103
- def _extract_existing_images(tasks: list[Task]) -> set[str]:
113
+
114
+ def _extract_existing_images(tasks: list[dict[str, Any]]) -> set[str]:
104
115
  """Extract all Mcp-Image references from tasks."""
105
116
  images = set()
106
117
 
@@ -119,8 +130,9 @@ def _extract_existing_images(tasks: list[Task]) -> set[str]:
119
130
  _extract_from_obj(item)
120
131
 
121
132
  for task in tasks:
122
- if task.mcp_config:
123
- _extract_from_obj(task.mcp_config)
133
+ mcp_config = task.get("mcp_config")
134
+ if mcp_config:
135
+ _extract_from_obj(mcp_config)
124
136
 
125
137
  return images
126
138
 
@@ -183,6 +195,63 @@ def _extract_dotenv_api_key_vars(env_dir: Path) -> set[str]:
183
195
  return detected
184
196
 
185
197
 
198
+ def _extract_env_vars_from_docker_args(args: list[str]) -> set[str]:
199
+ """Extract environment variable names from docker run arguments.
200
+
201
+ Parses args like: ["run", "--rm", "-i", "-e", "API_KEY=value", "-e", "TOKEN", "image:tag"]
202
+ Returns set of env var names (not values).
203
+ """
204
+ env_vars: set[str] = set()
205
+ i = 0
206
+ while i < len(args):
207
+ arg = args[i]
208
+
209
+ # Check for -e or --env flags
210
+ if arg in ("-e", "--env"):
211
+ if i + 1 < len(args):
212
+ env_spec = args[i + 1]
213
+ # Could be "KEY=value" or just "KEY"
214
+ var_name = env_spec.split("=", 1)[0].strip()
215
+ if var_name:
216
+ env_vars.add(var_name)
217
+ i += 2
218
+ continue
219
+ # Check for --env=KEY=value format
220
+ elif arg.startswith("--env="):
221
+ env_spec = arg[6:] # Remove "--env=" prefix
222
+ var_name = env_spec.split("=", 1)[0].strip()
223
+ if var_name:
224
+ env_vars.add(var_name)
225
+
226
+ i += 1
227
+
228
+ env_vars.discard("HUD_API_KEY")
229
+ return env_vars
230
+
231
+
232
+ def _extract_vars_from_task_configs(raw_tasks: list[dict[str, Any]]) -> set[str]:
233
+ """Extract environment variable names from docker run commands in task mcp_configs."""
234
+ all_env_vars: set[str] = set()
235
+
236
+ for task in raw_tasks:
237
+ mcp_config = task.get("mcp_config", {})
238
+
239
+ # Iterate through all server configs
240
+ for server_config in mcp_config.values():
241
+ if not isinstance(server_config, dict):
242
+ continue
243
+
244
+ command = server_config.get("command", "")
245
+ args = server_config.get("args", [])
246
+
247
+ # Only process docker run commands
248
+ if command == "docker" and "run" in args:
249
+ env_vars = _extract_env_vars_from_docker_args(args)
250
+ all_env_vars.update(env_vars)
251
+
252
+ return all_env_vars
253
+
254
+
186
255
  def convert_tasks_to_remote(tasks_file: str) -> str:
187
256
  """Convert a local tasks file to remote MCP tasks and return new filename.
188
257
 
@@ -190,17 +259,18 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
190
259
  1) Find env dir; ensure built (hud.lock.yaml), otherwise build
191
260
  2) Ensure pushed to registry, otherwise push
192
261
  3) Check for outdated images in existing task configurations
193
- 4) Create remote_[tasks].json with mcp_config pointing to mcp.hud.so and Mcp-Image
262
+ 4) Create remote_[tasks].json with mcp_config pointing to mcp.hud.ai and Mcp-Image
194
263
  5) Return the new tasks file path
195
264
  """
196
265
  tasks_path = Path(tasks_file).resolve()
197
266
 
198
- # Load validated tasks for decision-making (may resolve env vars)
199
- tasks: list[Task] = load_tasks(str(tasks_path)) # type: ignore[assignment]
200
-
201
- # Load raw tasks to preserve placeholders when writing back to disk
267
+ # Load raw tasks - we work with dicts directly to preserve placeholders
268
+ # when writing back to disk (e.g., ${HUD_API_KEY})
202
269
  raw_tasks: list[dict[str, Any]] = load_tasks(str(tasks_path), raw=True) # type: ignore[assignment]
203
270
 
271
+ # Use the same raw tasks for validation (they have mcp_config structure)
272
+ tasks = raw_tasks
273
+
204
274
  # Ensure HUD_API_KEY is available: prefer process env, else load from env_dir/.env
205
275
  from hud.settings import settings
206
276
 
@@ -224,9 +294,24 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
224
294
  hud_console.hint("Ensure you're in or near your environment folder before running 'hud rl'")
225
295
  raise typer.Exit(1)
226
296
 
227
- # Ensure built and pushed
228
- lock_data = ensure_built(env_dir, interactive=True)
229
- lock_data = _ensure_pushed(env_dir, lock_data)
297
+ # For convert command, we don't need Docker running - just check for lock file
298
+ # This avoids showing Docker-related messages during conversion
299
+ lock_path = env_dir / "hud.lock.yaml"
300
+ if not lock_path.exists():
301
+ hud_console.error("No hud.lock.yaml found. The environment needs to be built first.")
302
+ hud_console.info("Run 'hud build' in the environment directory to build it.")
303
+ raise typer.Exit(1)
304
+
305
+ # Load lock data directly
306
+ try:
307
+ with open(lock_path) as f:
308
+ lock_data: dict[str, Any] = yaml.safe_load(f) or {}
309
+ except Exception as e:
310
+ hud_console.error(f"Failed to read hud.lock.yaml: {e}")
311
+ raise typer.Exit(1) from e
312
+
313
+ # Check if pushed - don't check Docker for convert command
314
+ lock_data = _ensure_pushed(env_dir, lock_data, check_docker=False)
230
315
 
231
316
  # Derive remote image name org/name:tag
232
317
  remote_image = _derive_remote_image(lock_data)
@@ -297,20 +382,35 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
297
382
  hud_console.success(f"Updated {tasks_path.name} with latest image: {remote_image}")
298
383
  return str(tasks_path)
299
384
 
300
- # Extract additional API key headers from lock and suggest from .env
385
+ # Extract environment variables from multiple sources:
386
+ # 1. Lock file (authoritative for required env vars)
301
387
  provided_keys = _extract_api_key_vars(lock_data)
388
+
389
+ # 2. Task configs (docker run -e flags)
390
+ task_env_vars = _extract_vars_from_task_configs(raw_tasks)
391
+
392
+ # 3. .env file (detect API-like vars)
302
393
  dotenv_keys = _extract_dotenv_api_key_vars(env_dir)
303
394
 
304
- # If .env contains API-like vars not in lock, offer to include them
305
- missing = sorted(dotenv_keys - provided_keys)
395
+ # Combine: lock file vars + task config vars, then check for missing from .env
396
+ all_detected = provided_keys | task_env_vars
397
+
398
+ # If .env contains API-like vars not yet included, offer to add them
399
+ missing = sorted(dotenv_keys - all_detected)
306
400
  if missing:
307
401
  names_preview = ", ".join(missing)
308
402
  prompt = (
309
403
  f"Detected env vars in .env that look like API keys: {names_preview}.\n"
310
404
  "Include them as remote headers (values will be ${VAR} placeholders)?"
311
405
  )
312
- if hud_console.confirm(prompt, default=True):
313
- provided_keys.update(missing)
406
+ if not hud_console.confirm(prompt, default=True):
407
+ # User cancelled - exit without creating the file
408
+ hud_console.info("Conversion cancelled by user")
409
+ raise typer.Exit(0)
410
+ all_detected.update(missing)
411
+
412
+ # Final set of env vars to convert to headers
413
+ provided_keys = all_detected
314
414
 
315
415
  extra_api_key_headers: dict[str, str] = {}
316
416
  for var_name in provided_keys:
@@ -344,10 +444,10 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
344
444
  tasks_payload: list[dict[str, Any]] = []
345
445
  for t in tasks:
346
446
  item: dict[str, Any] = {
347
- "prompt": t.prompt,
447
+ "prompt": t.get("prompt"),
348
448
  "mcp_config": {
349
449
  "hud": {
350
- "url": "https://mcp.hud.so/v3/mcp",
450
+ "url": settings.hud_mcp_url,
351
451
  "headers": {
352
452
  "Authorization": "Bearer ${HUD_API_KEY}",
353
453
  "Mcp-Image": remote_image,
@@ -360,18 +460,16 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
360
460
  item["mcp_config"]["hud"]["headers"].update(extra_api_key_headers)
361
461
 
362
462
  # Optional fields, omit Nones
363
- if t.setup_tool is not None:
364
- item["setup_tool"] = _simplify_tool_call(t.setup_tool)
365
- if t.evaluate_tool is not None:
366
- item["evaluate_tool"] = _simplify_tool_call(t.evaluate_tool)
367
- if t.agent_tools is not None:
368
- item["agent_tools"] = t.agent_tools
369
- if t.system_prompt is not None:
370
- item["system_prompt"] = t.system_prompt
371
- if t.metadata:
372
- item["metadata"] = t.metadata
373
- if t.id is not None:
374
- item["id"] = t.id
463
+ if t.get("setup_tool") is not None:
464
+ item["setup_tool"] = _simplify_tool_call(t["setup_tool"])
465
+ if t.get("evaluate_tool") is not None:
466
+ item["evaluate_tool"] = _simplify_tool_call(t["evaluate_tool"])
467
+ if t.get("agent_config") is not None:
468
+ item["agent_config"] = t["agent_config"]
469
+ if t.get("metadata"):
470
+ item["metadata"] = t["metadata"]
471
+ if t.get("id") is not None:
472
+ item["id"] = t["id"]
375
473
 
376
474
  tasks_payload.append(item)
377
475
 
@@ -382,6 +480,5 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
382
480
  f.write("\n")
383
481
 
384
482
  hud_console.success(f"Created remote tasks file: {remote_path.name}")
385
- hud_console.hint("Proceeding with RL training on the remote environment")
386
483
 
387
484
  return str(remote_path)
@@ -0,0 +1,151 @@
1
+ """Templates for hud init command."""
2
+
3
+ DOCKERFILE_HUD = """\
4
+ FROM python:3.11-slim
5
+
6
+ RUN apt-get update && apt-get install -y --no-install-recommends curl \\
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ WORKDIR /app
10
+ COPY pyproject.toml uv.lock* ./
11
+ RUN pip install uv && uv sync --frozen --no-dev 2>/dev/null || uv sync --no-dev
12
+ COPY . .
13
+
14
+ # Most of the time this command should not change, except if you change your env path
15
+ # or launch some other service before running the environment
16
+ CMD ["uv", "run", "python", "-m", "hud", "dev", "env:env", "--stdio"]
17
+ """
18
+
19
+ # fmt: off
20
+ ENV_PY = '''\
21
+ """{env_name} - HUD Environment"""
22
+
23
+ import asyncio
24
+
25
+ import hud
26
+ from hud.settings import settings
27
+ from openai import AsyncOpenAI, Omit
28
+ from hud.environment import Environment
29
+
30
+ env = Environment("{env_name}")
31
+
32
+
33
+ # =============================================================================
34
+ # 1. TOOLS - Functions the agent can call
35
+ # =============================================================================
36
+
37
+ @env.tool()
38
+ def count_letter(text: str, letter: str) -> int:
39
+ """Count occurrences of a letter in text."""
40
+ return text.lower().count(letter.lower())
41
+
42
+
43
+ # =============================================================================
44
+ # 2. SCRIPTS - Define prompts and evaluation logic
45
+ # =============================================================================
46
+
47
+ @env.scenario("count")
48
+ async def count_script(sentence: str, letter: str, fmt: str = "integer"):
49
+ """Agent must count a letter. We check if they got it right."""
50
+ # Yield the prompt, receive the agent's final answer
51
+ answer = yield f"How many times does '{{letter}}' appear in: '{{sentence}}'? Format: {{fmt}}."
52
+
53
+ # Score: 1.0 if correct, 0.0 otherwise
54
+ correct = str(sentence.lower().count(letter.lower()))
55
+ yield correct in answer
56
+
57
+
58
+ # =============================================================================
59
+ # 3. CONNECT EXISTING SERVERS (optional)
60
+ # =============================================================================
61
+
62
+ # --- FastAPI app ---
63
+ # from my_app import app
64
+ # env.connect_fastapi(app)
65
+
66
+ # --- FastMCP / MCPServer ---
67
+ # from my_server import mcp
68
+ # env.connect_server(mcp)
69
+
70
+ # --- OpenAPI spec (URL or file path) ---
71
+ # env.connect_openapi("https://api.example.com/openapi.json")
72
+
73
+ # --- MCP config (stdio or SSE) ---
74
+ # env.connect_mcp_config({{
75
+ # "my-server": {{"command": "uvx", "args": ["some-mcp-server"]}}
76
+ # }})
77
+
78
+ # --- HUD hub (requires deployment, see below) ---
79
+ # env.connect_hub("my-org/my-env", prefix="remote")
80
+
81
+
82
+ # =============================================================================
83
+ # TEST - Run with: python env.py
84
+ # =============================================================================
85
+
86
+ async def test():
87
+ client = AsyncOpenAI(
88
+ base_url=settings.hud_gateway_url,
89
+ api_key=settings.api_key,
90
+ )
91
+
92
+ # Create a task from the scenario
93
+ task = env("count", sentence="Strawberry world", letter="r")
94
+
95
+ # Test with and without tools
96
+ async with hud.eval(task, variants={{"tools": [True, False]}}) as ctx:
97
+ response = await client.chat.completions.create(
98
+ model="gpt-4o-mini",
99
+ messages=[{{"role": "user", "content": ctx.prompt}}],
100
+ tools=ctx.as_openai_chat_tools() if ctx.variants["tools"] else Omit(),
101
+ )
102
+
103
+ # Handle tool calls if present
104
+ message = response.choices[0].message
105
+ if message.tool_calls:
106
+ result = await ctx.call_tool(message.tool_calls[0])
107
+ answer = str(result["content"])
108
+ else:
109
+ answer = message.content
110
+
111
+ await ctx.submit(answer or "")
112
+
113
+
114
+ if __name__ == "__main__":
115
+ asyncio.run(test())
116
+
117
+
118
+ # =============================================================================
119
+ # DEPLOYMENT
120
+ # =============================================================================
121
+ # To deploy this environment on HUD:
122
+ #
123
+ # 1. Push this repo to GitHub
124
+ # 2. Go to hud.ai -> New -> Environment
125
+ # 3. Choose "From GitHub URL" and paste your repo URL
126
+ # 4. This deploys the environment for remote connection
127
+ #
128
+ # Once deployed, connect to it from other environments:
129
+ # env.connect_hub("{env_name}")
130
+ #
131
+ # Remote deployment enables:
132
+ # - Parallelized evaluations (run many agents simultaneously)
133
+ # - Training data collection at scale
134
+ # - Shared environments across team members
135
+ #
136
+ # Note: The test() function above is just for local testing.
137
+ # It's not required for the deployed environment.
138
+ '''
139
+ # fmt: on
140
+
141
+ PYPROJECT_TOML = """\
142
+ [project]
143
+ name = "{name}"
144
+ version = "0.1.0"
145
+ requires-python = ">=3.10"
146
+ dependencies = ["hud-python", "openai"]
147
+
148
+ [build-system]
149
+ requires = ["hatchling"]
150
+ build-backend = "hatchling.build"
151
+ """
@@ -0,0 +1 @@
1
+ """Tests for CLI flows."""
@@ -0,0 +1,126 @@
1
+ """Tests for CLI flows dev module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import base64
6
+ import json
7
+ from unittest import mock
8
+
9
+ import pytest
10
+
11
+ from hud.cli.flows.dev import generate_cursor_deeplink
12
+
13
+
14
class TestGenerateCursorDeeplink:
    """Checks on the Cursor install deeplink returned by ``generate_cursor_deeplink``."""

    @staticmethod
    def _config_payload(link):
        """Return the raw base64-decoded bytes of the text following ``config=``."""
        return base64.b64decode(link.split("config=")[1])

    def test_generate_deeplink_basic(self):
        """The link starts with the install prefix and carries name and config."""
        link = generate_cursor_deeplink("my-server", 8000)

        assert link.startswith("cursor://anysphere.cursor-deeplink/mcp/install?")
        for fragment in ("name=my-server", "config="):
            assert fragment in link

    def test_generate_deeplink_config_content(self):
        """The decoded config holds the localhost MCP URL for the given port."""
        link = generate_cursor_deeplink("test-server", 9999)

        # The config travels as base64-encoded JSON after the "config=" marker.
        payload = json.loads(self._config_payload(link).decode())

        assert payload["url"] == "http://localhost:9999/mcp"

    def test_generate_deeplink_different_ports(self):
        """Each requested port shows up in the URL of its own decoded config."""
        links = {port: generate_cursor_deeplink("server", port) for port in (8000, 3000)}

        # Decode every link first, then assert, in the order the ports were given.
        configs = {port: json.loads(self._config_payload(link)) for port, link in links.items()}

        for port, config in configs.items():
            assert str(port) in config["url"]

    def test_generate_deeplink_special_characters_in_name(self):
        """A name containing ``-``, ``_`` and ``.`` appears verbatim in the link."""
        link = generate_cursor_deeplink("my-cool_server.v2", 8000)

        assert "name=my-cool_server.v2" in link
54
+
55
+
56
class TestCreateDynamicTrace:
    """Exercise ``create_dynamic_trace`` with request, git and settings mocked.

    ``mock.patch`` decorators are applied bottom-up, so each test receives its
    mocks in the order settings, git info, request.
    """

    @staticmethod
    def _stub_settings(settings_mock):
        """Give the patched settings object a fake API URL and API key."""
        settings_mock.configure_mock(hud_api_url="https://api.hud.ai", api_key="test-key")

    @pytest.mark.asyncio
    @mock.patch("hud.cli.flows.dev.make_request")
    @mock.patch("hud.cli.utils.git.get_git_info")
    @mock.patch("hud.cli.flows.dev.settings")
    async def test_create_dynamic_trace_success(self, mock_settings, mock_git, mock_request):
        """A response with an ``id`` yields that id and the matching trace URL."""
        from hud.cli.flows.dev import create_dynamic_trace

        self._stub_settings(mock_settings)
        mock_git.return_value = {"remote_url": "https://github.com/user/repo"}
        mock_request.return_value = {"id": "trace-123"}

        call_kwargs = {
            "mcp_config": {"server": {"url": "http://localhost:8000"}},
            "build_status": True,
            "environment_name": "test-env",
        }
        created_id, trace_url = await create_dynamic_trace(**call_kwargs)

        assert created_id == "trace-123"
        assert trace_url == "https://hud.ai/trace/trace-123"
        mock_request.assert_called_once()

    @pytest.mark.asyncio
    @mock.patch("hud.cli.flows.dev.make_request")
    @mock.patch("hud.cli.utils.git.get_git_info")
    @mock.patch("hud.cli.flows.dev.settings")
    async def test_create_dynamic_trace_no_git(self, mock_settings, mock_git, mock_request):
        """With empty git info the trace is still created and no git_info is sent."""
        from hud.cli.flows.dev import create_dynamic_trace

        self._stub_settings(mock_settings)
        mock_git.return_value = {}  # No remote_url
        mock_request.return_value = {"id": "trace-456"}

        call_kwargs = {
            "mcp_config": {"server": {"url": "http://localhost:8000"}},
            "build_status": False,
            "environment_name": "test-env",
        }
        created_id, _ = await create_dynamic_trace(**call_kwargs)

        assert created_id == "trace-456"
        # NOTE(review): only the ``json`` keyword argument is inspected; if the
        # payload reaches make_request another way this passes vacuously -- confirm.
        sent_payload = mock_request.call_args.kwargs.get("json", {})
        assert "git_info" not in sent_payload

    @pytest.mark.asyncio
    @mock.patch("hud.cli.flows.dev.make_request")
    @mock.patch("hud.cli.utils.git.get_git_info")
    @mock.patch("hud.cli.flows.dev.settings")
    async def test_create_dynamic_trace_api_error(self, mock_settings, mock_git, mock_request):
        """When the request raises, both the trace id and the URL come back as None."""
        from hud.cli.flows.dev import create_dynamic_trace

        self._stub_settings(mock_settings)
        mock_git.return_value = {}
        mock_request.side_effect = Exception("API Error")

        call_kwargs = {
            "mcp_config": {"server": {}},
            "build_status": True,
            "environment_name": "test-env",
        }
        created_id, trace_url = await create_dynamic_trace(**call_kwargs)

        assert created_id is None
        assert trace_url is None