hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282) hide show
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
hud/cli/init.py CHANGED
@@ -14,24 +14,29 @@ import typer
14
14
 
15
15
  from hud.utils.hud_console import HUDConsole
16
16
 
17
- # Presets mapping to environment folders in public SDK repo
17
+ # Presets mapping to public GitHub repositories under hud-evals org
18
18
  GITHUB_OWNER = "hud-evals"
19
- GITHUB_REPO = "hud-python"
20
19
  GITHUB_BRANCH = "main"
21
20
 
22
21
  PRESET_MAP: dict[str, str | None] = {
23
- "blank": "blank",
24
- "deep-research": "deepresearch",
25
- "browser": "browser",
22
+ "blank": "hud-blank",
23
+ "deep-research": "hud-deepresearch",
24
+ "browser": "hud-browser",
25
+ "rubrics": "hud-rubrics",
26
+ "verilog-coding-template": "verilog-coding-template",
27
+ "data-science-template": "data-science-template",
26
28
  }
27
29
 
28
30
  SKIP_DIR_NAMES = {"node_modules", "__pycache__", "dist", "build", ".next", ".git"}
29
31
 
30
32
  # Files that need placeholder replacement
31
33
  PLACEHOLDER_FILES = {
32
- "pyproject.toml",
34
+ "server/pyproject.toml",
35
+ "environment/pyproject.toml",
36
+ "server/main.py",
37
+ "server/README.md",
38
+ "environment/README.md",
33
39
  "tasks.json",
34
- "src/controller/server.py",
35
40
  "test_env.ipynb",
36
41
  "README.md",
37
42
  }
@@ -48,7 +53,7 @@ def _replace_placeholders(target_dir: Path, env_name: str) -> list[str]:
48
53
  List of files that were modified
49
54
  """
50
55
  modified_files = []
51
- placeholder = "test_test"
56
+ placeholder = "blank" # Placeholder used in blank environment template
52
57
 
53
58
  # Normalize environment name for use in code/configs
54
59
  # Replace spaces and special chars with underscores for Python identifiers
@@ -86,8 +91,11 @@ def _prompt_for_preset() -> str:
86
91
  try:
87
92
  choices = [
88
93
  {"name": "blank", "message": "blank"},
89
- {"name": "deep-research", "message": "deep-research"},
90
94
  {"name": "browser", "message": "browser"},
95
+ {"name": "deep-research", "message": "deep-research"},
96
+ {"name": "rubrics", "message": "rubrics"},
97
+ {"name": "verilog-coding-template", "message": "verilog-coding-template"},
98
+ {"name": "data-science-template", "message": "data-science-template"},
91
99
  ]
92
100
  display_choices = [c["message"] for c in choices]
93
101
  selected = questionary.select(
@@ -103,10 +111,10 @@ def _prompt_for_preset() -> str:
103
111
  return "blank"
104
112
 
105
113
 
106
- def _download_tarball_subdir(
107
- owner: str, repo: str, ref: str, subdir: str, dest_dir: Path, files_created: list[str]
114
+ def _download_tarball_repo(
115
+ owner: str, repo: str, ref: str, dest_dir: Path, files_created: list[str]
108
116
  ) -> None:
109
- """Download a GitHub tarball and extract only a subdirectory."""
117
+ """Download a GitHub tarball and extract the entire repository."""
110
118
  tarball_url = f"https://codeload.github.com/{owner}/{repo}/tar.gz/{ref}"
111
119
 
112
120
  token = os.getenv("GITHUB_TOKEN")
@@ -135,16 +143,17 @@ def _download_tarball_subdir(
135
143
  if not members:
136
144
  return
137
145
  top = members[0].name.split("/", 1)[0]
138
- target_prefix = f"{top}/environments/{subdir.strip('/')}"
139
146
 
140
147
  for member in members:
141
148
  name = member.name
142
- if not (name == target_prefix or name.startswith(target_prefix + "/")):
149
+ if name == top:
150
+ continue
151
+
152
+ if not name.startswith(top + "/"):
143
153
  continue
144
154
 
145
- rel_path = name[len(target_prefix) :].lstrip("/")
155
+ rel_path = name[len(top) + 1 :]
146
156
  if not rel_path:
147
- dest_dir.mkdir(parents=True, exist_ok=True)
148
157
  continue
149
158
 
150
159
  out_path = (dest_dir / rel_path).resolve()
@@ -177,21 +186,21 @@ def create_environment(
177
186
 
178
187
  hud_console = HUDConsole()
179
188
 
180
- # Determine environment name/target directory
181
- if name is None:
182
- current_dir = Path.cwd()
183
- name = current_dir.name
184
- target_dir = current_dir
185
- hud_console.info(f"Using current directory name: {name}")
186
- else:
187
- target_dir = Path(directory) / name
188
-
189
189
  # Choose preset
190
190
  preset_normalized = (preset or "").strip().lower() if preset else _prompt_for_preset()
191
+
192
+ # If no name is provided, use the preset name as the environment name
193
+ if name is None:
194
+ name = preset_normalized
195
+ hud_console.info(f"Using preset name as environment name: {name}")
196
+
197
+ # Always create a new directory based on the name
198
+ target_dir = Path.cwd() / name if directory == "." else Path(directory) / name
199
+
191
200
  if preset_normalized not in PRESET_MAP:
201
+ available = ", ".join(sorted(PRESET_MAP.keys()))
192
202
  hud_console.warning(
193
- f"Unknown preset '{preset_normalized}', defaulting to 'blank' "
194
- "(available: blank, deep-research, browser)"
203
+ f"Unknown preset '{preset_normalized}', defaulting to 'blank' (available: {available})"
195
204
  )
196
205
  preset_normalized = "blank"
197
206
 
@@ -205,17 +214,14 @@ def create_environment(
205
214
  hud_console.warning(f"Overwriting existing files in {target_dir}")
206
215
 
207
216
  # Download preset from GitHub
208
- env_folder = PRESET_MAP[preset_normalized]
209
- if env_folder is None:
210
- hud_console.error("Internal error: preset mapping missing folder name")
217
+ repo_name = PRESET_MAP[preset_normalized]
218
+ if repo_name is None:
219
+ hud_console.error("Internal error: preset mapping missing repo name")
211
220
  raise typer.Exit(1)
212
221
 
213
222
  hud_console.header(f"Initializing HUD Environment: {name} (preset: {preset_normalized})")
214
- hud_console.section_title("Downloading template from public SDK")
215
- source_url = (
216
- f"https://github.com/{GITHUB_OWNER}/{GITHUB_REPO}/tree/"
217
- f"{GITHUB_BRANCH}/environments/{env_folder}"
218
- )
223
+ hud_console.section_title("Downloading template from GitHub")
224
+ source_url = f"https://github.com/{GITHUB_OWNER}/{repo_name}"
219
225
  hud_console.info("Source: " + source_url)
220
226
 
221
227
  target_dir.mkdir(parents=True, exist_ok=True)
@@ -223,11 +229,10 @@ def create_environment(
223
229
  started = time.time()
224
230
  files_created_dl: list[str] = []
225
231
  try:
226
- _download_tarball_subdir(
232
+ _download_tarball_repo(
227
233
  owner=GITHUB_OWNER,
228
- repo=GITHUB_REPO,
234
+ repo=repo_name,
229
235
  ref=GITHUB_BRANCH,
230
- subdir=env_folder,
231
236
  dest_dir=target_dir,
232
237
  files_created=files_created_dl,
233
238
  )
@@ -240,31 +245,28 @@ def create_environment(
240
245
  f"Downloaded {len(files_created_dl)} files in {duration_ms} ms into {target_dir}"
241
246
  )
242
247
 
243
- # Replace placeholders in template files
244
- hud_console.section_title("Customizing template files")
245
- modified_files = _replace_placeholders(target_dir, name)
246
- if modified_files:
247
- hud_console.success(f"Replaced placeholders in {len(modified_files)} files:")
248
- for file in modified_files[:5]: # Show first 5 files
249
- hud_console.status_item(file, "updated")
250
- if len(modified_files) > 5:
251
- hud_console.info(f"... and {len(modified_files) - 5} more files")
252
- else:
253
- hud_console.info("No placeholder replacements needed")
248
+ # Replace placeholders in template files (only for blank preset)
249
+ if preset_normalized == "blank":
250
+ hud_console.section_title("Customizing template files")
251
+ modified_files = _replace_placeholders(target_dir, name)
252
+ if modified_files:
253
+ hud_console.success(f"Replaced placeholders in {len(modified_files)} files:")
254
+ for file in modified_files[:5]: # Show first 5 files
255
+ hud_console.status_item(file, "updated")
256
+ if len(modified_files) > 5:
257
+ hud_console.info(f"... and {len(modified_files) - 5} more files")
258
+ else:
259
+ hud_console.info("No placeholder replacements needed")
254
260
 
255
261
  hud_console.section_title("Top-level files and folders")
256
262
  for entry in sorted(os.listdir(target_dir)):
257
263
  hud_console.status_item(entry, "added")
258
264
 
259
265
  hud_console.section_title("Next steps")
260
- if target_dir == Path.cwd():
261
- hud_console.info("1. Start development server (with MCP inspector):")
262
- hud_console.command_example("hud dev --inspector")
263
- else:
264
- hud_console.info("1. Enter the directory:")
265
- hud_console.command_example(f"cd {target_dir}")
266
- hud_console.info("\n2. Start development server (with MCP inspector):")
267
- hud_console.command_example("hud dev --inspector")
268
-
266
+ # Since we now almost always create a new directory, show cd command
267
+ hud_console.info("1. Enter the directory:")
268
+ hud_console.command_example(f"cd {target_dir.name}")
269
+ hud_console.info("\n2. Start development server (with MCP inspector):")
270
+ hud_console.command_example("hud dev --inspector")
269
271
  hud_console.info("\n3. Review the README in this preset for specific instructions.")
270
272
  hud_console.info("\n4. Customize as needed.")
hud/cli/pull.py CHANGED
@@ -63,7 +63,7 @@ def fetch_lock_from_registry(reference: str) -> dict | None:
63
63
 
64
64
  # URL-encode the path segments to handle special characters in tags
65
65
  url_safe_path = "/".join(quote(part, safe="") for part in reference.split("/"))
66
- registry_url = f"{settings.hud_telemetry_url.rstrip('/')}/registry/envs/{url_safe_path}"
66
+ registry_url = f"{settings.hud_api_url.rstrip('/')}/registry/envs/{url_safe_path}"
67
67
 
68
68
  headers = {}
69
69
  if settings.api_key:
hud/cli/push.py CHANGED
@@ -152,7 +152,7 @@ def push_environment(
152
152
  hud_console.error("No HUD API key found")
153
153
  hud_console.warning("A HUD API key is required to push environments.")
154
154
  hud_console.info("\nTo get started:")
155
- hud_console.info("1. Get your API key at: https://hud.so/settings")
155
+ hud_console.info("1. Get your API key at: https://hud.ai/settings")
156
156
  hud_console.info("Set it in your environment or run: hud set HUD_API_KEY=your-key-here")
157
157
  hud_console.command_example("hud push", "Try again")
158
158
  hud_console.info("")
@@ -163,10 +163,7 @@ def push_environment(
163
163
  lock_data = yaml.safe_load(f)
164
164
 
165
165
  # Handle both old and new lock file formats
166
- local_image = lock_data.get("image", "")
167
- if not local_image and "build" in lock_data:
168
- # New format might have image elsewhere
169
- local_image = lock_data.get("image", "")
166
+ local_image = lock_data.get("images", {}).get("local") or lock_data.get("image", "")
170
167
 
171
168
  # Get internal version from lock file
172
169
  internal_version = lock_data.get("build", {}).get("version", None)
@@ -293,7 +290,7 @@ def push_environment(
293
290
  # Push the image
294
291
  hud_console.progress_message(f"Pushing {image} to registry...")
295
292
 
296
- # Show push output
293
+ # Show push output (filtered for cleaner display)
297
294
  process = subprocess.Popen( # noqa: S603
298
295
  ["docker", "push", image], # noqa: S607
299
296
  stdout=subprocess.PIPE,
@@ -303,8 +300,27 @@ def push_environment(
303
300
  errors="replace",
304
301
  )
305
302
 
303
+ # Filter output to only show meaningful progress
304
+ layers_pushed = 0
306
305
  for line in process.stdout or []:
307
- hud_console.info(line.rstrip())
306
+ line = line.rstrip()
307
+ # Only show: digest, pushed, mounted, or error lines
308
+ if any(
309
+ keyword in line.lower()
310
+ for keyword in ["digest:", "pushed", "mounted", "error", "denied"]
311
+ ):
312
+ if "pushed" in line.lower():
313
+ layers_pushed += 1
314
+ if (
315
+ verbose
316
+ or "error" in line.lower()
317
+ or "denied" in line.lower()
318
+ or "digest:" in line.lower()
319
+ ):
320
+ hud_console.info(line)
321
+
322
+ if layers_pushed > 0 and not verbose:
323
+ hud_console.info(f"Pushed {layers_pushed} layer(s)")
308
324
 
309
325
  process.wait()
310
326
 
@@ -331,8 +347,10 @@ def push_environment(
331
347
  hud_console.section_title("Pushed Image")
332
348
  hud_console.status_item("Registry", pushed_digest, primary=True)
333
349
 
334
- # Update the lock file with registry information
335
- lock_data["image"] = pushed_digest
350
+ # Update the lock file with pushed image reference
351
+ if "images" not in lock_data:
352
+ lock_data["images"] = {}
353
+ lock_data["images"]["pushed"] = image
336
354
 
337
355
  # Add push information
338
356
  from datetime import UTC, datetime
@@ -348,7 +366,7 @@ def push_environment(
348
366
  with open(lock_path, "w") as f:
349
367
  yaml.dump(lock_data, f, default_flow_style=False, sort_keys=False)
350
368
 
351
- hud_console.success("Updated lock file with registry image")
369
+ hud_console.success("Updated lock file with pushed image reference")
352
370
 
353
371
  # Upload lock file to HUD registry
354
372
  try:
@@ -402,13 +420,20 @@ def push_environment(
402
420
 
403
421
  # URL-encode the path segments to handle special characters in tags
404
422
  url_safe_path = "/".join(quote(part, safe="") for part in name_with_tag.split("/"))
405
- registry_url = f"{settings.hud_telemetry_url.rstrip('/')}/registry/envs/{url_safe_path}"
423
+ registry_url = f"{settings.hud_api_url.rstrip('/')}/registry/envs/{url_safe_path}"
424
+
425
+ # Detect git remote URL for matching existing GitHub-connected registries
426
+ from hud.cli.utils.git import get_git_remote_url
427
+
428
+ github_url = get_git_remote_url(Path(directory))
406
429
 
407
430
  # Prepare the payload
408
- payload = {
431
+ payload: dict[str, str | None] = {
409
432
  "lock": yaml.dump(lock_data, default_flow_style=False, sort_keys=False),
410
433
  "digest": pushed_digest.split("@")[-1] if "@" in pushed_digest else None,
411
434
  }
435
+ if github_url:
436
+ payload["github_url"] = github_url
412
437
 
413
438
  headers = {"Authorization": f"Bearer {settings.api_key}"}
414
439
 
@@ -422,7 +447,7 @@ def push_environment(
422
447
  elif response.status_code == 401:
423
448
  hud_console.error("Authentication failed")
424
449
  hud_console.info("Check your HUD_API_KEY is valid")
425
- hud_console.info("Get a new key at: https://hud.so/settings")
450
+ hud_console.info("Get a new key at: https://hud.ai/settings")
426
451
  hud_console.info("Set it in your environment or run: hud set HUD_API_KEY=your-key-here")
427
452
  elif response.status_code == 403:
428
453
  hud_console.error("Permission denied")
hud/cli/rft.py ADDED
@@ -0,0 +1,311 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Any
5
+
6
+ import httpx
7
+ import typer
8
+ from rich.console import Console
9
+ from rich.table import Table
10
+
11
+ from hud.datasets import load_tasks
12
+ from hud.settings import settings
13
+ from hud.utils.hud_console import HUDConsole
14
+
15
+ logger = logging.getLogger(__name__)
16
+ console = Console()
17
+ hud_console = HUDConsole()
18
+
19
+
20
+ def _patch_mcp_urls_to_staging(tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
21
+ """Recursively patch all mcp.hud.so URLs to https://orcstaging.hud.so in task configs."""
22
+
23
+ def patch_value(obj: Any) -> Any:
24
+ if isinstance(obj, dict):
25
+ return {k: patch_value(v) for k, v in obj.items()}
26
+ elif isinstance(obj, list):
27
+ return [patch_value(item) for item in obj]
28
+ elif isinstance(obj, str):
29
+ # Replace any occurrence of mcp.hud.so with orcstaging.hud.so
30
+ # Handle various URL formats
31
+ if "mcp.hud.so" in obj:
32
+ # Replace the domain while preserving the protocol and path
33
+ return obj.replace("mcp.hud.so", "orcstaging.hud.so")
34
+ elif "mcp.hud.ai" in obj:
35
+ # Also handle mcp.hud.ai URLs
36
+ return obj.replace("mcp.hud.ai", "orcstaging.hud.so")
37
+ return obj
38
+ else:
39
+ return obj
40
+
41
+ return [patch_value(task) for task in tasks]
42
+
43
+
44
+ def _fetch_models() -> list[dict[str, Any]]:
45
+ """Fetch trainable models from the HUD API for the user's team."""
46
+ url = f"{settings.hud_api_url}/models/"
47
+ headers = {
48
+ "Authorization": f"Bearer {settings.api_key}",
49
+ "x-api-key": settings.api_key or "",
50
+ }
51
+ params = {"team_only": "true", "limit": 200}
52
+
53
+ try:
54
+ with httpx.Client(timeout=30.0) as client:
55
+ resp = client.get(url, headers=headers, params=params)
56
+ resp.raise_for_status()
57
+ data = resp.json()
58
+ return data.get("models", [])
59
+ except httpx.HTTPStatusError as e:
60
+ hud_console.error(f"Failed to fetch models: {e.response.status_code}")
61
+ if e.response.status_code == 401:
62
+ hud_console.hint("Check that your HUD_API_KEY is valid")
63
+ raise typer.Exit(1) from e
64
+ except httpx.RequestError as e:
65
+ hud_console.error(f"Connection error while fetching models: {e}")
66
+ raise typer.Exit(1) from e
67
+
68
+
69
+ def _select_model(models: list[dict[str, Any]]) -> dict[str, Any]:
70
+ """Display models and let user select one for training."""
71
+ # Filter to only trainable models that are ready
72
+ trainable_models = [
73
+ m
74
+ for m in models
75
+ if m.get("is_trainable", False)
76
+ and m.get("status") == "ready"
77
+ and not m.get("public", False)
78
+ and m.get("model_name") is not None
79
+ ]
80
+
81
+ if not trainable_models:
82
+ hud_console.error("No trainable models found in your team.")
83
+ hud_console.hint("Fork a trainable model at https://api.hud.so/models to start training.")
84
+ raise typer.Exit(1)
85
+
86
+ # Display models in a table
87
+ hud_console.section_title("Available Trainable Models")
88
+ table = Table(show_header=True, header_style="bold")
89
+ table.add_column("#", style="dim", width=4)
90
+ table.add_column("Name", style="bold")
91
+ table.add_column("Status")
92
+ table.add_column("Provider")
93
+
94
+ for i, model in enumerate(trainable_models, 1):
95
+ provider_name = (
96
+ model.get("provider", {}).get("name", "unknown") if model.get("provider") else "unknown"
97
+ )
98
+ table.add_row(
99
+ str(i),
100
+ model.get("name", "unnamed"),
101
+ model.get("status", "unknown"),
102
+ provider_name,
103
+ )
104
+
105
+ hud_console.console.print(table)
106
+ hud_console.print("")
107
+
108
+ # Build choices for selection
109
+ choices = [
110
+ {"name": f"{m.get('name', 'unnamed')} ({m.get('base_model', 'unknown')})", "value": m}
111
+ for m in trainable_models
112
+ ]
113
+
114
+ selected: dict[str, Any] = hud_console.select("Select a model to train:", choices) # type: ignore[assignment]
115
+ return selected
116
+
117
+
118
def rft_command(
    tasks_file: str,
    reasoning_effort: str = "medium",
    verbose: bool = False,
    yes: bool = False,
    model_id: str | None = None,
) -> None:
    """
    Run Reinforcement Fine-Tuning (RFT) via the HUD RL service.

    The command runs these steps in order:

    1. Check that ``settings.api_key`` is set.
    2. Resolve the model to train: use ``model_id`` when given, otherwise fetch
       the team's models and either auto-pick the first eligible one (``yes``)
       or prompt the user.
    3. Convert the tasks file to its remote form, load it as raw dicts and
       require at least 10 tasks.
    4. Ask about vision support and show a task preview (both skipped when
       ``yes`` is set).
    5. POST the job to ``{settings.hud_rl_url}/training/jobs`` and print the
       resulting job/model identifiers.

    Args:
        tasks_file: Path to the tasks file; replaced by whatever
            ``convert_tasks_to_remote`` returns before loading.
        reasoning_effort: Sent as ``config.parameters.reasoning_effort`` in the
            job payload.
        verbose: Accepted for interface compatibility; not read by this function.
        yes: Non-interactive mode. Skips every confirmation prompt and
            auto-selects a model when ``model_id`` is not provided.
        model_id: ID of the model to train. When falsy, a model is selected
            from the fetched list instead.

    Raises:
        typer.Exit: With code 1 on any preflight, validation, connection or
            HTTP error; with code 0 when the user declines the final
            confirmation.
    """
    hud_console.header("HUD RFT (Reinforcement Fine-Tuning)")

    # Preflight check: API key
    if not settings.api_key:
        hud_console.error("HUD_API_KEY not found in environment.")
        hud_console.info("Run 'hud set HUD_API_KEY=...' or export it.")
        raise typer.Exit(1)

    # Model selection
    selected_model_id: str
    if model_id:
        # Use provided model_id directly
        selected_model_id = model_id
        hud_console.info(f"Using provided model ID: {selected_model_id}")
    else:
        # Fetch and let user select a model
        hud_console.section_title("Fetching available models")
        hud_console.info("Loading models from your team...")
        models = _fetch_models()

        if yes:
            # Auto-select first trainable model in non-interactive mode.
            # A generator + next() stops at the first match instead of
            # filtering the whole list just to take element 0.
            selected_model = next(
                (
                    m
                    for m in models
                    if m.get("is_trainable", False)
                    and m.get("status") == "ready"
                    and not m.get("public", False)
                    and m.get("model_name") is not None
                ),
                None,
            )
            if selected_model is None:
                hud_console.error("No trainable models found in your team.")
                hud_console.hint(
                    "Fork a trainable model at https://api.hud.so/models to start training."
                )
                raise typer.Exit(1)
            hud_console.info(
                f"Auto-selected first trainable model (--yes mode): "
                f"{selected_model.get('name', 'unnamed')}"
            )
        else:
            selected_model = _select_model(models)

        selected_model_id = selected_model["id"]
        hud_console.success(
            f"Selected model: {selected_model.get('name', 'unnamed')} (ID: {selected_model_id})"
        )

    # Preflight check: Convert tasks to remote if needed
    hud_console.section_title("Preparing tasks for remote training")
    try:
        from hud.cli.flows.tasks import convert_tasks_to_remote

        hud_console.info("Checking task configuration...")
        tasks_file = convert_tasks_to_remote(tasks_file)
        hud_console.success("Tasks are ready for remote training")
    except typer.Exit:
        raise
    except Exception as e:
        hud_console.error(f"Tasks file is not valid for remote training: {e!s}")
        hud_console.hint("Either ensure the tasks file has remote urls")
        hud_console.hint("Or run 'hud rft' within an environment directory")
        raise typer.Exit(1) from e

    # Load and validate tasks
    try:
        # Load tasks as raw dicts for patching and serialization
        tasks: list[dict[str, Any]] = load_tasks(tasks_file, raw=True)  # type: ignore[assignment]
        if not tasks:
            hud_console.error(f"No tasks found in {tasks_file}")
            raise typer.Exit(1)

        # Preflight check: Minimum task count
        task_count = len(tasks)
        if task_count < 10:
            hud_console.error(
                f"Insufficient tasks for RFT training: found {task_count}, need at least 10"
            )
            hud_console.hint("RFT requires a minimum of 10 tasks for effective training")
            raise typer.Exit(1)

        hud_console.info(f"Loaded {task_count} tasks from {tasks_file}")

        # Preflight check: Vision support
        hud_console.section_title("Vision Support Check")
        hud_console.warning(
            "RFT does not currently support environments that require vision capabilities."
        )
        hud_console.info(
            "Vision support includes: screenshots, image analysis, visual UI interaction, etc."
        )

        if not yes:
            if hud_console.confirm("Does your environment require vision support?", default=False):
                hud_console.error("RFT does not support vision-based environments at this time.")
                hud_console.hint(
                    "Please use environments that rely on text-based interactions only."
                )
                raise typer.Exit(1)
        else:
            hud_console.info("Skipping vision support check (--yes mode)")

        # Patch all mcp.hud.so URLs to orcstaging.hud.so
        hud_console.info("Patching MCP URLs for staging environment...")
        tasks = _patch_mcp_urls_to_staging(tasks)

        # Show task preview
        if tasks:
            if yes:
                # Skip interactive preview in auto-accept mode
                hud_console.info("Skipping task preview in auto-accept mode (--yes)")
            else:
                try:
                    from hud.cli.utils.viewer import show_json_interactive

                    hud_console.section_title("Task Preview")
                    show_json_interactive(
                        tasks[0], title="Example Task from Dataset", initial_expanded=False
                    )
                    hud_console.info("This is how your task will be sent to the RFT service.")

                    # Ask for confirmation
                    if not hud_console.confirm(
                        "\nProceed with RFT training on this dataset?", default=True
                    ):
                        hud_console.error("RFT training cancelled")
                        raise typer.Exit(0)
                except typer.Exit:
                    raise  # Re-raise typer.Exit to properly exit on cancellation
                except Exception as e:
                    # The preview is best-effort; a viewer failure must not block the job.
                    hud_console.warning(f"Could not display task preview: {e}")

    except typer.Exit:
        raise  # Re-raise typer.Exit to properly exit
    except Exception as e:
        hud_console.error(f"Failed to load tasks file: {e}")
        raise typer.Exit(1) from e

    # Prepare payload
    payload = {
        "model_id": selected_model_id,
        "dataset": {"tasks": tasks},
        "config": {"parameters": {"reasoning_effort": reasoning_effort}},
    }

    # Send request to service
    hud_console.section_title("Submitting RFT job")

    # Strip a trailing slash so a configured ".../" base does not yield "//training/jobs".
    base_url = str(settings.hud_rl_url).rstrip("/")
    url = f"{base_url}/training/jobs"

    headers = {"Authorization": f"Bearer {settings.api_key}", "Content-Type": "application/json"}

    hud_console.info(
        f"Submitting job to {url}... (this may take a few minutes to run all safety checks)"
    )

    # Only the network call itself can raise httpx.RequestError, so keep the try tight.
    try:
        with httpx.Client(timeout=300.0) as client:
            resp = client.post(url, json=payload, headers=headers)
    except httpx.RequestError as e:
        hud_console.error(f"Connection error: {e}")
        hud_console.info("Is the RL service running?")
        raise typer.Exit(1) from e

    if resp.status_code >= 400:
        try:
            detail = resp.json()
        except Exception as e:
            # Error bodies are not guaranteed to be JSON; fall back to the raw text.
            detail = f"{resp.text} - {e}"
        hud_console.error(f"Request failed ({resp.status_code}): {detail}")
        raise typer.Exit(1)

    # The job has been accepted at this point, so a malformed body is reported
    # as a warning rather than turned into a failure or a raw traceback.
    try:
        data = resp.json()
    except ValueError as e:
        hud_console.warning(f"Job was accepted but the response body was not valid JSON: {e}")
        data = {}
    if not isinstance(data, dict):
        data = {}

    job_id = data.get("job_id")

    # Use a separate local instead of rebinding the `model_id` parameter, and
    # tolerate `"model": null` / non-dict values in the response.
    model_info = data.get("model")
    job_model_id = model_info.get("id") if isinstance(model_info, dict) else None
    if not job_model_id:
        # NOTE(review): assumes the submitted model ID is what `hud rft status`
        # expects when the service does not echo one back - confirm.
        job_model_id = selected_model_id

    hud_console.success(f"Job launched successfully! ID: {job_id}")
    hud_console.info(f"Model ID: {job_model_id}")

    # Provide helpful next steps
    hud_console.info(f"To check job status, run: hud rft status {job_model_id}")