hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282)
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
hud/cli/__init__.py CHANGED
@@ -3,32 +3,29 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import asyncio
6
- import contextlib
7
6
  import json
8
7
  import sys
9
8
  from pathlib import Path
10
9
 
10
+ import httpx
11
11
  import typer
12
12
  from rich.console import Console
13
13
  from rich.panel import Panel
14
14
  from rich.table import Table
15
15
 
16
+ from hud.utils.hud_console import HUDConsole
17
+
16
18
  from . import list_func as list_module
17
- from .analyze import (
18
- analyze_environment,
19
- analyze_environment_from_config,
20
- analyze_environment_from_mcp_config,
21
- )
22
19
  from .build import build_command
23
20
  from .clone import clone_repository, get_clone_message, print_error, print_tutorial
24
21
  from .debug import debug_mcp_stdio
25
22
  from .dev import run_mcp_dev_server
26
-
27
- # Import new commands
28
- from .init import create_environment
23
+ from .eval import eval_command
29
24
  from .pull import pull_command
30
25
  from .push import push_command
31
26
  from .remove import remove_command
27
+ from .rft import rft_command
28
+ from .rft_status import rft_status_command
32
29
  from .utils.config import set_env_values
33
30
  from .utils.cursor import get_cursor_config_path, list_cursor_servers, parse_cursor_config
34
31
  from .utils.logging import CaptureLogger
@@ -39,6 +36,7 @@ app = typer.Typer(
39
36
  help="🚀 HUD CLI for MCP environment analysis and debugging",
40
37
  add_completion=False,
41
38
  rich_markup_mode="rich",
39
+ pretty_exceptions_enable=False, # Disable Rich's verbose tracebacks
42
40
  )
43
41
 
44
42
  console = Console()
@@ -91,15 +89,22 @@ def analyze(
91
89
  ) -> None:
92
90
  """🔍 Analyze MCP environment - discover tools, resources, and capabilities.
93
91
 
94
- By default, uses cached metadata for instant results.
92
+ [not dim]By default, uses cached metadata for instant results.
95
93
  Use --live to run the container for real-time analysis.
96
94
 
97
95
  Examples:
98
96
  hud analyze hudpython/test_init # Fast metadata inspection
99
97
  hud analyze my-env --live # Full container analysis
100
98
  hud analyze --config mcp-config.json # From MCP config
101
- hud analyze --cursor text-2048-dev # From Cursor config
99
+ hud analyze --cursor text-2048-dev # From Cursor config[/not dim]
102
100
  """
101
+ # Lazy import to avoid loading mcp_use on simple CLI commands
102
+ from .analyze import (
103
+ analyze_environment,
104
+ analyze_environment_from_config,
105
+ analyze_environment_from_mcp_config,
106
+ )
107
+
103
108
  if config:
104
109
  # Load config from JSON file (always live for configs)
105
110
  asyncio.run(analyze_environment_from_config(config, output_format, verbose))
@@ -175,7 +180,7 @@ def debug(
175
180
  ) -> None:
176
181
  """🐛 Debug MCP environment - test initialization, tools, and readiness.
177
182
 
178
- Examples:
183
+ [not dim]Examples:
179
184
  hud debug . # Debug current directory
180
185
  hud debug environments/browser # Debug specific directory
181
186
  hud debug . --build # Build then debug
@@ -183,10 +188,9 @@ def debug(
183
188
  hud debug my-mcp-server:v1 -e API_KEY=xxx
184
189
  hud debug --config mcp-config.json
185
190
  hud debug --cursor text-2048-dev
186
- hud debug . --max-phase 3 # Stop after phase 3
191
+ hud debug . --max-phase 3 # Stop after phase 3[/not dim]
187
192
  """
188
193
  # Import here to avoid circular imports
189
- from hud.utils.hud_console import HUDConsole
190
194
 
191
195
  from .utils.environment import (
192
196
  build_environment,
@@ -242,16 +246,32 @@ def debug(
242
246
  if build and not build_environment(directory, image_name):
243
247
  raise typer.Exit(1)
244
248
 
245
- # Build Docker command
246
- from .utils.docker import build_run_command
249
+ # Build Docker command with folder-mode envs
250
+ from .utils.docker import create_docker_run_command
247
251
 
248
- command = build_run_command(image_name, docker_args)
252
+ command = create_docker_run_command(
253
+ image_name, docker_args=docker_args, env_dir=directory
254
+ )
249
255
  else:
250
256
  # Assume it's an image name
251
257
  image = first_param
252
- from .utils.docker import build_run_command
258
+ from .utils.docker import create_docker_run_command
259
+
260
+ # For image mode, check if there's a .env file in current directory
261
+ # and use it if available (similar to hud dev behavior)
262
+ cwd = Path.cwd()
263
+ if (cwd / ".env").exists():
264
+ # Use create_docker_run_command to load .env from current directory
265
+ command = create_docker_run_command(
266
+ image,
267
+ docker_args=docker_args,
268
+ env_dir=cwd, # Load .env from current directory
269
+ )
270
+ else:
271
+ # No .env file, use basic command without env loading
272
+ from .utils.docker import build_run_command
253
273
 
254
- command = build_run_command(image, docker_args)
274
+ command = build_run_command(image, docker_args)
255
275
  else:
256
276
  console.print(
257
277
  "[red]Error: Must specify a directory, Docker image, --config, or --cursor[/red]"
@@ -269,8 +289,6 @@ def debug(
269
289
  phases_completed = asyncio.run(debug_mcp_stdio(command, logger, max_phase=max_phase))
270
290
 
271
291
  # Show summary using design system
272
- from hud.utils.hud_console import HUDConsole
273
-
274
292
  hud_console = HUDConsole()
275
293
 
276
294
  hud_console.info("") # Empty line
@@ -348,80 +366,152 @@ def version() -> None:
348
366
  console.print("HUD CLI version: [cyan]unknown[/cyan]")
349
367
 
350
368
 
369
+ @app.command()
370
+ def models(
371
+ json_output: bool = typer.Option(False, "--json", help="Output as JSON"),
372
+ ) -> None:
373
+ """📋 List available models from HUD inference gateway.
374
+
375
+ [not dim]Shows models available via the HUD inference gateway at inference.hud.ai.
376
+
377
+ Examples:
378
+ hud models # List all models
379
+ hud models --json # Output as JSON[/not dim]
380
+ """
381
+ from hud.settings import settings
382
+
383
+ try:
384
+ response = httpx.get(
385
+ f"{settings.hud_gateway_url}/models",
386
+ headers={"Authorization": f"Bearer {settings.api_key}"} if settings.api_key else {},
387
+ timeout=30.0,
388
+ )
389
+ response.raise_for_status()
390
+ data = response.json()
391
+
392
+ if json_output:
393
+ console.print_json(json.dumps(data, indent=2))
394
+ return
395
+
396
+ # Parse and display models
397
+ models_list = data.get("data", data) if isinstance(data, dict) else data
398
+
399
+ if not models_list:
400
+ console.print("[yellow]No models found[/yellow]")
401
+ return
402
+
403
+ console.print(Panel.fit("📋 [bold cyan]Available Models[/bold cyan]", border_style="cyan"))
404
+
405
+ table = Table()
406
+ table.add_column("Name", style="cyan")
407
+ table.add_column("Model (API)", style="green")
408
+ table.add_column("Routes", style="yellow")
409
+
410
+ for model in models_list:
411
+ if isinstance(model, dict):
412
+ name = model.get("name", "-")
413
+ api_model = model.get("model", model.get("id", "-"))
414
+ routes = model.get("routes", [])
415
+ routes_str = ", ".join(routes) if routes else "-"
416
+ table.add_row(name, api_model, routes_str)
417
+ else:
418
+ table.add_row(str(model), "-", "-")
419
+
420
+ console.print(table)
421
+ console.print(f"\n[dim]Gateway: {settings.hud_gateway_url}[/dim]")
422
+
423
+ except httpx.HTTPStatusError as e:
424
+ console.print(f"[red]❌ API error: {e.response.status_code}[/red]")
425
+ console.print(f"[dim]{e.response.text}[/dim]")
426
+ raise typer.Exit(1) from e
427
+ except Exception as e:
428
+ console.print(f"[red]❌ Failed to fetch models: {e}[/red]")
429
+ raise typer.Exit(1) from e
430
+
431
+
351
432
  @app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
352
433
  def dev(
353
434
  params: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008
354
435
  None,
355
- help="Environment directory followed by optional Docker arguments (e.g., '. -e KEY=value')",
356
- ),
357
- image: str | None = typer.Option(
358
- None, "--image", "-i", help="Docker image name (overrides auto-detection)"
436
+ help="Module path or extra Docker args (when using --docker)",
359
437
  ),
360
- build: bool = typer.Option(False, "--build", "-b", help="Build image before starting"),
361
- no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild without cache"),
362
- transport: str = typer.Option(
363
- "http", "--transport", "-t", help="Transport protocol: http (default) or stdio"
438
+ docker: bool = typer.Option(
439
+ False,
440
+ "--docker",
441
+ help="Run in Docker with volume mounts for hot-reload (for complex environments)",
364
442
  ),
365
- port: int = typer.Option(8765, "--port", "-p", help="HTTP server port (ignored for stdio)"),
366
- no_reload: bool = typer.Option(False, "--no-reload", help="Disable hot-reload"),
367
- full_reload: bool = typer.Option(
443
+ stdio: bool = typer.Option(
368
444
  False,
369
- "--full-reload",
370
- help="Restart entire container on file changes (instead of just server process)",
445
+ "--stdio",
446
+ help="Use stdio transport (default: HTTP)",
371
447
  ),
372
- verbose: bool = typer.Option(False, "--verbose", "-v", help="Show server logs"),
448
+ port: int = typer.Option(8765, "--port", "-p", help="HTTP server port (ignored for stdio)"),
449
+ verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed logs"),
373
450
  inspector: bool = typer.Option(
374
451
  False, "--inspector", help="Launch MCP Inspector (HTTP mode only)"
375
452
  ),
376
- no_logs: bool = typer.Option(False, "--no-logs", help="Disable streaming Docker logs"),
377
453
  interactive: bool = typer.Option(
378
454
  False, "--interactive", help="Launch interactive testing mode (HTTP mode only)"
379
455
  ),
456
+ watch: list[str] = typer.Option( # noqa: B008
457
+ [],
458
+ "--watch",
459
+ "-w",
460
+ help="Paths to watch for hot-reload (repeatable: -w tools -w env.py)",
461
+ ),
462
+ new: bool = typer.Option(
463
+ False,
464
+ "--new",
465
+ help="Create a new dev trace on hud.ai (opens in browser)",
466
+ ),
380
467
  ) -> None:
381
- """🔥 Development mode - interactive MCP environment.
468
+ """🔥 Development mode - run MCP server with hot-reload.
469
+
470
+ [not dim]TWO MODES:
471
+
472
+ 1. Python Module:
473
+ hud dev # Auto-detects module
474
+ hud dev env:env # Explicit module:attribute
475
+ hud dev -w . # Watch current directory
382
476
 
383
- Runs your MCP environment in Docker with mounted source for development.
384
- The container's CMD determines reload behavior.
477
+ 2. Docker (Complex environments):
478
+ hud dev # Auto-detects Dockerfile, no hot-reload
479
+ hud dev -w tools -w env.py # Mount & watch specific paths
480
+ hud dev -w tools # Just watch tools folder
481
+
482
+ For Docker mode, use --watch to specify which folders to mount and watch.
483
+ Paths not in --watch stay in the built image (no hot-reload).
385
484
 
386
485
  Examples:
387
- hud dev # Auto-detect in current directory
388
- hud dev environments/browser # Specific directory
389
- hud dev . --build # Build image first
390
- hud dev . --image custom:tag # Use specific image
391
- hud dev . --no-cache # Force clean rebuild
392
- hud dev . --verbose # Show detailed logs
393
- hud dev . --transport stdio # Use stdio proxy for multiple connections
394
- hud dev . --inspector # Launch MCP Inspector (HTTP mode only)
395
- hud dev . --interactive # Launch interactive testing mode (HTTP mode only)
396
- hud dev . --no-logs # Disable Docker log streaming
397
-
398
- # With Docker arguments (after all options):
399
- hud dev . -e BROWSER_PROVIDER=anchorbrowser -e ANCHOR_API_KEY=xxx
400
- hud dev . -e API_KEY=secret -v /tmp/data:/data --network host
401
- hud dev . --build -e DEBUG=true --memory 2g
486
+ hud dev # Auto-detect mode
487
+ hud dev --new # Create live dev trace on hud.ai
488
+ hud dev env:env # Run specific module
489
+ hud dev --inspector # Launch MCP Inspector
490
+ hud dev --interactive # Launch interactive testing mode
491
+ hud dev -w 'tools env.py' # Docker: hot-reload tools/ and env.py
492
+
493
+ Local development pattern (Docker + local scenarios):
494
+ Terminal 1: hud dev -w 'tools env.py' --port 8000
495
+ Terminal 2: python local_test.py # Uses connect_url()[/not dim]
402
496
  """
403
- # Parse directory and Docker arguments
404
- if params:
405
- directory = params[0]
406
- docker_args = params[1:] if len(params) > 1 else []
407
- else:
408
- directory = "."
409
- docker_args = []
497
+ # Extract module from params if provided (first param when not --docker)
498
+ module = params[0] if params and not docker else None
499
+ docker_args = params if docker else []
500
+
501
+ # Convert empty list to None for run_mcp_dev_server
502
+ watch_paths = watch if watch else None
410
503
 
411
504
  run_mcp_dev_server(
412
- directory,
413
- image,
414
- build,
415
- no_cache,
416
- transport,
505
+ module,
506
+ stdio,
417
507
  port,
418
- no_reload,
419
- full_reload,
420
508
  verbose,
421
509
  inspector,
422
- no_logs,
423
510
  interactive,
424
- docker_args,
511
+ watch_paths,
512
+ docker=docker,
513
+ docker_args=docker_args,
514
+ new_trace=new,
425
515
  )
426
516
 
427
517
 
@@ -429,17 +519,13 @@ def dev(
429
519
  def run(
430
520
  params: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008
431
521
  None,
432
- help="Python file/module/package or Docker image followed by optional arguments",
522
+ help="Docker image followed by optional Docker run arguments "
523
+ "(e.g., 'my-image:latest -e KEY=value')",
433
524
  ),
434
525
  local: bool = typer.Option(
435
526
  False,
436
527
  "--local",
437
- help="Run locally with Docker (default: remote via mcp.hud.so)",
438
- ),
439
- remote: bool = typer.Option(
440
- False,
441
- "--remote",
442
- help="Run remotely via mcp.hud.so (default)",
528
+ help="Run locally with Docker (default: remote via mcp.hud.ai)",
443
529
  ),
444
530
  transport: str = typer.Option(
445
531
  "stdio",
@@ -456,7 +542,7 @@ def run(
456
542
  url: str = typer.Option(
457
543
  None,
458
544
  "--url",
459
- help="Remote MCP server URL (default: HUD_MCP_URL or mcp.hud.so)",
545
+ help="Remote MCP server URL (default: HUD_MCP_URL or mcp.hud.ai)",
460
546
  ),
461
547
  api_key: str | None = typer.Option(
462
548
  None,
@@ -474,180 +560,54 @@ def run(
474
560
  "-v",
475
561
  help="Show detailed output",
476
562
  ),
477
- interactive: bool = typer.Option(
478
- False,
479
- "--interactive",
480
- help="Launch interactive testing mode (HTTP transport only)",
481
- ),
482
- reload: bool = typer.Option(
483
- False,
484
- "--reload",
485
- help="Enable auto-reload on file changes (local Python files only)",
486
- ),
487
- watch: list[str] = typer.Option( # noqa: B008
488
- None,
489
- "--watch",
490
- help="Directories to watch for changes (can be used multiple times). Defaults to current directory.", # noqa: E501
491
- ),
492
- cmd: str | None = typer.Option(
493
- None,
494
- "--cmd",
495
- help="Command to run as MCP server (e.g., 'python -m controller')",
496
- ),
497
563
  ) -> None:
498
- """🚀 Run MCP server.
564
+ """🚀 Run Docker image as MCP server.
499
565
 
500
- Modes:
501
- - Python (decorator-based): pass a dotted module path. Example: hud run controller
502
- The module is imported, decorators register implicitly, and the server runs.
503
- Use --reload to watch the module/package directory.
566
+ [not dim]A simple wrapper around 'docker run' that can launch images locally or remotely.
567
+ By default, runs remotely via mcp.hud.ai. Use --local to run with local Docker.
504
568
 
505
- - Command: use --cmd to run any command as an MCP server. Example: hud run --cmd "python -m controller"
506
- Works with Docker, binaries, or any executable. Supports --reload.
569
+ For local Python development with hot-reload, use 'hud dev' instead.
507
570
 
508
- - Docker image: pass a Docker image name (optionally with --local to run locally).
509
- """ # noqa: E501
510
- if not params and not cmd:
511
- typer.echo("❌ Dotted module path, Docker image, or --cmd is required")
571
+ Examples:
572
+ hud run my-image:latest # Run remotely (default)
573
+ hud run my-image:latest --local # Run with local Docker
574
+ hud run my-image:latest -e KEY=value # Remote with env vars
575
+ hud run my-image:latest --local -e KEY=val # Local with env vars
576
+ hud run my-image:latest --transport http # Use HTTP transport[/not dim]
577
+ """
578
+ if not params:
579
+ console.print("[red]❌ Docker image is required[/red]")
580
+ console.print("\nExamples:")
581
+ console.print(" hud run my-image:latest # Run remotely (default)")
582
+ console.print(" hud run my-image:latest --local # Run with local Docker")
583
+ console.print("\n[yellow]For local Python development:[/yellow]")
584
+ console.print(" hud dev # Run with hot-reload")
512
585
  raise typer.Exit(1)
513
586
 
514
- # Handle --cmd mode
515
- if cmd:
516
- import asyncio
517
-
518
- from .utils.package_runner import run_package_as_mcp
519
-
520
- asyncio.run(
521
- run_package_as_mcp(
522
- cmd, # Pass command string
523
- transport=transport,
524
- port=port,
525
- verbose=verbose,
526
- reload=reload,
527
- watch_paths=watch if watch else None,
528
- )
529
- )
530
- return
587
+ image = params[0]
588
+ docker_args = params[1:] if len(params) > 1 else []
531
589
 
532
- first_param = params[0]
533
- extra_args = params[1:] if len(params) > 1 else []
590
+ # Check if user accidentally passed a module path
591
+ from pathlib import Path
534
592
 
535
- # Guard: strip accidental nested 'run' token from positional args,
536
- # which can happen with nested invocations or reload wrappers.
537
- if first_param == "run" and extra_args:
538
- first_param, extra_args = extra_args[0], extra_args[1:]
539
-
540
- # Try to interpret first_param as module[:attr] or file[:attr]
541
- target = first_param
542
- server_attr = "mcp"
543
- if ":" in target:
544
- target, server_attr = target.split(":", 1)
545
-
546
- # Only allow dotted import paths or python files for Python mode
547
- import importlib.util as _importlib_util
548
-
549
- # Ensure current working directory is importable for local packages like 'controller'
550
- try:
551
- import sys as _sys
552
- from pathlib import Path as _Path
553
-
554
- cwd_str = str(_Path.cwd())
555
- if cwd_str not in _sys.path:
556
- _sys.path.insert(0, cwd_str)
557
- except Exception: # noqa: S110
558
- pass
559
- try:
560
- # If given a file path, detect and import via file spec
561
- from pathlib import Path as _Path
562
-
563
- if target.endswith(".py") and _Path(target).exists():
564
- spec = _importlib_util.spec_from_file_location("_hud_module", target)
565
- else:
566
- spec = _importlib_util.find_spec(target)
567
- except Exception:
568
- spec = None
569
-
570
- # Fallback: treat a local package directory (e.g. 'controller') as a module target
571
- from pathlib import Path as _Path
572
-
573
- pkg_dir = _Path(target)
574
- is_pkg_dir = pkg_dir.is_dir() and (pkg_dir / "__init__.py").exists()
575
-
576
- is_python_target = (spec is not None) or is_pkg_dir
577
-
578
- if is_python_target and not (local or remote):
579
- # Python file/package mode - use implicit MCP server
580
- import asyncio
581
-
582
- from .utils.package_runner import run_package_as_mcp, run_with_reload
583
-
584
- if reload:
585
- # Run with watchfiles reload
586
- # Use user-provided watch paths or compute from module
587
- if watch:
588
- watch_paths = watch
589
- else:
590
- # Compute a watch path that works for dotted modules as well
591
- watch_paths = [target]
592
- if spec is not None:
593
- origin = getattr(spec, "origin", None)
594
- sublocs = getattr(spec, "submodule_search_locations", None)
595
- if origin:
596
- p = _Path(origin)
597
- # If package __init__.py, watch the package directory
598
- watch_paths = [str(p.parent if p.name == "__init__.py" else p)]
599
- elif sublocs:
600
- with contextlib.suppress(Exception):
601
- watch_paths = [next(iter(sublocs))]
602
-
603
- # Always run as subprocess when using reload to enable proper file watching
604
- # This ensures the parent process can watch files while the child runs the server
605
- run_with_reload(
606
- None, # This forces subprocess mode for both stdio and http
607
- watch_paths,
608
- verbose=verbose,
609
- )
610
- else:
611
- # Run normally (but still pass reload=False for consistency)
612
- asyncio.run(
613
- run_package_as_mcp(
614
- target,
615
- transport=transport,
616
- port=port,
617
- verbose=verbose,
618
- server_attr=server_attr,
619
- reload=False, # Explicitly pass reload state
620
- watch_paths=None,
621
- )
622
- )
623
- return
624
-
625
- # Docker image mode
626
- image = first_param
627
- docker_args = extra_args
628
-
629
- # Handle conflicting flags
630
- if local and remote:
631
- typer.echo("❌ Cannot use both --local and --remote")
593
+ if not any(c in image for c in [":", "/"]) and (
594
+ Path(image).is_dir() or Path(image).is_file() or "." in image
595
+ ):
596
+ console.print(f"[yellow]⚠️ '{image}' looks like a module path, not a Docker image[/yellow]")
597
+ console.print("\n[green]For local Python development, use:[/green]")
598
+ console.print(f" hud dev {image}")
599
+ console.print("\n[green]For Docker images:[/green]")
600
+ console.print(" hud run my-image:latest")
632
601
  raise typer.Exit(1)
633
602
 
634
603
  # Default to remote if not explicitly local
635
- is_local = local and not remote
636
-
637
- # Check for interactive mode restrictions
638
- if interactive:
639
- if transport != "http":
640
- typer.echo("❌ Interactive mode requires HTTP transport (use --transport http)")
641
- raise typer.Exit(1)
642
- if not is_local:
643
- typer.echo("❌ Interactive mode is only available for local execution (use --local)")
644
- raise typer.Exit(1)
604
+ is_local = local
645
605
 
646
606
  if is_local:
647
607
  # Local Docker execution
648
608
  from .utils.runner import run_mcp_server
649
609
 
650
- run_mcp_server(image, docker_args, transport, port, verbose, interactive)
610
+ run_mcp_server(image, docker_args, transport, port, verbose, interactive=False)
651
611
  else:
652
612
  # Remote execution via proxy
653
613
  from .utils.remote_runner import run_remote_server
@@ -661,6 +621,74 @@ def run(
661
621
  run_remote_server(image, docker_args, transport, port, url, api_key, run_id, verbose)
662
622
 
663
623
 
624
+ # Create RFT subcommand app
625
+ rft_app = typer.Typer(help="🚀 Reinforcement Fine-Tuning (RFT) commands")
626
+
627
+
628
+ @rft_app.command("run")
629
+ def rft_run(
630
+ tasks_file: str = typer.Argument(
631
+ ...,
632
+ help="Path to tasks file (JSON/JSONL)",
633
+ ),
634
+ model_id: str | None = typer.Option(
635
+ None,
636
+ "--model-id",
637
+ "-m",
638
+ help="Model ID to train (skip interactive selection)",
639
+ ),
640
+ reasoning_effort: str = typer.Option(
641
+ "medium",
642
+ "--reasoning-effort",
643
+ help="Reasoning effort level (low, medium, high)",
644
+ ),
645
+ verbose: bool = typer.Option(
646
+ False,
647
+ "--verbose",
648
+ "-v",
649
+ help="Enable verbose output",
650
+ ),
651
+ yes: bool = typer.Option(
652
+ False,
653
+ "--yes",
654
+ "-y",
655
+ help="Auto-accept all prompts",
656
+ ),
657
+ ) -> None:
658
+ """Launch an RFT training job."""
659
+ rft_command(
660
+ tasks_file=tasks_file,
661
+ reasoning_effort=reasoning_effort,
662
+ verbose=verbose,
663
+ yes=yes,
664
+ model_id=model_id,
665
+ )
666
+
667
+
668
+ @rft_app.command("status")
669
+ def rft_status(
670
+ model_id: str = typer.Argument(
671
+ ...,
672
+ help="Model ID or job ID to check status for",
673
+ ),
674
+ verbose: bool = typer.Option(
675
+ False,
676
+ "--verbose",
677
+ "-v",
678
+ help="Show full status details",
679
+ ),
680
+ ) -> None:
681
+ """Check the status of an RFT job."""
682
+ rft_status_command(
683
+ model_id=model_id,
684
+ verbose=verbose,
685
+ )
686
+
687
+
688
+ # Add RFT app as a command group
689
+ app.add_typer(rft_app, name="rft")
690
+
691
+
664
692
  @app.command()
665
693
  def clone(
666
694
  url: str = typer.Argument(
@@ -670,7 +698,7 @@ def clone(
670
698
  ) -> None:
671
699
  """🚀 Clone a git repository quietly with a pretty output.
672
700
 
673
- This command wraps 'git clone' with the --quiet flag and displays
701
+ [not dim]This command wraps 'git clone' with the --quiet flag and displays
674
702
  a rich formatted success message. If the repository contains a clone
675
703
  message in pyproject.toml, it will be displayed as a tutorial.
676
704
 
@@ -685,7 +713,7 @@ def clone(
685
713
  # style = "cyan"
686
714
 
687
715
  Examples:
688
- hud clone https://github.com/user/repo.git
716
+ hud clone https://github.com/user/repo.git[/not dim]
689
717
  """
690
718
  # Run the clone
691
719
  success, result = clone_repository(url)
@@ -713,10 +741,18 @@ def build(
713
741
  platform: str | None = typer.Option(
714
742
  None, "--platform", help="Set Docker target platform (e.g., linux/amd64)"
715
743
  ),
744
+ secrets: list[str] | None = typer.Option( # noqa: B008
745
+ None,
746
+ "--secret",
747
+ help=("Docker build secret (repeatable), e.g. --secret id=GITHUB_TOKEN,env=GITHUB_TOKEN"),
748
+ ),
749
+ remote_cache: str | None = typer.Option(
750
+ None, "--remote-cache", help="Enable remote cache using Amazon ECR with specified repo name"
751
+ ),
716
752
  ) -> None:
717
753
  """🏗️ Build a HUD environment and generate lock file.
718
754
 
719
- This command:
755
+ [not dim]This command:
720
756
  - Builds a Docker image from your environment
721
757
  - Analyzes the MCP server to extract metadata
722
758
  - Generates a hud.lock.yaml file for reproducibility
@@ -726,7 +762,9 @@ def build(
726
762
  hud build environments/text_2048 -e API_KEY=secret
727
763
  hud build . --tag my-env:v1.0 -e VAR1=value1 -e VAR2=value2
728
764
  hud build . --no-cache # Force rebuild
729
- """
765
+ hud build . --remote-cache my-cache-repo # Use ECR remote cache (requires AWS_ACCOUNT_ID and AWS_DEFAULT_REGION)
766
+ hud build . --build-arg NODE_ENV=production # Pass Docker build args[/not dim]
767
+ """ # noqa: E501
730
768
  # Parse directory and extra arguments
731
769
  if params:
732
770
  directory = params[0]
@@ -735,8 +773,9 @@ def build(
735
773
  directory = "."
736
774
  extra_args = []
737
775
 
738
- # Parse environment variables from extra args
776
+ # Parse environment variables and build args from extra args
739
777
  env_vars = {}
778
+ build_args = {}
740
779
  i = 0
741
780
  while i < len(extra_args):
742
781
  if extra_args[i] == "-e" and i + 1 < len(extra_args):
@@ -760,10 +799,34 @@ def build(
760
799
  key, value = env_arg.split("=", 1)
761
800
  env_vars[key] = value
762
801
  i += 2
802
+ elif extra_args[i] == "--build-arg" and i + 1 < len(extra_args):
803
+ # Parse --build-arg KEY=VALUE format
804
+ build_arg = extra_args[i + 1]
805
+ if "=" in build_arg:
806
+ key, value = build_arg.split("=", 1)
807
+ build_args[key] = value
808
+ i += 2
809
+ elif extra_args[i].startswith("--build-arg="):
810
+ # Parse --build-arg=KEY=VALUE format
811
+ build_arg = extra_args[i][12:] # Remove --build-arg=
812
+ if "=" in build_arg:
813
+ key, value = build_arg.split("=", 1)
814
+ build_args[key] = value
815
+ i += 1
763
816
  else:
764
817
  i += 1
765
818
 
766
- build_command(directory, tag, no_cache, verbose, env_vars, platform)
819
+ build_command(
820
+ directory,
821
+ tag,
822
+ no_cache,
823
+ verbose,
824
+ env_vars,
825
+ platform,
826
+ secrets,
827
+ remote_cache,
828
+ build_args or None,
829
+ )
767
830
 
768
831
 
769
832
  @app.command()
@@ -781,14 +844,14 @@ def push(
781
844
  ) -> None:
782
845
  """📤 Push HUD environment to registry.
783
846
 
784
- Reads hud.lock.yaml from the directory and pushes to registry.
847
+ [not dim]Reads hud.lock.yaml from the directory and pushes to registry.
785
848
  Auto-detects your Docker username if --image not specified.
786
849
 
787
850
  Examples:
788
851
  hud push # Push with auto-detected name
789
852
  hud push --tag v1.0 # Push with specific tag
790
853
  hud push . --image myuser/myenv:v1.0
791
- hud push --yes # Skip confirmation
854
+ hud push --yes # Skip confirmation[/not dim]
792
855
  """
793
856
  push_command(directory, image, tag, sign, yes, verbose)
794
857
 
@@ -807,12 +870,12 @@ def pull(
807
870
  ) -> None:
808
871
  """📥 Pull HUD environment from registry with metadata preview.
809
872
 
810
- Shows environment details before downloading.
873
+ [not dim]Shows environment details before downloading.
811
874
 
812
875
  Examples:
813
876
  hud pull hud.lock.yaml # Pull from lock file
814
877
  hud pull myuser/myenv:latest # Pull by image reference
815
- hud pull myuser/myenv --verify-only # Check metadata only
878
+ hud pull myuser/myenv --verify-only # Check metadata only[/not dim]
816
879
  """
817
880
  pull_command(target, lock_file, yes, verify_only, verbose)
818
881
 
@@ -828,14 +891,14 @@ def list_environments(
828
891
  ) -> None:
829
892
  """📋 List all HUD environments in local registry.
830
893
 
831
- Shows environments pulled with 'hud pull' stored in ~/.hud/envs/
894
+ [not dim]Shows environments pulled with 'hud pull' stored in ~/.hud/envs/
832
895
 
833
896
  Examples:
834
897
  hud list # List all environments
835
898
  hud list --filter text # Filter by name
836
899
  hud list --json # Output as JSON
837
900
  hud list --all # Show digest column
838
- hud list --verbose # Show full descriptions
901
+ hud list --verbose # Show full descriptions[/not dim]
839
902
  """
840
903
  list_module.list_command(filter_name, json_output, show_all, verbose)
841
904
 
@@ -850,7 +913,7 @@ def remove(
850
913
  ) -> None:
851
914
  """🗑️ Remove HUD environments from local registry.
852
915
 
853
- Removes environment metadata from ~/.hud/envs/
916
+ [not dim]Removes environment metadata from ~/.hud/envs/
854
917
  Note: This does not remove the Docker images.
855
918
 
856
919
  Examples:
@@ -858,37 +921,44 @@ def remove(
858
921
  hud remove text_2048 # Remove by name
859
922
  hud remove hudpython/test_init # Remove by full name
860
923
  hud remove all # Remove all environments
861
- hud remove all --yes # Remove all without confirmation
924
+ hud remove all --yes # Remove all without confirmation[/not dim]
862
925
  """
863
926
  remove_command(target, yes, verbose)
864
927
 
865
928
 
866
929
  @app.command()
867
930
  def init(
868
- name: str = typer.Argument(None, help="Environment name (default: current directory name)"),
931
+ name: str = typer.Argument(None, help="Environment name (default: directory name)"),
932
+ directory: str = typer.Option(".", "--dir", "-d", help="Target directory"),
933
+ force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing files"),
869
934
  preset: str | None = typer.Option(
870
935
  None,
871
936
  "--preset",
872
937
  "-p",
873
- help="Preset to use: blank, deep-research, browser. If omitted, you'll choose interactively.", # noqa: E501
938
+ help="Download a preset: blank, deep-research, browser, rubrics",
874
939
  ),
875
- directory: str = typer.Option(".", "--dir", "-d", help="Target directory"),
876
- force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing files"),
877
940
  ) -> None:
878
- """🚀 Initialize a new HUD environment with minimal boilerplate.
941
+ """🚀 Initialize a HUD environment.
879
942
 
880
- Creates a working MCP environment with:
881
- - Dockerfile for containerization
882
- - pyproject.toml for dependencies
883
- - Minimal MCP server with context
884
- - Required setup/evaluate tools
943
+ [not dim]• Empty directory: Choose a preset interactively
944
+ Existing project: Add Dockerfile.hud and hud.py
945
+
946
+ Use --preset to skip selection and download a specific template.
885
947
 
886
948
  Examples:
887
- hud init # Use current directory name
888
- hud init my-env # Create in ./my-env/
889
- hud init my-env --dir /tmp # Create in /tmp/my-env/
949
+ hud init # Auto-detect mode
950
+ hud init my-env # Initialize with custom name
951
+ hud init --preset browser # Download browser preset[/not dim]
952
+
890
953
  """
891
- create_environment(name, directory, force, preset)
954
+ if preset:
955
+ from hud.cli.init import create_environment
956
+
957
+ create_environment(name, directory, force, preset)
958
+ else:
959
+ from hud.cli.flows.init import smart_init
960
+
961
+ smart_init(name, directory, force)
892
962
 
893
963
 
894
964
  @app.command()
@@ -900,200 +970,7 @@ def quickstart() -> None:
900
970
  clone("https://github.com/hud-evals/quickstart.git")
901
971
 
902
972
 
903
- @app.command()
904
- def eval(
905
- source: str | None = typer.Argument(
906
- None,
907
- help=(
908
- "HuggingFace dataset (e.g. 'hud-evals/SheetBench-50') or task JSON file. "
909
- "If not provided, looks for task.json in current directory."
910
- ),
911
- ),
912
- agent: str | None = typer.Argument(
913
- None,
914
- help=(
915
- "Agent backend to use (claude, openai, vllm, or litellm). If not provided, will prompt interactively." # noqa: E501
916
- ),
917
- ),
918
- full: bool = typer.Option(
919
- False,
920
- "--full",
921
- help="Run the entire dataset (omit for single-task debug mode)",
922
- ),
923
- model: str | None = typer.Option(
924
- None,
925
- "--model",
926
- help="Model name for the chosen agent",
927
- ),
928
- allowed_tools: str | None = typer.Option(
929
- None,
930
- "--allowed-tools",
931
- help="Comma-separated list of allowed tools",
932
- ),
933
- max_concurrent: int = typer.Option(
934
- 50,
935
- "--max-concurrent",
936
- help="Max concurrent tasks (prevents rate limits in both asyncio and parallel modes)",
937
- ),
938
- max_steps: int = typer.Option(
939
- 30,
940
- "--max-steps",
941
- help="Maximum steps per task (default: 10 for single, 50 for full)",
942
- ),
943
- parallel: bool = typer.Option(
944
- False,
945
- "--parallel",
946
- help="Use process-based parallel execution for large datasets (100+ tasks)",
947
- ),
948
- max_workers: int | None = typer.Option(
949
- None,
950
- "--max-workers",
951
- help="Number of worker processes for parallel mode (auto-optimized if not set)",
952
- ),
953
- max_concurrent_per_worker: int = typer.Option(
954
- 20,
955
- "--max-concurrent-per-worker",
956
- help="Maximum concurrent tasks per worker in parallel mode",
957
- ),
958
- verbose: bool = typer.Option(
959
- False,
960
- "--verbose",
961
- help="Enable verbose output from the agent",
962
- ),
963
- very_verbose: bool = typer.Option(
964
- False,
965
- "--very-verbose",
966
- "-vv",
967
- help="Enable debug-level logs for maximum visibility",
968
- ),
969
- vllm_base_url: str | None = typer.Option(
970
- None,
971
- "--vllm-base-url",
972
- help="Base URL for vLLM server (when using --agent vllm)",
973
- ),
974
- group_size: int = typer.Option(
975
- 1,
976
- "--group-size",
977
- help="Number of times to run each task (similar to RL training)",
978
- ),
979
- integration_test: bool = typer.Option(
980
- False,
981
- "--integration-test",
982
- help=(
983
- "Run integration_test_tool, where problem is setup, "
984
- "actions are applied, and evaluation is performed, without "
985
- "spinning up an agent"
986
- ),
987
- ),
988
- ) -> None:
989
- """🚀 Run evaluation on datasets or individual tasks with agents."""
990
- from hud.settings import settings
991
- from hud.utils.hud_console import HUDConsole
992
-
993
- hud_console = HUDConsole()
994
-
995
- if integration_test:
996
- agent = "integration_test"
997
-
998
- # If no source provided, reuse RL helper to find a tasks file interactively
999
- if source is None:
1000
- try:
1001
- from hud.cli.utils.tasks import find_tasks_file
1002
-
1003
- source = find_tasks_file(None, msg="Select a tasks file to run")
1004
- hud_console.success(f"Selected: {source}")
1005
- except Exception as e:
1006
- hud_console.error(
1007
- "No source provided and no task/eval JSON files found in current directory"
1008
- )
1009
- hud_console.info(
1010
- "Usage: hud eval <source> or create a task JSON file (e.g., task.json, tasks.jsonl)"
1011
- )
1012
- raise typer.Exit(1) from e
1013
-
1014
- # Import eval_command lazily to avoid importing agent dependencies
1015
- try:
1016
- from .eval import eval_command, get_available_models
1017
- except ImportError as e:
1018
- hud_console.error(
1019
- "Evaluation dependencies are not installed. "
1020
- "Please install with: pip install 'hud-python[agent]'"
1021
- )
1022
- raise typer.Exit(1) from e
1023
-
1024
- # If no agent specified, fetch available models and prompt for selection
1025
- base_model = None
1026
- if agent is None:
1027
- # Get available HUD models first
1028
- hud_models = get_available_models()
1029
-
1030
- # Build choices starting with HUD models
1031
- choices = []
1032
-
1033
- # Add HUD models as agent choices
1034
- for hud_model in hud_models:
1035
- model_name = hud_model["name"]
1036
- base_model = hud_model["base_model"]
1037
- vllm_status = " ⚡" if hud_model.get("vllm_url") else ""
1038
- choices.append({"name": f"{model_name}{vllm_status}", "value": f"{model_name}"})
1039
-
1040
- # Add standard agent choices
1041
- choices.extend(
1042
- [
1043
- {"name": "Claude 4 Sonnet", "value": "claude"},
1044
- {"name": "OpenAI Computer Use", "value": "openai"},
1045
- {"name": "vLLM (Local Server)", "value": "vllm"},
1046
- {"name": "LiteLLM (Multi-provider)", "value": "litellm"},
1047
- ]
1048
- )
1049
-
1050
- agent = hud_console.select("Select an agent to use:", choices=choices, default=0)
1051
-
1052
- # Handle HUD model selection
1053
- if agent and agent not in ["claude", "openai", "vllm", "litellm", "integration_test"]:
1054
- # Find remote model name
1055
- model = agent
1056
- if not vllm_base_url:
1057
- vllm_base_url = f"{settings.hud_rl_url}/models/{model}/vllm"
1058
-
1059
- # Set model to base model for the vllm endpoint
1060
- if not base_model:
1061
- hud_models = get_available_models()
1062
- for hud_model in hud_models:
1063
- if hud_model["name"] == model:
1064
- base_model = hud_model["base_model"]
1065
- break
1066
- if not base_model:
1067
- hud_console.error(f"Model {model} not found")
1068
- raise typer.Exit(1)
1069
- model = base_model
1070
- agent = "vllm" # Use vLLM backend for HUD models
1071
- hud_console.info(f"Using HUD model: {model} (trained on {base_model})")
1072
-
1073
- # Validate agent choice
1074
- valid_agents = ["claude", "openai", "vllm", "litellm", "integration_test"]
1075
- if agent not in valid_agents:
1076
- hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
1077
- raise typer.Exit(1)
1078
-
1079
- # Run the command
1080
- eval_command(
1081
- source=source,
1082
- full=full,
1083
- agent=agent, # type: ignore
1084
- model=model,
1085
- allowed_tools=allowed_tools,
1086
- max_concurrent=max_concurrent,
1087
- max_steps=max_steps,
1088
- parallel=parallel,
1089
- max_workers=max_workers,
1090
- max_concurrent_per_worker=max_concurrent_per_worker,
1091
- verbose=verbose,
1092
- very_verbose=very_verbose,
1093
- vllm_base_url=vllm_base_url,
1094
- group_size=group_size,
1095
- integration_test=integration_test,
1096
- )
973
+ app.command(name="eval")(eval_command)
1097
974
 
1098
975
 
1099
976
  @app.command()
@@ -1130,97 +1007,161 @@ def get(
1130
1007
 
1131
1008
 
1132
1009
  @app.command()
1133
- def rl(
1134
- tasks_file: str | None = typer.Argument(
1135
- None,
1136
- help=(
1137
- "Path to tasks file (JSON/JSONL) or HuggingFace dataset name. "
1138
- "If not provided, looks for tasks.json or tasks.jsonl in current directory."
1139
- ),
1140
- ),
1141
- model: str | None = typer.Argument(
1142
- None,
1143
- help="Model to train from https://hud.so/models (default: interactive selection)",
1144
- ),
1145
- config_file: Path | None = typer.Option( # noqa: B008
1146
- None,
1147
- "--config",
1148
- "-c",
1149
- help="Path to existing configuration file",
1150
- ),
1151
- output_dir: str = typer.Option(
1152
- "checkpoints",
1153
- "--output-dir",
1154
- "-o",
1155
- help="Output directory for checkpoints",
1010
+ def convert(
1011
+ tasks_file: str = typer.Argument(
1012
+ ..., help="Path to tasks file (JSON/JSONL) to convert to remote MCP configuration"
1156
1013
  ),
1157
- restart: bool = typer.Option(
1158
- False,
1159
- "--restart",
1160
- help="Restart the vLLM server before training",
1161
- ),
1162
- verbose: bool = typer.Option(
1163
- False,
1164
- "--verbose",
1165
- "-v",
1166
- help="Enable verbose output",
1167
- ),
1168
- local: bool = typer.Option(
1169
- False,
1170
- "--local",
1171
- help="Run training locally instead of using remote API server",
1172
- ),
1173
- no_ddp: bool = typer.Option(
1174
- False,
1175
- "--no-ddp",
1176
- help="Disable DDP even with multiple GPUs",
1177
- ),
1178
- ddp_gpus: str | None = typer.Option(
1179
- None,
1180
- "--ddp-gpus",
1181
- help="Specific GPUs for DDP (e.g., '0,1,2,3')",
1182
- ),
1183
- yes: bool = typer.Option(
1184
- False,
1185
- "--yes",
1186
- "-y",
1187
- help="Auto-accept all prompts and use defaults (lazy mode)",
1188
- ),
1189
- vllm_gpu: int | None = typer.Option(
1190
- None,
1191
- "--vllm-gpu",
1192
- help="Specific GPU for vLLM server",
1014
+ ) -> None:
1015
+ """Convert local MCP task configs to remote (mcp.hud.ai) format.
1016
+
1017
+ This mirrors the implicit conversion flow used by 'hud rl' and writes a new
1018
+ remote_<name>.json next to the source file when needed.
1019
+ """
1020
+ from pathlib import Path
1021
+
1022
+ hud_console = HUDConsole()
1023
+
1024
+ try:
1025
+ from .flows.tasks import convert_tasks_to_remote
1026
+
1027
+ result_path = convert_tasks_to_remote(tasks_file)
1028
+
1029
+ # If nothing changed, inform the user
1030
+ try:
1031
+ if Path(result_path).resolve() == Path(tasks_file).resolve():
1032
+ hud_console.success(
1033
+ "Tasks already reference remote MCP URLs. No conversion needed."
1034
+ )
1035
+ hud_console.hint("You can run them directly with: hud eval <tasks_file> --full")
1036
+ return
1037
+ except Exception as e:
1038
+ # Best effort; continue with success message
1039
+ hud_console.debug(f"Path comparison failed, continuing: {e}")
1040
+
1041
+ hud_console.success(f"Converted tasks written to: {result_path}")
1042
+ hud_console.hint(
1043
+ "You can now run remote flows: hud rl <converted_file> or hud eval <converted_file>"
1044
+ )
1045
+ except typer.Exit:
1046
+ raise
1047
+ except Exception as e:
1048
+ hud_console.error(f"Failed to convert tasks: {e}")
1049
+ raise typer.Exit(1) from e
1050
+
1051
+
1052
+ @app.command()
1053
+ def cancel(
1054
+ job_id: str | None = typer.Argument(
1055
+ None, help="Job ID to cancel. Omit to cancel all active jobs with --all."
1193
1056
  ),
1194
- vllm_gpu_count: int = typer.Option(
1195
- 1,
1196
- "--vllm-gpu-count",
1197
- help="Number of GPUs for vLLM server",
1057
+ task_id: str | None = typer.Option(
1058
+ None, "--task", "-t", help="Specific task ID within the job to cancel."
1198
1059
  ),
1199
- skip_vllm_startup: bool = typer.Option(
1200
- False,
1201
- "--skip-vllm-startup",
1202
- help="Skip the vLLM server startup",
1060
+ all_jobs: bool = typer.Option(
1061
+ False, "--all", "-a", help="Cancel ALL active jobs for your account (panic button)."
1203
1062
  ),
1063
+ yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt."),
1204
1064
  ) -> None:
1205
- """🎯 Run GRPO reinforcement learning training on tasks."""
1206
- # Import from the rl module
1207
- from .rl import rl_command
1065
+ """Cancel remote rollouts.
1208
1066
 
1209
- rl_command(
1210
- tasks_file=tasks_file,
1211
- model=model,
1212
- config_file=config_file,
1213
- output_dir=output_dir,
1214
- restart=restart,
1215
- verbose=verbose,
1216
- local=local,
1217
- no_ddp=no_ddp,
1218
- ddp_gpus=ddp_gpus,
1219
- vllm_gpu=vllm_gpu,
1220
- vllm_gpu_count=vllm_gpu_count,
1221
- yes=yes,
1222
- skip_vllm_startup=skip_vllm_startup,
1223
- )
1067
+ Examples:
1068
+ hud cancel <job_id> # Cancel all tasks in a job
1069
+ hud cancel <job_id> --task <id> # Cancel specific task
1070
+ hud cancel --all # Cancel ALL active jobs (panic button)
1071
+ """
1072
+ import asyncio
1073
+
1074
+ import questionary
1075
+
1076
+ hud_console = HUDConsole()
1077
+
1078
+ if not job_id and not all_jobs:
1079
+ hud_console.error("Provide a job_id or use --all to cancel all active jobs.")
1080
+ raise typer.Exit(1)
1081
+
1082
+ if job_id and all_jobs:
1083
+ hud_console.error("Cannot specify both job_id and --all.")
1084
+ raise typer.Exit(1)
1085
+
1086
+ # Handle confirmations BEFORE entering async context (questionary uses asyncio internally)
1087
+ if (
1088
+ all_jobs
1089
+ and not yes
1090
+ and not questionary.confirm(
1091
+ "⚠️ This will cancel ALL your active jobs. Continue?",
1092
+ default=False,
1093
+ ).ask()
1094
+ ):
1095
+ hud_console.info("Cancelled.")
1096
+ raise typer.Exit(0)
1097
+
1098
+ if (
1099
+ job_id
1100
+ and not task_id
1101
+ and not yes
1102
+ and not questionary.confirm(
1103
+ f"Cancel all tasks in job {job_id}?",
1104
+ default=True,
1105
+ ).ask()
1106
+ ):
1107
+ hud_console.info("Cancelled.")
1108
+ raise typer.Exit(0)
1109
+
1110
+ async def _cancel() -> None:
1111
+ from hud.datasets.utils import cancel_all_jobs, cancel_job, cancel_task
1112
+
1113
+ if all_jobs:
1114
+ hud_console.info("Cancelling all active jobs...")
1115
+ result = await cancel_all_jobs()
1116
+
1117
+ jobs_cancelled = result.get("jobs_cancelled", 0)
1118
+ tasks_cancelled = result.get("total_tasks_cancelled", 0)
1119
+
1120
+ if jobs_cancelled == 0:
1121
+ hud_console.info("No active jobs found.")
1122
+ else:
1123
+ hud_console.success(
1124
+ f"Cancelled {jobs_cancelled} job(s), {tasks_cancelled} task(s) total."
1125
+ )
1126
+ for job in result.get("job_details", []):
1127
+ hud_console.info(f" • {job['job_id']}: {job['cancelled']} tasks cancelled")
1128
+
1129
+ elif task_id:
1130
+ hud_console.info(f"Cancelling task {task_id} in job {job_id}...")
1131
+ result = await cancel_task(job_id, task_id) # type: ignore[arg-type]
1132
+
1133
+ status = result.get("status", "unknown")
1134
+ if status in ("revoked", "terminated"):
1135
+ hud_console.success(f"Task cancelled: {result.get('message', '')}")
1136
+ elif status == "not_found":
1137
+ hud_console.warning(f"Task not found: {result.get('message', '')}")
1138
+ else:
1139
+ hud_console.info(f"Status: {status} - {result.get('message', '')}")
1140
+
1141
+ else:
1142
+ hud_console.info(f"Cancelling job {job_id}...")
1143
+ result = await cancel_job(job_id) # type: ignore[arg-type]
1144
+
1145
+ total = result.get("total_found", 0)
1146
+ cancelled = result.get("cancelled", 0)
1147
+
1148
+ if total == 0:
1149
+ hud_console.warning(f"No tasks found for job {job_id}")
1150
+ else:
1151
+ hud_console.success(
1152
+ f"Cancelled {cancelled}/{total} tasks "
1153
+ f"({result.get('running_terminated', 0)} running, "
1154
+ f"{result.get('queued_revoked', 0)} queued)"
1155
+ )
1156
+
1157
+ try:
1158
+ asyncio.run(_cancel())
1159
+ except httpx.HTTPStatusError as e:
1160
+ hud_console.error(f"API error: {e.response.status_code} - {e.response.text}")
1161
+ raise typer.Exit(1) from e
1162
+ except Exception as e:
1163
+ hud_console.error(f"Failed to cancel: {e}")
1164
+ raise typer.Exit(1) from e
1224
1165
 
1225
1166
 
1226
1167
  @app.command()
@@ -1231,13 +1172,12 @@ def set(
1231
1172
  ) -> None:
1232
1173
  """Persist API keys or other variables for HUD to use by default.
1233
1174
 
1234
- Examples:
1175
+ [not dim]Examples:
1235
1176
  hud set ANTHROPIC_API_KEY=sk-... OPENAI_API_KEY=sk-...
1236
1177
 
1237
1178
  Values are stored in ~/.hud/.env and are loaded by hud.settings with
1238
- the lowest precedence (overridden by process env and project .env).
1179
+ the lowest precedence (overridden by process env and project .env).[/not dim]
1239
1180
  """
1240
- from hud.utils.hud_console import HUDConsole
1241
1181
 
1242
1182
  hud_console = HUDConsole()
1243
1183
 
@@ -1261,6 +1201,13 @@ def set(
1261
1201
 
1262
1202
  def main() -> None:
1263
1203
  """Main entry point for the CLI."""
1204
+ # Check for updates (including on --version command)
1205
+ # Skip only on help-only commands
1206
+ if not (len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] in ["--help", "-h"])):
1207
+ from .utils.version_check import display_update_prompt
1208
+
1209
+ display_update_prompt()
1210
+
1264
1211
  # Handle --version flag before Typer parses args
1265
1212
  if "--version" in sys.argv:
1266
1213
  try: