hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274)
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +11 -5
  3. hud/agents/base.py +220 -500
  4. hud/agents/claude.py +200 -240
  5. hud/agents/gemini.py +275 -0
  6. hud/agents/gemini_cua.py +335 -0
  7. hud/agents/grounded_openai.py +98 -100
  8. hud/agents/misc/integration_test_agent.py +51 -20
  9. hud/agents/misc/response_agent.py +41 -36
  10. hud/agents/openai.py +291 -292
  11. hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
  12. hud/agents/operator.py +211 -0
  13. hud/agents/tests/conftest.py +133 -0
  14. hud/agents/tests/test_base.py +300 -622
  15. hud/agents/tests/test_base_runtime.py +233 -0
  16. hud/agents/tests/test_claude.py +379 -210
  17. hud/agents/tests/test_client.py +9 -10
  18. hud/agents/tests/test_gemini.py +369 -0
  19. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  20. hud/agents/tests/test_openai.py +376 -140
  21. hud/agents/tests/test_operator.py +362 -0
  22. hud/agents/tests/test_run_eval.py +179 -0
  23. hud/cli/__init__.py +461 -545
  24. hud/cli/analyze.py +43 -5
  25. hud/cli/build.py +664 -110
  26. hud/cli/debug.py +8 -5
  27. hud/cli/dev.py +882 -734
  28. hud/cli/eval.py +782 -668
  29. hud/cli/flows/dev.py +167 -0
  30. hud/cli/flows/init.py +191 -0
  31. hud/cli/flows/tasks.py +153 -56
  32. hud/cli/flows/templates.py +151 -0
  33. hud/cli/flows/tests/__init__.py +1 -0
  34. hud/cli/flows/tests/test_dev.py +126 -0
  35. hud/cli/init.py +60 -58
  36. hud/cli/push.py +29 -11
  37. hud/cli/rft.py +311 -0
  38. hud/cli/rft_status.py +145 -0
  39. hud/cli/tests/test_analyze.py +5 -5
  40. hud/cli/tests/test_analyze_metadata.py +3 -2
  41. hud/cli/tests/test_analyze_module.py +120 -0
  42. hud/cli/tests/test_build.py +108 -6
  43. hud/cli/tests/test_build_failure.py +41 -0
  44. hud/cli/tests/test_build_module.py +50 -0
  45. hud/cli/tests/test_cli_init.py +6 -1
  46. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  47. hud/cli/tests/test_cli_root.py +140 -0
  48. hud/cli/tests/test_convert.py +361 -0
  49. hud/cli/tests/test_debug.py +12 -10
  50. hud/cli/tests/test_dev.py +197 -0
  51. hud/cli/tests/test_eval.py +251 -0
  52. hud/cli/tests/test_eval_bedrock.py +51 -0
  53. hud/cli/tests/test_init.py +124 -0
  54. hud/cli/tests/test_main_module.py +11 -5
  55. hud/cli/tests/test_mcp_server.py +12 -100
  56. hud/cli/tests/test_push_happy.py +74 -0
  57. hud/cli/tests/test_push_wrapper.py +23 -0
  58. hud/cli/tests/test_registry.py +1 -1
  59. hud/cli/tests/test_utils.py +1 -1
  60. hud/cli/{rl → utils}/celebrate.py +14 -12
  61. hud/cli/utils/config.py +18 -1
  62. hud/cli/utils/docker.py +130 -4
  63. hud/cli/utils/env_check.py +9 -9
  64. hud/cli/utils/git.py +136 -0
  65. hud/cli/utils/interactive.py +39 -5
  66. hud/cli/utils/metadata.py +69 -0
  67. hud/cli/utils/runner.py +1 -1
  68. hud/cli/utils/server.py +2 -2
  69. hud/cli/utils/source_hash.py +3 -3
  70. hud/cli/utils/tasks.py +4 -1
  71. hud/cli/utils/tests/__init__.py +0 -0
  72. hud/cli/utils/tests/test_config.py +58 -0
  73. hud/cli/utils/tests/test_docker.py +93 -0
  74. hud/cli/utils/tests/test_docker_hints.py +71 -0
  75. hud/cli/utils/tests/test_env_check.py +74 -0
  76. hud/cli/utils/tests/test_environment.py +42 -0
  77. hud/cli/utils/tests/test_git.py +142 -0
  78. hud/cli/utils/tests/test_interactive_module.py +60 -0
  79. hud/cli/utils/tests/test_local_runner.py +50 -0
  80. hud/cli/utils/tests/test_logging_utils.py +23 -0
  81. hud/cli/utils/tests/test_metadata.py +49 -0
  82. hud/cli/utils/tests/test_package_runner.py +35 -0
  83. hud/cli/utils/tests/test_registry_utils.py +49 -0
  84. hud/cli/utils/tests/test_remote_runner.py +25 -0
  85. hud/cli/utils/tests/test_runner_modules.py +52 -0
  86. hud/cli/utils/tests/test_source_hash.py +36 -0
  87. hud/cli/utils/tests/test_tasks.py +80 -0
  88. hud/cli/utils/version_check.py +258 -0
  89. hud/cli/{rl → utils}/viewer.py +2 -2
  90. hud/clients/README.md +12 -11
  91. hud/clients/__init__.py +4 -3
  92. hud/clients/base.py +166 -26
  93. hud/clients/environment.py +51 -0
  94. hud/clients/fastmcp.py +13 -6
  95. hud/clients/mcp_use.py +40 -15
  96. hud/clients/tests/test_analyze_scenarios.py +206 -0
  97. hud/clients/tests/test_protocol.py +9 -3
  98. hud/datasets/__init__.py +23 -20
  99. hud/datasets/loader.py +327 -0
  100. hud/datasets/runner.py +192 -105
  101. hud/datasets/tests/__init__.py +0 -0
  102. hud/datasets/tests/test_loader.py +221 -0
  103. hud/datasets/tests/test_utils.py +315 -0
  104. hud/datasets/utils.py +270 -90
  105. hud/environment/__init__.py +50 -0
  106. hud/environment/connection.py +206 -0
  107. hud/environment/connectors/__init__.py +33 -0
  108. hud/environment/connectors/base.py +68 -0
  109. hud/environment/connectors/local.py +177 -0
  110. hud/environment/connectors/mcp_config.py +109 -0
  111. hud/environment/connectors/openai.py +101 -0
  112. hud/environment/connectors/remote.py +172 -0
  113. hud/environment/environment.py +694 -0
  114. hud/environment/integrations/__init__.py +45 -0
  115. hud/environment/integrations/adk.py +67 -0
  116. hud/environment/integrations/anthropic.py +196 -0
  117. hud/environment/integrations/gemini.py +92 -0
  118. hud/environment/integrations/langchain.py +82 -0
  119. hud/environment/integrations/llamaindex.py +68 -0
  120. hud/environment/integrations/openai.py +238 -0
  121. hud/environment/mock.py +306 -0
  122. hud/environment/router.py +112 -0
  123. hud/environment/scenarios.py +493 -0
  124. hud/environment/tests/__init__.py +1 -0
  125. hud/environment/tests/test_connection.py +317 -0
  126. hud/environment/tests/test_connectors.py +218 -0
  127. hud/environment/tests/test_environment.py +161 -0
  128. hud/environment/tests/test_integrations.py +257 -0
  129. hud/environment/tests/test_local_connectors.py +201 -0
  130. hud/environment/tests/test_scenarios.py +280 -0
  131. hud/environment/tests/test_tools.py +208 -0
  132. hud/environment/types.py +23 -0
  133. hud/environment/utils/__init__.py +35 -0
  134. hud/environment/utils/formats.py +215 -0
  135. hud/environment/utils/schema.py +171 -0
  136. hud/environment/utils/tool_wrappers.py +113 -0
  137. hud/eval/__init__.py +67 -0
  138. hud/eval/context.py +674 -0
  139. hud/eval/display.py +299 -0
  140. hud/eval/instrument.py +185 -0
  141. hud/eval/manager.py +466 -0
  142. hud/eval/parallel.py +268 -0
  143. hud/eval/task.py +340 -0
  144. hud/eval/tests/__init__.py +1 -0
  145. hud/eval/tests/test_context.py +178 -0
  146. hud/eval/tests/test_eval.py +210 -0
  147. hud/eval/tests/test_manager.py +152 -0
  148. hud/eval/tests/test_parallel.py +168 -0
  149. hud/eval/tests/test_task.py +145 -0
  150. hud/eval/types.py +63 -0
  151. hud/eval/utils.py +183 -0
  152. hud/patches/__init__.py +19 -0
  153. hud/patches/mcp_patches.py +151 -0
  154. hud/patches/warnings.py +54 -0
  155. hud/samples/browser.py +4 -4
  156. hud/server/__init__.py +2 -1
  157. hud/server/low_level.py +2 -1
  158. hud/server/router.py +164 -0
  159. hud/server/server.py +567 -80
  160. hud/server/tests/test_mcp_server_integration.py +11 -11
  161. hud/server/tests/test_mcp_server_more.py +1 -1
  162. hud/server/tests/test_server_extra.py +2 -0
  163. hud/settings.py +45 -3
  164. hud/shared/exceptions.py +36 -10
  165. hud/shared/hints.py +26 -1
  166. hud/shared/requests.py +15 -3
  167. hud/shared/tests/test_exceptions.py +40 -31
  168. hud/shared/tests/test_hints.py +167 -0
  169. hud/telemetry/__init__.py +20 -19
  170. hud/telemetry/exporter.py +201 -0
  171. hud/telemetry/instrument.py +158 -253
  172. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  173. hud/telemetry/tests/test_exporter.py +258 -0
  174. hud/telemetry/tests/test_instrument.py +401 -0
  175. hud/tools/__init__.py +16 -2
  176. hud/tools/apply_patch.py +639 -0
  177. hud/tools/base.py +54 -4
  178. hud/tools/bash.py +2 -2
  179. hud/tools/computer/__init__.py +4 -0
  180. hud/tools/computer/anthropic.py +2 -2
  181. hud/tools/computer/gemini.py +385 -0
  182. hud/tools/computer/hud.py +23 -6
  183. hud/tools/computer/openai.py +20 -21
  184. hud/tools/computer/qwen.py +434 -0
  185. hud/tools/computer/settings.py +37 -0
  186. hud/tools/edit.py +3 -7
  187. hud/tools/executors/base.py +4 -2
  188. hud/tools/executors/pyautogui.py +1 -1
  189. hud/tools/grounding/grounded_tool.py +13 -18
  190. hud/tools/grounding/grounder.py +10 -31
  191. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  192. hud/tools/jupyter.py +330 -0
  193. hud/tools/playwright.py +18 -3
  194. hud/tools/shell.py +308 -0
  195. hud/tools/tests/test_apply_patch.py +718 -0
  196. hud/tools/tests/test_computer.py +4 -9
  197. hud/tools/tests/test_computer_actions.py +24 -2
  198. hud/tools/tests/test_jupyter_tool.py +181 -0
  199. hud/tools/tests/test_shell.py +596 -0
  200. hud/tools/tests/test_submit.py +85 -0
  201. hud/tools/tests/test_types.py +193 -0
  202. hud/tools/types.py +21 -1
  203. hud/types.py +167 -57
  204. hud/utils/__init__.py +2 -0
  205. hud/utils/env.py +67 -0
  206. hud/utils/hud_console.py +61 -3
  207. hud/utils/mcp.py +15 -58
  208. hud/utils/strict_schema.py +162 -0
  209. hud/utils/tests/test_init.py +1 -2
  210. hud/utils/tests/test_mcp.py +1 -28
  211. hud/utils/tests/test_pretty_errors.py +186 -0
  212. hud/utils/tests/test_tool_shorthand.py +154 -0
  213. hud/utils/tests/test_version.py +1 -1
  214. hud/utils/types.py +20 -0
  215. hud/version.py +1 -1
  216. hud_python-0.5.1.dist-info/METADATA +264 -0
  217. hud_python-0.5.1.dist-info/RECORD +299 -0
  218. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
  219. hud/agents/langchain.py +0 -261
  220. hud/agents/lite_llm.py +0 -72
  221. hud/cli/rl/__init__.py +0 -180
  222. hud/cli/rl/config.py +0 -101
  223. hud/cli/rl/display.py +0 -133
  224. hud/cli/rl/gpu.py +0 -63
  225. hud/cli/rl/gpu_utils.py +0 -321
  226. hud/cli/rl/local_runner.py +0 -595
  227. hud/cli/rl/presets.py +0 -96
  228. hud/cli/rl/remote_runner.py +0 -463
  229. hud/cli/rl/rl_api.py +0 -150
  230. hud/cli/rl/vllm.py +0 -177
  231. hud/cli/rl/wait_utils.py +0 -89
  232. hud/datasets/parallel.py +0 -687
  233. hud/misc/__init__.py +0 -1
  234. hud/misc/claude_plays_pokemon.py +0 -292
  235. hud/otel/__init__.py +0 -35
  236. hud/otel/collector.py +0 -142
  237. hud/otel/config.py +0 -181
  238. hud/otel/context.py +0 -570
  239. hud/otel/exporters.py +0 -369
  240. hud/otel/instrumentation.py +0 -135
  241. hud/otel/processors.py +0 -121
  242. hud/otel/tests/__init__.py +0 -1
  243. hud/otel/tests/test_processors.py +0 -197
  244. hud/rl/README.md +0 -30
  245. hud/rl/__init__.py +0 -1
  246. hud/rl/actor.py +0 -176
  247. hud/rl/buffer.py +0 -405
  248. hud/rl/chat_template.jinja +0 -101
  249. hud/rl/config.py +0 -192
  250. hud/rl/distributed.py +0 -132
  251. hud/rl/learner.py +0 -637
  252. hud/rl/tests/__init__.py +0 -1
  253. hud/rl/tests/test_learner.py +0 -186
  254. hud/rl/train.py +0 -382
  255. hud/rl/types.py +0 -101
  256. hud/rl/utils/start_vllm_server.sh +0 -30
  257. hud/rl/utils.py +0 -524
  258. hud/rl/vllm_adapter.py +0 -143
  259. hud/telemetry/job.py +0 -352
  260. hud/telemetry/replay.py +0 -74
  261. hud/telemetry/tests/test_replay.py +0 -40
  262. hud/telemetry/tests/test_trace.py +0 -63
  263. hud/telemetry/trace.py +0 -158
  264. hud/utils/agent_factories.py +0 -86
  265. hud/utils/async_utils.py +0 -65
  266. hud/utils/group_eval.py +0 -223
  267. hud/utils/progress.py +0 -149
  268. hud/utils/tasks.py +0 -127
  269. hud/utils/tests/test_async_utils.py +0 -173
  270. hud/utils/tests/test_progress.py +0 -261
  271. hud_python-0.4.45.dist-info/METADATA +0 -552
  272. hud_python-0.4.45.dist-info/RECORD +0 -228
  273. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
  274. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/cli/__init__.py CHANGED
@@ -3,32 +3,29 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import asyncio
6
- import contextlib
7
6
  import json
8
7
  import sys
9
8
  from pathlib import Path
10
9
 
10
+ import httpx
11
11
  import typer
12
12
  from rich.console import Console
13
13
  from rich.panel import Panel
14
14
  from rich.table import Table
15
15
 
16
+ from hud.utils.hud_console import HUDConsole
17
+
16
18
  from . import list_func as list_module
17
- from .analyze import (
18
- analyze_environment,
19
- analyze_environment_from_config,
20
- analyze_environment_from_mcp_config,
21
- )
22
19
  from .build import build_command
23
20
  from .clone import clone_repository, get_clone_message, print_error, print_tutorial
24
21
  from .debug import debug_mcp_stdio
25
22
  from .dev import run_mcp_dev_server
26
-
27
- # Import new commands
28
- from .init import create_environment
23
+ from .eval import eval_command
29
24
  from .pull import pull_command
30
25
  from .push import push_command
31
26
  from .remove import remove_command
27
+ from .rft import rft_command
28
+ from .rft_status import rft_status_command
32
29
  from .utils.config import set_env_values
33
30
  from .utils.cursor import get_cursor_config_path, list_cursor_servers, parse_cursor_config
34
31
  from .utils.logging import CaptureLogger
@@ -39,6 +36,7 @@ app = typer.Typer(
39
36
  help="🚀 HUD CLI for MCP environment analysis and debugging",
40
37
  add_completion=False,
41
38
  rich_markup_mode="rich",
39
+ pretty_exceptions_enable=False, # Disable Rich's verbose tracebacks
42
40
  )
43
41
 
44
42
  console = Console()
@@ -91,15 +89,22 @@ def analyze(
91
89
  ) -> None:
92
90
  """🔍 Analyze MCP environment - discover tools, resources, and capabilities.
93
91
 
94
- By default, uses cached metadata for instant results.
92
+ [not dim]By default, uses cached metadata for instant results.
95
93
  Use --live to run the container for real-time analysis.
96
94
 
97
95
  Examples:
98
96
  hud analyze hudpython/test_init # Fast metadata inspection
99
97
  hud analyze my-env --live # Full container analysis
100
98
  hud analyze --config mcp-config.json # From MCP config
101
- hud analyze --cursor text-2048-dev # From Cursor config
99
+ hud analyze --cursor text-2048-dev # From Cursor config[/not dim]
102
100
  """
101
+ # Lazy import to avoid loading mcp_use on simple CLI commands
102
+ from .analyze import (
103
+ analyze_environment,
104
+ analyze_environment_from_config,
105
+ analyze_environment_from_mcp_config,
106
+ )
107
+
103
108
  if config:
104
109
  # Load config from JSON file (always live for configs)
105
110
  asyncio.run(analyze_environment_from_config(config, output_format, verbose))
@@ -175,7 +180,7 @@ def debug(
175
180
  ) -> None:
176
181
  """🐛 Debug MCP environment - test initialization, tools, and readiness.
177
182
 
178
- Examples:
183
+ [not dim]Examples:
179
184
  hud debug . # Debug current directory
180
185
  hud debug environments/browser # Debug specific directory
181
186
  hud debug . --build # Build then debug
@@ -183,10 +188,9 @@ def debug(
183
188
  hud debug my-mcp-server:v1 -e API_KEY=xxx
184
189
  hud debug --config mcp-config.json
185
190
  hud debug --cursor text-2048-dev
186
- hud debug . --max-phase 3 # Stop after phase 3
191
+ hud debug . --max-phase 3 # Stop after phase 3[/not dim]
187
192
  """
188
193
  # Import here to avoid circular imports
189
- from hud.utils.hud_console import HUDConsole
190
194
 
191
195
  from .utils.environment import (
192
196
  build_environment,
@@ -242,16 +246,32 @@ def debug(
242
246
  if build and not build_environment(directory, image_name):
243
247
  raise typer.Exit(1)
244
248
 
245
- # Build Docker command
246
- from .utils.docker import build_run_command
249
+ # Build Docker command with folder-mode envs
250
+ from .utils.docker import create_docker_run_command
247
251
 
248
- command = build_run_command(image_name, docker_args)
252
+ command = create_docker_run_command(
253
+ image_name, docker_args=docker_args, env_dir=directory
254
+ )
249
255
  else:
250
256
  # Assume it's an image name
251
257
  image = first_param
252
- from .utils.docker import build_run_command
258
+ from .utils.docker import create_docker_run_command
259
+
260
+ # For image mode, check if there's a .env file in current directory
261
+ # and use it if available (similar to hud dev behavior)
262
+ cwd = Path.cwd()
263
+ if (cwd / ".env").exists():
264
+ # Use create_docker_run_command to load .env from current directory
265
+ command = create_docker_run_command(
266
+ image,
267
+ docker_args=docker_args,
268
+ env_dir=cwd, # Load .env from current directory
269
+ )
270
+ else:
271
+ # No .env file, use basic command without env loading
272
+ from .utils.docker import build_run_command
253
273
 
254
- command = build_run_command(image, docker_args)
274
+ command = build_run_command(image, docker_args)
255
275
  else:
256
276
  console.print(
257
277
  "[red]Error: Must specify a directory, Docker image, --config, or --cursor[/red]"
@@ -269,8 +289,6 @@ def debug(
269
289
  phases_completed = asyncio.run(debug_mcp_stdio(command, logger, max_phase=max_phase))
270
290
 
271
291
  # Show summary using design system
272
- from hud.utils.hud_console import HUDConsole
273
-
274
292
  hud_console = HUDConsole()
275
293
 
276
294
  hud_console.info("") # Empty line
@@ -348,80 +366,152 @@ def version() -> None:
348
366
  console.print("HUD CLI version: [cyan]unknown[/cyan]")
349
367
 
350
368
 
369
+ @app.command()
370
+ def models(
371
+ json_output: bool = typer.Option(False, "--json", help="Output as JSON"),
372
+ ) -> None:
373
+ """📋 List available models from HUD inference gateway.
374
+
375
+ [not dim]Shows models available via the HUD inference gateway at inference.hud.ai.
376
+
377
+ Examples:
378
+ hud models # List all models
379
+ hud models --json # Output as JSON[/not dim]
380
+ """
381
+ from hud.settings import settings
382
+
383
+ try:
384
+ response = httpx.get(
385
+ f"{settings.hud_gateway_url}/models",
386
+ headers={"Authorization": f"Bearer {settings.api_key}"} if settings.api_key else {},
387
+ timeout=30.0,
388
+ )
389
+ response.raise_for_status()
390
+ data = response.json()
391
+
392
+ if json_output:
393
+ console.print_json(json.dumps(data, indent=2))
394
+ return
395
+
396
+ # Parse and display models
397
+ models_list = data.get("data", data) if isinstance(data, dict) else data
398
+
399
+ if not models_list:
400
+ console.print("[yellow]No models found[/yellow]")
401
+ return
402
+
403
+ console.print(Panel.fit("📋 [bold cyan]Available Models[/bold cyan]", border_style="cyan"))
404
+
405
+ table = Table()
406
+ table.add_column("Name", style="cyan")
407
+ table.add_column("Model (API)", style="green")
408
+ table.add_column("Routes", style="yellow")
409
+
410
+ for model in models_list:
411
+ if isinstance(model, dict):
412
+ name = model.get("name", "-")
413
+ api_model = model.get("model", model.get("id", "-"))
414
+ routes = model.get("routes", [])
415
+ routes_str = ", ".join(routes) if routes else "-"
416
+ table.add_row(name, api_model, routes_str)
417
+ else:
418
+ table.add_row(str(model), "-", "-")
419
+
420
+ console.print(table)
421
+ console.print(f"\n[dim]Gateway: {settings.hud_gateway_url}[/dim]")
422
+
423
+ except httpx.HTTPStatusError as e:
424
+ console.print(f"[red]❌ API error: {e.response.status_code}[/red]")
425
+ console.print(f"[dim]{e.response.text}[/dim]")
426
+ raise typer.Exit(1) from e
427
+ except Exception as e:
428
+ console.print(f"[red]❌ Failed to fetch models: {e}[/red]")
429
+ raise typer.Exit(1) from e
430
+
431
+
351
432
  @app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
352
433
  def dev(
353
434
  params: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008
354
435
  None,
355
- help="Environment directory followed by optional Docker arguments (e.g., '. -e KEY=value')",
356
- ),
357
- image: str | None = typer.Option(
358
- None, "--image", "-i", help="Docker image name (overrides auto-detection)"
436
+ help="Module path or extra Docker args (when using --docker)",
359
437
  ),
360
- build: bool = typer.Option(False, "--build", "-b", help="Build image before starting"),
361
- no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild without cache"),
362
- transport: str = typer.Option(
363
- "http", "--transport", "-t", help="Transport protocol: http (default) or stdio"
438
+ docker: bool = typer.Option(
439
+ False,
440
+ "--docker",
441
+ help="Run in Docker with volume mounts for hot-reload (for complex environments)",
364
442
  ),
365
- port: int = typer.Option(8765, "--port", "-p", help="HTTP server port (ignored for stdio)"),
366
- no_reload: bool = typer.Option(False, "--no-reload", help="Disable hot-reload"),
367
- full_reload: bool = typer.Option(
443
+ stdio: bool = typer.Option(
368
444
  False,
369
- "--full-reload",
370
- help="Restart entire container on file changes (instead of just server process)",
445
+ "--stdio",
446
+ help="Use stdio transport (default: HTTP)",
371
447
  ),
372
- verbose: bool = typer.Option(False, "--verbose", "-v", help="Show server logs"),
448
+ port: int = typer.Option(8765, "--port", "-p", help="HTTP server port (ignored for stdio)"),
449
+ verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed logs"),
373
450
  inspector: bool = typer.Option(
374
451
  False, "--inspector", help="Launch MCP Inspector (HTTP mode only)"
375
452
  ),
376
- no_logs: bool = typer.Option(False, "--no-logs", help="Disable streaming Docker logs"),
377
453
  interactive: bool = typer.Option(
378
454
  False, "--interactive", help="Launch interactive testing mode (HTTP mode only)"
379
455
  ),
456
+ watch: list[str] = typer.Option( # noqa: B008
457
+ [],
458
+ "--watch",
459
+ "-w",
460
+ help="Paths to watch for hot-reload (repeatable: -w tools -w env.py)",
461
+ ),
462
+ new: bool = typer.Option(
463
+ False,
464
+ "--new",
465
+ help="Create a new dev trace on hud.ai (opens in browser)",
466
+ ),
380
467
  ) -> None:
381
- """🔥 Development mode - interactive MCP environment.
468
+ """🔥 Development mode - run MCP server with hot-reload.
382
469
 
383
- Runs your MCP environment in Docker with mounted source for development.
384
- The container's CMD determines reload behavior.
470
+ [not dim]TWO MODES:
471
+
472
+ 1. Python Module:
473
+ hud dev # Auto-detects module
474
+ hud dev env:env # Explicit module:attribute
475
+ hud dev -w . # Watch current directory
476
+
477
+ 2. Docker (Complex environments):
478
+ hud dev # Auto-detects Dockerfile, no hot-reload
479
+ hud dev -w tools -w env.py # Mount & watch specific paths
480
+ hud dev -w tools # Just watch tools folder
481
+
482
+ For Docker mode, use --watch to specify which folders to mount and watch.
483
+ Paths not in --watch stay in the built image (no hot-reload).
385
484
 
386
485
  Examples:
387
- hud dev # Auto-detect in current directory
388
- hud dev environments/browser # Specific directory
389
- hud dev . --build # Build image first
390
- hud dev . --image custom:tag # Use specific image
391
- hud dev . --no-cache # Force clean rebuild
392
- hud dev . --verbose # Show detailed logs
393
- hud dev . --transport stdio # Use stdio proxy for multiple connections
394
- hud dev . --inspector # Launch MCP Inspector (HTTP mode only)
395
- hud dev . --interactive # Launch interactive testing mode (HTTP mode only)
396
- hud dev . --no-logs # Disable Docker log streaming
397
-
398
- # With Docker arguments (after all options):
399
- hud dev . -e BROWSER_PROVIDER=anchorbrowser -e ANCHOR_API_KEY=xxx
400
- hud dev . -e API_KEY=secret -v /tmp/data:/data --network host
401
- hud dev . --build -e DEBUG=true --memory 2g
486
+ hud dev # Auto-detect mode
487
+ hud dev --new # Create live dev trace on hud.ai
488
+ hud dev env:env # Run specific module
489
+ hud dev --inspector # Launch MCP Inspector
490
+ hud dev --interactive # Launch interactive testing mode
491
+ hud dev -w 'tools env.py' # Docker: hot-reload tools/ and env.py
492
+
493
+ Local development pattern (Docker + local scenarios):
494
+ Terminal 1: hud dev -w 'tools env.py' --port 8000
495
+ Terminal 2: python local_test.py # Uses connect_url()[/not dim]
402
496
  """
403
- # Parse directory and Docker arguments
404
- if params:
405
- directory = params[0]
406
- docker_args = params[1:] if len(params) > 1 else []
407
- else:
408
- directory = "."
409
- docker_args = []
497
+ # Extract module from params if provided (first param when not --docker)
498
+ module = params[0] if params and not docker else None
499
+ docker_args = params if docker else []
500
+
501
+ # Convert empty list to None for run_mcp_dev_server
502
+ watch_paths = watch if watch else None
410
503
 
411
504
  run_mcp_dev_server(
412
- directory,
413
- image,
414
- build,
415
- no_cache,
416
- transport,
505
+ module,
506
+ stdio,
417
507
  port,
418
- no_reload,
419
- full_reload,
420
508
  verbose,
421
509
  inspector,
422
- no_logs,
423
510
  interactive,
424
- docker_args,
511
+ watch_paths,
512
+ docker=docker,
513
+ docker_args=docker_args,
514
+ new_trace=new,
425
515
  )
426
516
 
427
517
 
@@ -429,17 +519,13 @@ def dev(
429
519
  def run(
430
520
  params: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008
431
521
  None,
432
- help="Python file/module/package or Docker image followed by optional arguments",
522
+ help="Docker image followed by optional Docker run arguments "
523
+ "(e.g., 'my-image:latest -e KEY=value')",
433
524
  ),
434
525
  local: bool = typer.Option(
435
526
  False,
436
527
  "--local",
437
- help="Run locally with Docker (default: remote via mcp.hud.so)",
438
- ),
439
- remote: bool = typer.Option(
440
- False,
441
- "--remote",
442
- help="Run remotely via mcp.hud.so (default)",
528
+ help="Run locally with Docker (default: remote via mcp.hud.ai)",
443
529
  ),
444
530
  transport: str = typer.Option(
445
531
  "stdio",
@@ -456,7 +542,7 @@ def run(
456
542
  url: str = typer.Option(
457
543
  None,
458
544
  "--url",
459
- help="Remote MCP server URL (default: HUD_MCP_URL or mcp.hud.so)",
545
+ help="Remote MCP server URL (default: HUD_MCP_URL or mcp.hud.ai)",
460
546
  ),
461
547
  api_key: str | None = typer.Option(
462
548
  None,
@@ -474,180 +560,54 @@ def run(
474
560
  "-v",
475
561
  help="Show detailed output",
476
562
  ),
477
- interactive: bool = typer.Option(
478
- False,
479
- "--interactive",
480
- help="Launch interactive testing mode (HTTP transport only)",
481
- ),
482
- reload: bool = typer.Option(
483
- False,
484
- "--reload",
485
- help="Enable auto-reload on file changes (local Python files only)",
486
- ),
487
- watch: list[str] = typer.Option( # noqa: B008
488
- None,
489
- "--watch",
490
- help="Directories to watch for changes (can be used multiple times). Defaults to current directory.", # noqa: E501
491
- ),
492
- cmd: str | None = typer.Option(
493
- None,
494
- "--cmd",
495
- help="Command to run as MCP server (e.g., 'python -m controller')",
496
- ),
497
563
  ) -> None:
498
- """🚀 Run MCP server.
564
+ """🚀 Run Docker image as MCP server.
499
565
 
500
- Modes:
501
- - Python (decorator-based): pass a dotted module path. Example: hud run controller
502
- The module is imported, decorators register implicitly, and the server runs.
503
- Use --reload to watch the module/package directory.
566
+ [not dim]A simple wrapper around 'docker run' that can launch images locally or remotely.
567
+ By default, runs remotely via mcp.hud.ai. Use --local to run with local Docker.
504
568
 
505
- - Command: use --cmd to run any command as an MCP server. Example: hud run --cmd "python -m controller"
506
- Works with Docker, binaries, or any executable. Supports --reload.
569
+ For local Python development with hot-reload, use 'hud dev' instead.
507
570
 
508
- - Docker image: pass a Docker image name (optionally with --local to run locally).
509
- """ # noqa: E501
510
- if not params and not cmd:
511
- typer.echo("❌ Dotted module path, Docker image, or --cmd is required")
571
+ Examples:
572
+ hud run my-image:latest # Run remotely (default)
573
+ hud run my-image:latest --local # Run with local Docker
574
+ hud run my-image:latest -e KEY=value # Remote with env vars
575
+ hud run my-image:latest --local -e KEY=val # Local with env vars
576
+ hud run my-image:latest --transport http # Use HTTP transport[/not dim]
577
+ """
578
+ if not params:
579
+ console.print("[red]❌ Docker image is required[/red]")
580
+ console.print("\nExamples:")
581
+ console.print(" hud run my-image:latest # Run remotely (default)")
582
+ console.print(" hud run my-image:latest --local # Run with local Docker")
583
+ console.print("\n[yellow]For local Python development:[/yellow]")
584
+ console.print(" hud dev # Run with hot-reload")
512
585
  raise typer.Exit(1)
513
586
 
514
- # Handle --cmd mode
515
- if cmd:
516
- import asyncio
517
-
518
- from .utils.package_runner import run_package_as_mcp
519
-
520
- asyncio.run(
521
- run_package_as_mcp(
522
- cmd, # Pass command string
523
- transport=transport,
524
- port=port,
525
- verbose=verbose,
526
- reload=reload,
527
- watch_paths=watch if watch else None,
528
- )
529
- )
530
- return
531
-
532
- first_param = params[0]
533
- extra_args = params[1:] if len(params) > 1 else []
534
-
535
- # Guard: strip accidental nested 'run' token from positional args,
536
- # which can happen with nested invocations or reload wrappers.
537
- if first_param == "run" and extra_args:
538
- first_param, extra_args = extra_args[0], extra_args[1:]
539
-
540
- # Try to interpret first_param as module[:attr] or file[:attr]
541
- target = first_param
542
- server_attr = "mcp"
543
- if ":" in target:
544
- target, server_attr = target.split(":", 1)
545
-
546
- # Only allow dotted import paths or python files for Python mode
547
- import importlib.util as _importlib_util
548
-
549
- # Ensure current working directory is importable for local packages like 'controller'
550
- try:
551
- import sys as _sys
552
- from pathlib import Path as _Path
553
-
554
- cwd_str = str(_Path.cwd())
555
- if cwd_str not in _sys.path:
556
- _sys.path.insert(0, cwd_str)
557
- except Exception: # noqa: S110
558
- pass
559
- try:
560
- # If given a file path, detect and import via file spec
561
- from pathlib import Path as _Path
562
-
563
- if target.endswith(".py") and _Path(target).exists():
564
- spec = _importlib_util.spec_from_file_location("_hud_module", target)
565
- else:
566
- spec = _importlib_util.find_spec(target)
567
- except Exception:
568
- spec = None
569
-
570
- # Fallback: treat a local package directory (e.g. 'controller') as a module target
571
- from pathlib import Path as _Path
572
-
573
- pkg_dir = _Path(target)
574
- is_pkg_dir = pkg_dir.is_dir() and (pkg_dir / "__init__.py").exists()
575
-
576
- is_python_target = (spec is not None) or is_pkg_dir
587
+ image = params[0]
588
+ docker_args = params[1:] if len(params) > 1 else []
577
589
 
578
- if is_python_target and not (local or remote):
579
- # Python file/package mode - use implicit MCP server
580
- import asyncio
590
+ # Check if user accidentally passed a module path
591
+ from pathlib import Path
581
592
 
582
- from .utils.package_runner import run_package_as_mcp, run_with_reload
583
-
584
- if reload:
585
- # Run with watchfiles reload
586
- # Use user-provided watch paths or compute from module
587
- if watch:
588
- watch_paths = watch
589
- else:
590
- # Compute a watch path that works for dotted modules as well
591
- watch_paths = [target]
592
- if spec is not None:
593
- origin = getattr(spec, "origin", None)
594
- sublocs = getattr(spec, "submodule_search_locations", None)
595
- if origin:
596
- p = _Path(origin)
597
- # If package __init__.py, watch the package directory
598
- watch_paths = [str(p.parent if p.name == "__init__.py" else p)]
599
- elif sublocs:
600
- with contextlib.suppress(Exception):
601
- watch_paths = [next(iter(sublocs))]
602
-
603
- # Always run as subprocess when using reload to enable proper file watching
604
- # This ensures the parent process can watch files while the child runs the server
605
- run_with_reload(
606
- None, # This forces subprocess mode for both stdio and http
607
- watch_paths,
608
- verbose=verbose,
609
- )
610
- else:
611
- # Run normally (but still pass reload=False for consistency)
612
- asyncio.run(
613
- run_package_as_mcp(
614
- target,
615
- transport=transport,
616
- port=port,
617
- verbose=verbose,
618
- server_attr=server_attr,
619
- reload=False, # Explicitly pass reload state
620
- watch_paths=None,
621
- )
622
- )
623
- return
624
-
625
- # Docker image mode
626
- image = first_param
627
- docker_args = extra_args
628
-
629
- # Handle conflicting flags
630
- if local and remote:
631
- typer.echo("❌ Cannot use both --local and --remote")
593
+ if not any(c in image for c in [":", "/"]) and (
594
+ Path(image).is_dir() or Path(image).is_file() or "." in image
595
+ ):
596
+ console.print(f"[yellow]⚠️ '{image}' looks like a module path, not a Docker image[/yellow]")
597
+ console.print("\n[green]For local Python development, use:[/green]")
598
+ console.print(f" hud dev {image}")
599
+ console.print("\n[green]For Docker images:[/green]")
600
+ console.print(" hud run my-image:latest")
632
601
  raise typer.Exit(1)
633
602
 
634
603
  # Default to remote if not explicitly local
635
- is_local = local and not remote
636
-
637
- # Check for interactive mode restrictions
638
- if interactive:
639
- if transport != "http":
640
- typer.echo("❌ Interactive mode requires HTTP transport (use --transport http)")
641
- raise typer.Exit(1)
642
- if not is_local:
643
- typer.echo("❌ Interactive mode is only available for local execution (use --local)")
644
- raise typer.Exit(1)
604
+ is_local = local
645
605
 
646
606
  if is_local:
647
607
  # Local Docker execution
648
608
  from .utils.runner import run_mcp_server
649
609
 
650
- run_mcp_server(image, docker_args, transport, port, verbose, interactive)
610
+ run_mcp_server(image, docker_args, transport, port, verbose, interactive=False)
651
611
  else:
652
612
  # Remote execution via proxy
653
613
  from .utils.remote_runner import run_remote_server
@@ -661,6 +621,74 @@ def run(
661
621
  run_remote_server(image, docker_args, transport, port, url, api_key, run_id, verbose)
662
622
 
663
623
 
624
+ # Create RFT subcommand app
625
+ rft_app = typer.Typer(help="🚀 Reinforcement Fine-Tuning (RFT) commands")
626
+
627
+
628
+ @rft_app.command("run")
629
+ def rft_run(
630
+ tasks_file: str = typer.Argument(
631
+ ...,
632
+ help="Path to tasks file (JSON/JSONL)",
633
+ ),
634
+ model_id: str | None = typer.Option(
635
+ None,
636
+ "--model-id",
637
+ "-m",
638
+ help="Model ID to train (skip interactive selection)",
639
+ ),
640
+ reasoning_effort: str = typer.Option(
641
+ "medium",
642
+ "--reasoning-effort",
643
+ help="Reasoning effort level (low, medium, high)",
644
+ ),
645
+ verbose: bool = typer.Option(
646
+ False,
647
+ "--verbose",
648
+ "-v",
649
+ help="Enable verbose output",
650
+ ),
651
+ yes: bool = typer.Option(
652
+ False,
653
+ "--yes",
654
+ "-y",
655
+ help="Auto-accept all prompts",
656
+ ),
657
+ ) -> None:
658
+ """Launch an RFT training job."""
659
+ rft_command(
660
+ tasks_file=tasks_file,
661
+ reasoning_effort=reasoning_effort,
662
+ verbose=verbose,
663
+ yes=yes,
664
+ model_id=model_id,
665
+ )
666
+
667
+
668
+ @rft_app.command("status")
669
+ def rft_status(
670
+ model_id: str = typer.Argument(
671
+ ...,
672
+ help="Model ID or job ID to check status for",
673
+ ),
674
+ verbose: bool = typer.Option(
675
+ False,
676
+ "--verbose",
677
+ "-v",
678
+ help="Show full status details",
679
+ ),
680
+ ) -> None:
681
+ """Check the status of an RFT job."""
682
+ rft_status_command(
683
+ model_id=model_id,
684
+ verbose=verbose,
685
+ )
686
+
687
+
688
+ # Add RFT app as a command group
689
+ app.add_typer(rft_app, name="rft")
690
+
691
+
664
692
  @app.command()
665
693
  def clone(
666
694
  url: str = typer.Argument(
@@ -670,7 +698,7 @@ def clone(
670
698
  ) -> None:
671
699
  """🚀 Clone a git repository quietly with a pretty output.
672
700
 
673
- This command wraps 'git clone' with the --quiet flag and displays
701
+ [not dim]This command wraps 'git clone' with the --quiet flag and displays
674
702
  a rich formatted success message. If the repository contains a clone
675
703
  message in pyproject.toml, it will be displayed as a tutorial.
676
704
 
@@ -685,7 +713,7 @@ def clone(
685
713
  # style = "cyan"
686
714
 
687
715
  Examples:
688
- hud clone https://github.com/user/repo.git
716
+ hud clone https://github.com/user/repo.git[/not dim]
689
717
  """
690
718
  # Run the clone
691
719
  success, result = clone_repository(url)
@@ -713,10 +741,13 @@ def build(
713
741
  platform: str | None = typer.Option(
714
742
  None, "--platform", help="Set Docker target platform (e.g., linux/amd64)"
715
743
  ),
744
+ remote_cache: str | None = typer.Option(
745
+ None, "--remote-cache", help="Enable remote cache using Amazon ECR with specified repo name"
746
+ ),
716
747
  ) -> None:
717
748
  """🏗️ Build a HUD environment and generate lock file.
718
749
 
719
- This command:
750
+ [not dim]This command:
720
751
  - Builds a Docker image from your environment
721
752
  - Analyzes the MCP server to extract metadata
722
753
  - Generates a hud.lock.yaml file for reproducibility
@@ -726,7 +757,8 @@ def build(
726
757
  hud build environments/text_2048 -e API_KEY=secret
727
758
  hud build . --tag my-env:v1.0 -e VAR1=value1 -e VAR2=value2
728
759
  hud build . --no-cache # Force rebuild
729
- """
760
+ hud build . --remote-cache my-cache-repo # Use ECR remote cache (requires AWS_ACCOUNT_ID and AWS_DEFAULT_REGION)[/not dim]
761
+ """ # noqa: E501
730
762
  # Parse directory and extra arguments
731
763
  if params:
732
764
  directory = params[0]
@@ -763,7 +795,7 @@ def build(
763
795
  else:
764
796
  i += 1
765
797
 
766
- build_command(directory, tag, no_cache, verbose, env_vars, platform)
798
+ build_command(directory, tag, no_cache, verbose, env_vars, platform, remote_cache)
767
799
 
768
800
 
769
801
  @app.command()
@@ -781,14 +813,14 @@ def push(
781
813
  ) -> None:
782
814
  """📤 Push HUD environment to registry.
783
815
 
784
- Reads hud.lock.yaml from the directory and pushes to registry.
816
+ [not dim]Reads hud.lock.yaml from the directory and pushes to registry.
785
817
  Auto-detects your Docker username if --image not specified.
786
818
 
787
819
  Examples:
788
820
  hud push # Push with auto-detected name
789
821
  hud push --tag v1.0 # Push with specific tag
790
822
  hud push . --image myuser/myenv:v1.0
791
- hud push --yes # Skip confirmation
823
+ hud push --yes # Skip confirmation[/not dim]
792
824
  """
793
825
  push_command(directory, image, tag, sign, yes, verbose)
794
826
 
@@ -807,12 +839,12 @@ def pull(
807
839
  ) -> None:
808
840
  """📥 Pull HUD environment from registry with metadata preview.
809
841
 
810
- Shows environment details before downloading.
842
+ [not dim]Shows environment details before downloading.
811
843
 
812
844
  Examples:
813
845
  hud pull hud.lock.yaml # Pull from lock file
814
846
  hud pull myuser/myenv:latest # Pull by image reference
815
- hud pull myuser/myenv --verify-only # Check metadata only
847
+ hud pull myuser/myenv --verify-only # Check metadata only[/not dim]
816
848
  """
817
849
  pull_command(target, lock_file, yes, verify_only, verbose)
818
850
 
@@ -828,14 +860,14 @@ def list_environments(
828
860
  ) -> None:
829
861
  """📋 List all HUD environments in local registry.
830
862
 
831
- Shows environments pulled with 'hud pull' stored in ~/.hud/envs/
863
+ [not dim]Shows environments pulled with 'hud pull' stored in ~/.hud/envs/
832
864
 
833
865
  Examples:
834
866
  hud list # List all environments
835
867
  hud list --filter text # Filter by name
836
868
  hud list --json # Output as JSON
837
869
  hud list --all # Show digest column
838
- hud list --verbose # Show full descriptions
870
+ hud list --verbose # Show full descriptions[/not dim]
839
871
  """
840
872
  list_module.list_command(filter_name, json_output, show_all, verbose)
841
873
 
@@ -850,7 +882,7 @@ def remove(
850
882
  ) -> None:
851
883
  """🗑️ Remove HUD environments from local registry.
852
884
 
853
- Removes environment metadata from ~/.hud/envs/
885
+ [not dim]Removes environment metadata from ~/.hud/envs/
854
886
  Note: This does not remove the Docker images.
855
887
 
856
888
  Examples:
@@ -858,37 +890,44 @@ def remove(
858
890
  hud remove text_2048 # Remove by name
859
891
  hud remove hudpython/test_init # Remove by full name
860
892
  hud remove all # Remove all environments
861
- hud remove all --yes # Remove all without confirmation
893
+ hud remove all --yes # Remove all without confirmation[/not dim]
862
894
  """
863
895
  remove_command(target, yes, verbose)
864
896
 
865
897
 
866
898
  @app.command()
867
899
  def init(
868
- name: str = typer.Argument(None, help="Environment name (default: current directory name)"),
900
+ name: str = typer.Argument(None, help="Environment name (default: directory name)"),
901
+ directory: str = typer.Option(".", "--dir", "-d", help="Target directory"),
902
+ force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing files"),
869
903
  preset: str | None = typer.Option(
870
904
  None,
871
905
  "--preset",
872
906
  "-p",
873
- help="Preset to use: blank, deep-research, browser. If omitted, you'll choose interactively.", # noqa: E501
907
+ help="Download a preset: blank, deep-research, browser, rubrics",
874
908
  ),
875
- directory: str = typer.Option(".", "--dir", "-d", help="Target directory"),
876
- force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing files"),
877
909
  ) -> None:
878
- """🚀 Initialize a new HUD environment with minimal boilerplate.
910
+ """🚀 Initialize a HUD environment.
911
+
912
+ [not dim]• Empty directory: Choose a preset interactively
913
+ • Existing project: Add Dockerfile.hud and hud.py
879
914
 
880
- Creates a working MCP environment with:
881
- - Dockerfile for containerization
882
- - pyproject.toml for dependencies
883
- - Minimal MCP server with context
884
- - Required setup/evaluate tools
915
+ Use --preset to skip selection and download a specific template.
885
916
 
886
917
  Examples:
887
- hud init # Use current directory name
888
- hud init my-env # Create in ./my-env/
889
- hud init my-env --dir /tmp # Create in /tmp/my-env/
918
+ hud init # Auto-detect mode
919
+ hud init my-env # Initialize with custom name
920
+ hud init --preset browser # Download browser preset[/not dim]
921
+
890
922
  """
891
- create_environment(name, directory, force, preset)
923
+ if preset:
924
+ from hud.cli.init import create_environment
925
+
926
+ create_environment(name, directory, force, preset)
927
+ else:
928
+ from hud.cli.flows.init import smart_init
929
+
930
+ smart_init(name, directory, force)
892
931
 
893
932
 
894
933
  @app.command()
@@ -900,200 +939,7 @@ def quickstart() -> None:
900
939
  clone("https://github.com/hud-evals/quickstart.git")
901
940
 
902
941
 
903
- @app.command()
904
- def eval(
905
- source: str | None = typer.Argument(
906
- None,
907
- help=(
908
- "HuggingFace dataset (e.g. 'hud-evals/SheetBench-50') or task JSON file. "
909
- "If not provided, looks for task.json in current directory."
910
- ),
911
- ),
912
- agent: str | None = typer.Argument(
913
- None,
914
- help=(
915
- "Agent backend to use (claude, openai, vllm, or litellm). If not provided, will prompt interactively." # noqa: E501
916
- ),
917
- ),
918
- full: bool = typer.Option(
919
- False,
920
- "--full",
921
- help="Run the entire dataset (omit for single-task debug mode)",
922
- ),
923
- model: str | None = typer.Option(
924
- None,
925
- "--model",
926
- help="Model name for the chosen agent",
927
- ),
928
- allowed_tools: str | None = typer.Option(
929
- None,
930
- "--allowed-tools",
931
- help="Comma-separated list of allowed tools",
932
- ),
933
- max_concurrent: int = typer.Option(
934
- 50,
935
- "--max-concurrent",
936
- help="Max concurrent tasks (prevents rate limits in both asyncio and parallel modes)",
937
- ),
938
- max_steps: int = typer.Option(
939
- 30,
940
- "--max-steps",
941
- help="Maximum steps per task (default: 10 for single, 50 for full)",
942
- ),
943
- parallel: bool = typer.Option(
944
- False,
945
- "--parallel",
946
- help="Use process-based parallel execution for large datasets (100+ tasks)",
947
- ),
948
- max_workers: int | None = typer.Option(
949
- None,
950
- "--max-workers",
951
- help="Number of worker processes for parallel mode (auto-optimized if not set)",
952
- ),
953
- max_concurrent_per_worker: int = typer.Option(
954
- 20,
955
- "--max-concurrent-per-worker",
956
- help="Maximum concurrent tasks per worker in parallel mode",
957
- ),
958
- verbose: bool = typer.Option(
959
- False,
960
- "--verbose",
961
- help="Enable verbose output from the agent",
962
- ),
963
- very_verbose: bool = typer.Option(
964
- False,
965
- "--very-verbose",
966
- "-vv",
967
- help="Enable debug-level logs for maximum visibility",
968
- ),
969
- vllm_base_url: str | None = typer.Option(
970
- None,
971
- "--vllm-base-url",
972
- help="Base URL for vLLM server (when using --agent vllm)",
973
- ),
974
- group_size: int = typer.Option(
975
- 1,
976
- "--group-size",
977
- help="Number of times to run each task (similar to RL training)",
978
- ),
979
- integration_test: bool = typer.Option(
980
- False,
981
- "--integration-test",
982
- help=(
983
- "Run integration_test_tool, where problem is setup, "
984
- "actions are applied, and evaluation is performed, without "
985
- "spinning up an agent"
986
- ),
987
- ),
988
- ) -> None:
989
- """🚀 Run evaluation on datasets or individual tasks with agents."""
990
- from hud.settings import settings
991
- from hud.utils.hud_console import HUDConsole
992
-
993
- hud_console = HUDConsole()
994
-
995
- if integration_test:
996
- agent = "integration_test"
997
-
998
- # If no source provided, reuse RL helper to find a tasks file interactively
999
- if source is None:
1000
- try:
1001
- from hud.cli.utils.tasks import find_tasks_file
1002
-
1003
- source = find_tasks_file(None, msg="Select a tasks file to run")
1004
- hud_console.success(f"Selected: {source}")
1005
- except Exception as e:
1006
- hud_console.error(
1007
- "No source provided and no task/eval JSON files found in current directory"
1008
- )
1009
- hud_console.info(
1010
- "Usage: hud eval <source> or create a task JSON file (e.g., task.json, tasks.jsonl)"
1011
- )
1012
- raise typer.Exit(1) from e
1013
-
1014
- # Import eval_command lazily to avoid importing agent dependencies
1015
- try:
1016
- from .eval import eval_command, get_available_models
1017
- except ImportError as e:
1018
- hud_console.error(
1019
- "Evaluation dependencies are not installed. "
1020
- "Please install with: pip install 'hud-python[agent]'"
1021
- )
1022
- raise typer.Exit(1) from e
1023
-
1024
- # If no agent specified, fetch available models and prompt for selection
1025
- base_model = None
1026
- if agent is None:
1027
- # Get available HUD models first
1028
- hud_models = get_available_models()
1029
-
1030
- # Build choices starting with HUD models
1031
- choices = []
1032
-
1033
- # Add HUD models as agent choices
1034
- for hud_model in hud_models:
1035
- model_name = hud_model["name"]
1036
- base_model = hud_model["base_model"]
1037
- vllm_status = " ⚡" if hud_model.get("vllm_url") else ""
1038
- choices.append({"name": f"{model_name}{vllm_status}", "value": f"{model_name}"})
1039
-
1040
- # Add standard agent choices
1041
- choices.extend(
1042
- [
1043
- {"name": "Claude 4 Sonnet", "value": "claude"},
1044
- {"name": "OpenAI Computer Use", "value": "openai"},
1045
- {"name": "vLLM (Local Server)", "value": "vllm"},
1046
- {"name": "LiteLLM (Multi-provider)", "value": "litellm"},
1047
- ]
1048
- )
1049
-
1050
- agent = hud_console.select("Select an agent to use:", choices=choices, default=0)
1051
-
1052
- # Handle HUD model selection
1053
- if agent and agent not in ["claude", "openai", "vllm", "litellm", "integration_test"]:
1054
- # Find remote model name
1055
- model = agent
1056
- if not vllm_base_url:
1057
- vllm_base_url = f"{settings.hud_rl_url}/models/{model}/vllm"
1058
-
1059
- # Set model to base model for the vllm endpoint
1060
- if not base_model:
1061
- hud_models = get_available_models()
1062
- for hud_model in hud_models:
1063
- if hud_model["name"] == model:
1064
- base_model = hud_model["base_model"]
1065
- break
1066
- if not base_model:
1067
- hud_console.error(f"Model {model} not found")
1068
- raise typer.Exit(1)
1069
- model = base_model
1070
- agent = "vllm" # Use vLLM backend for HUD models
1071
- hud_console.info(f"Using HUD model: {model} (trained on {base_model})")
1072
-
1073
- # Validate agent choice
1074
- valid_agents = ["claude", "openai", "vllm", "litellm", "integration_test"]
1075
- if agent not in valid_agents:
1076
- hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
1077
- raise typer.Exit(1)
1078
-
1079
- # Run the command
1080
- eval_command(
1081
- source=source,
1082
- full=full,
1083
- agent=agent, # type: ignore
1084
- model=model,
1085
- allowed_tools=allowed_tools,
1086
- max_concurrent=max_concurrent,
1087
- max_steps=max_steps,
1088
- parallel=parallel,
1089
- max_workers=max_workers,
1090
- max_concurrent_per_worker=max_concurrent_per_worker,
1091
- verbose=verbose,
1092
- very_verbose=very_verbose,
1093
- vllm_base_url=vllm_base_url,
1094
- group_size=group_size,
1095
- integration_test=integration_test,
1096
- )
942
+ app.command(name="eval")(eval_command)
1097
943
 
1098
944
 
1099
945
  @app.command()
@@ -1130,97 +976,161 @@ def get(
1130
976
 
1131
977
 
1132
978
  @app.command()
1133
- def rl(
1134
- tasks_file: str | None = typer.Argument(
1135
- None,
1136
- help=(
1137
- "Path to tasks file (JSON/JSONL) or HuggingFace dataset name. "
1138
- "If not provided, looks for tasks.json or tasks.jsonl in current directory."
1139
- ),
1140
- ),
1141
- model: str | None = typer.Argument(
1142
- None,
1143
- help="Model to train from https://hud.so/models (default: interactive selection)",
979
+ def convert(
980
+ tasks_file: str = typer.Argument(
981
+ ..., help="Path to tasks file (JSON/JSONL) to convert to remote MCP configuration"
1144
982
  ),
1145
- config_file: Path | None = typer.Option( # noqa: B008
1146
- None,
1147
- "--config",
1148
- "-c",
1149
- help="Path to existing configuration file",
1150
- ),
1151
- output_dir: str = typer.Option(
1152
- "checkpoints",
1153
- "--output-dir",
1154
- "-o",
1155
- help="Output directory for checkpoints",
1156
- ),
1157
- restart: bool = typer.Option(
1158
- False,
1159
- "--restart",
1160
- help="Restart the vLLM server before training",
1161
- ),
1162
- verbose: bool = typer.Option(
1163
- False,
1164
- "--verbose",
1165
- "-v",
1166
- help="Enable verbose output",
1167
- ),
1168
- local: bool = typer.Option(
1169
- False,
1170
- "--local",
1171
- help="Run training locally instead of using remote API server",
1172
- ),
1173
- no_ddp: bool = typer.Option(
1174
- False,
1175
- "--no-ddp",
1176
- help="Disable DDP even with multiple GPUs",
1177
- ),
1178
- ddp_gpus: str | None = typer.Option(
1179
- None,
1180
- "--ddp-gpus",
1181
- help="Specific GPUs for DDP (e.g., '0,1,2,3')",
1182
- ),
1183
- yes: bool = typer.Option(
1184
- False,
1185
- "--yes",
1186
- "-y",
1187
- help="Auto-accept all prompts and use defaults (lazy mode)",
1188
- ),
1189
- vllm_gpu: int | None = typer.Option(
1190
- None,
1191
- "--vllm-gpu",
1192
- help="Specific GPU for vLLM server",
983
+ ) -> None:
984
+ """Convert local MCP task configs to remote (mcp.hud.ai) format.
985
+
986
+ This mirrors the implicit conversion flow used by 'hud rl' and writes a new
987
+ remote_<name>.json next to the source file when needed.
988
+ """
989
+ from pathlib import Path
990
+
991
+ hud_console = HUDConsole()
992
+
993
+ try:
994
+ from .flows.tasks import convert_tasks_to_remote
995
+
996
+ result_path = convert_tasks_to_remote(tasks_file)
997
+
998
+ # If nothing changed, inform the user
999
+ try:
1000
+ if Path(result_path).resolve() == Path(tasks_file).resolve():
1001
+ hud_console.success(
1002
+ "Tasks already reference remote MCP URLs. No conversion needed."
1003
+ )
1004
+ hud_console.hint("You can run them directly with: hud eval <tasks_file> --full")
1005
+ return
1006
+ except Exception as e:
1007
+ # Best effort; continue with success message
1008
+ hud_console.debug(f"Path comparison failed, continuing: {e}")
1009
+
1010
+ hud_console.success(f"Converted tasks written to: {result_path}")
1011
+ hud_console.hint(
1012
+ "You can now run remote flows: hud rl <converted_file> or hud eval <converted_file>"
1013
+ )
1014
+ except typer.Exit:
1015
+ raise
1016
+ except Exception as e:
1017
+ hud_console.error(f"Failed to convert tasks: {e}")
1018
+ raise typer.Exit(1) from e
1019
+
1020
+
1021
+ @app.command()
1022
+ def cancel(
1023
+ job_id: str | None = typer.Argument(
1024
+ None, help="Job ID to cancel. Omit to cancel all active jobs with --all."
1193
1025
  ),
1194
- vllm_gpu_count: int = typer.Option(
1195
- 1,
1196
- "--vllm-gpu-count",
1197
- help="Number of GPUs for vLLM server",
1026
+ task_id: str | None = typer.Option(
1027
+ None, "--task", "-t", help="Specific task ID within the job to cancel."
1198
1028
  ),
1199
- skip_vllm_startup: bool = typer.Option(
1200
- False,
1201
- "--skip-vllm-startup",
1202
- help="Skip the vLLM server startup",
1029
+ all_jobs: bool = typer.Option(
1030
+ False, "--all", "-a", help="Cancel ALL active jobs for your account (panic button)."
1203
1031
  ),
1032
+ yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt."),
1204
1033
  ) -> None:
1205
- """🎯 Run GRPO reinforcement learning training on tasks."""
1206
- # Import from the rl module
1207
- from .rl import rl_command
1034
+ """Cancel remote rollouts.
1208
1035
 
1209
- rl_command(
1210
- tasks_file=tasks_file,
1211
- model=model,
1212
- config_file=config_file,
1213
- output_dir=output_dir,
1214
- restart=restart,
1215
- verbose=verbose,
1216
- local=local,
1217
- no_ddp=no_ddp,
1218
- ddp_gpus=ddp_gpus,
1219
- vllm_gpu=vllm_gpu,
1220
- vllm_gpu_count=vllm_gpu_count,
1221
- yes=yes,
1222
- skip_vllm_startup=skip_vllm_startup,
1223
- )
1036
+ Examples:
1037
+ hud cancel <job_id> # Cancel all tasks in a job
1038
+ hud cancel <job_id> --task <id> # Cancel specific task
1039
+ hud cancel --all # Cancel ALL active jobs (panic button)
1040
+ """
1041
+ import asyncio
1042
+
1043
+ import questionary
1044
+
1045
+ hud_console = HUDConsole()
1046
+
1047
+ if not job_id and not all_jobs:
1048
+ hud_console.error("Provide a job_id or use --all to cancel all active jobs.")
1049
+ raise typer.Exit(1)
1050
+
1051
+ if job_id and all_jobs:
1052
+ hud_console.error("Cannot specify both job_id and --all.")
1053
+ raise typer.Exit(1)
1054
+
1055
+ # Handle confirmations BEFORE entering async context (questionary uses asyncio internally)
1056
+ if (
1057
+ all_jobs
1058
+ and not yes
1059
+ and not questionary.confirm(
1060
+ "⚠️ This will cancel ALL your active jobs. Continue?",
1061
+ default=False,
1062
+ ).ask()
1063
+ ):
1064
+ hud_console.info("Cancelled.")
1065
+ raise typer.Exit(0)
1066
+
1067
+ if (
1068
+ job_id
1069
+ and not task_id
1070
+ and not yes
1071
+ and not questionary.confirm(
1072
+ f"Cancel all tasks in job {job_id}?",
1073
+ default=True,
1074
+ ).ask()
1075
+ ):
1076
+ hud_console.info("Cancelled.")
1077
+ raise typer.Exit(0)
1078
+
1079
+ async def _cancel() -> None:
1080
+ from hud.datasets.utils import cancel_all_jobs, cancel_job, cancel_task
1081
+
1082
+ if all_jobs:
1083
+ hud_console.info("Cancelling all active jobs...")
1084
+ result = await cancel_all_jobs()
1085
+
1086
+ jobs_cancelled = result.get("jobs_cancelled", 0)
1087
+ tasks_cancelled = result.get("total_tasks_cancelled", 0)
1088
+
1089
+ if jobs_cancelled == 0:
1090
+ hud_console.info("No active jobs found.")
1091
+ else:
1092
+ hud_console.success(
1093
+ f"Cancelled {jobs_cancelled} job(s), {tasks_cancelled} task(s) total."
1094
+ )
1095
+ for job in result.get("job_details", []):
1096
+ hud_console.info(f" • {job['job_id']}: {job['cancelled']} tasks cancelled")
1097
+
1098
+ elif task_id:
1099
+ hud_console.info(f"Cancelling task {task_id} in job {job_id}...")
1100
+ result = await cancel_task(job_id, task_id) # type: ignore[arg-type]
1101
+
1102
+ status = result.get("status", "unknown")
1103
+ if status in ("revoked", "terminated"):
1104
+ hud_console.success(f"Task cancelled: {result.get('message', '')}")
1105
+ elif status == "not_found":
1106
+ hud_console.warning(f"Task not found: {result.get('message', '')}")
1107
+ else:
1108
+ hud_console.info(f"Status: {status} - {result.get('message', '')}")
1109
+
1110
+ else:
1111
+ hud_console.info(f"Cancelling job {job_id}...")
1112
+ result = await cancel_job(job_id) # type: ignore[arg-type]
1113
+
1114
+ total = result.get("total_found", 0)
1115
+ cancelled = result.get("cancelled", 0)
1116
+
1117
+ if total == 0:
1118
+ hud_console.warning(f"No tasks found for job {job_id}")
1119
+ else:
1120
+ hud_console.success(
1121
+ f"Cancelled {cancelled}/{total} tasks "
1122
+ f"({result.get('running_terminated', 0)} running, "
1123
+ f"{result.get('queued_revoked', 0)} queued)"
1124
+ )
1125
+
1126
+ try:
1127
+ asyncio.run(_cancel())
1128
+ except httpx.HTTPStatusError as e:
1129
+ hud_console.error(f"API error: {e.response.status_code} - {e.response.text}")
1130
+ raise typer.Exit(1) from e
1131
+ except Exception as e:
1132
+ hud_console.error(f"Failed to cancel: {e}")
1133
+ raise typer.Exit(1) from e
1224
1134
 
1225
1135
 
1226
1136
  @app.command()
@@ -1231,13 +1141,12 @@ def set(
1231
1141
  ) -> None:
1232
1142
  """Persist API keys or other variables for HUD to use by default.
1233
1143
 
1234
- Examples:
1144
+ [not dim]Examples:
1235
1145
  hud set ANTHROPIC_API_KEY=sk-... OPENAI_API_KEY=sk-...
1236
1146
 
1237
1147
  Values are stored in ~/.hud/.env and are loaded by hud.settings with
1238
- the lowest precedence (overridden by process env and project .env).
1148
+ the lowest precedence (overridden by process env and project .env).[/not dim]
1239
1149
  """
1240
- from hud.utils.hud_console import HUDConsole
1241
1150
 
1242
1151
  hud_console = HUDConsole()
1243
1152
 
@@ -1261,6 +1170,13 @@ def set(
1261
1170
 
1262
1171
  def main() -> None:
1263
1172
  """Main entry point for the CLI."""
1173
+ # Check for updates (including on --version command)
1174
+ # Skip only on help-only commands
1175
+ if not (len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] in ["--help", "-h"])):
1176
+ from .utils.version_check import display_update_prompt
1177
+
1178
+ display_update_prompt()
1179
+
1264
1180
  # Handle --version flag before Typer parses args
1265
1181
  if "--version" in sys.argv:
1266
1182
  try: