hud-python 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hud/agents/__init__.py CHANGED
@@ -8,7 +8,12 @@ from __future__ import annotations
8
8
  from typing import TYPE_CHECKING, Any, cast
9
9
 
10
10
  from hud.types import AgentType
11
- from hud.utils.gateway import build_gateway_client, list_gateway_models
11
+ from hud.utils.gateway import (
12
+ build_gateway_client,
13
+ gateway_model_aliases,
14
+ list_gateway_models,
15
+ normalize_gateway_model_id,
16
+ )
12
17
 
13
18
  if TYPE_CHECKING:
14
19
  from typing import TypeAlias
@@ -27,6 +32,8 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
27
32
 
28
33
  For direct API access with provider API keys, instantiate the agent classes directly.
29
34
  """
35
+ requested_model = model
36
+ model = normalize_gateway_model_id(model)
30
37
  agent_type = next((candidate for candidate in AgentType if candidate.value == model), None)
31
38
  if agent_type is not None:
32
39
  model_id = model
@@ -73,7 +80,8 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
73
80
  for n in (gm.id, gm.name, gm.model_name)
74
81
  if isinstance(n, str)
75
82
  ]
76
- near = difflib.get_close_matches(model, known, n=3, cutoff=0.5)
83
+ known.extend(gateway_model_aliases())
84
+ near = difflib.get_close_matches(requested_model, known, n=3, cutoff=0.5)
77
85
  hint = (
78
86
  f" Did you mean: {', '.join(near)}?"
79
87
  if near
@@ -84,7 +92,7 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
84
92
  if gateway_models
85
93
  else "the HUD gateway registry (empty — is HUD_API_KEY set?)"
86
94
  )
87
- raise ValueError(f"Model {model!r} not found in {source}.{hint}")
95
+ raise ValueError(f"Model {requested_model!r} not found in {source}.{hint}")
88
96
 
89
97
  kwargs.setdefault("model", model_id)
90
98
  kwargs.setdefault("model_client", build_gateway_client(provider_name))
@@ -193,16 +193,27 @@ class OpenAIChatAgent(ToolAgent[ChatCompletionMessageParam, OpenAIChatConfig]):
193
193
  sample: Sample | None = None
194
194
  if return_token_ids:
195
195
  prompt_token_ids = getattr(choice, "prompt_token_ids", None)
196
+ # Multimodal prompt (text + image chunks): the only prompt representation
197
+ # that survives image inputs; flat prompt_token_ids is null in that case.
198
+ prompt_chunks = getattr(choice, "prompt_chunks", None)
196
199
  token_ids = getattr(choice, "token_ids", None)
197
- if prompt_token_ids is not None and token_ids is not None:
198
- chat_state.continuation_token_ids = list(prompt_token_ids) + list(token_ids)
199
- chat_state.continuation_message_count = len(messages)
200
+ has_prompt = prompt_token_ids is not None or prompt_chunks is not None
201
+ if token_ids is not None and has_prompt:
200
202
  content_lp = choice.logprobs.content if choice.logprobs else None
201
203
  sample = Sample(
202
- prompt_token_ids=list(prompt_token_ids),
204
+ prompt_token_ids=list(prompt_token_ids) if prompt_token_ids is not None else [],
205
+ prompt_chunks=list(prompt_chunks) if prompt_chunks is not None else None,
203
206
  output_token_ids=list(token_ids),
204
207
  output_logprobs=[tok.logprob for tok in content_lp] if content_lp else [],
205
208
  )
209
+ # KV-cache continuation only applies to flat text prompts; clear any
210
+ # stale state when the gateway returns chunks-only (multimodal turn).
211
+ if prompt_token_ids is not None:
212
+ chat_state.continuation_token_ids = list(prompt_token_ids) + list(token_ids)
213
+ chat_state.continuation_message_count = len(messages)
214
+ else:
215
+ chat_state.continuation_token_ids = None
216
+ chat_state.continuation_message_count = None
206
217
 
207
218
  tool_calls: list[MCPToolCall] = []
208
219
  for tc in function_calls:
@@ -108,7 +108,7 @@ def test_create_agent_resolves_gateway_model_metadata(
108
108
 
109
109
  model = GatewayModelInfo(
110
110
  id="ft:custom-123",
111
- model_name="gpt-5.4",
111
+ model_name="gpt-5.5",
112
112
  sdk_agent_type="openai_compatible",
113
113
  provider=GatewayProviderInfo(name="openai"),
114
114
  )
@@ -122,4 +122,40 @@ def test_create_agent_resolves_gateway_model_metadata(
122
122
  agent = create_agent("ft:custom-123")
123
123
 
124
124
  assert isinstance(agent, OpenAIChatAgent)
125
- assert agent.config.model == "gpt-5.4" # resolved to the model's real name
125
+ assert agent.config.model == "gpt-5.5" # resolved to the model's real name
126
+
127
+
128
+ @pytest.mark.parametrize(
129
+ ("alias", "canonical"),
130
+ [
131
+ ("deepseek-v4", "deepseek/deepseek-v4-pro"),
132
+ ("deepseek-v4-flash", "deepseek/deepseek-v4-flash"),
133
+ ("glm-5.2", "z-ai/glm-5.2"),
134
+ ("kimi-k2.6", "moonshotai/kimi-k2.6"),
135
+ ("minimax-m3", "MiniMax-M3"),
136
+ ],
137
+ )
138
+ def test_create_agent_accepts_gateway_model_aliases(
139
+ alias: str,
140
+ canonical: str,
141
+ monkeypatch: pytest.MonkeyPatch,
142
+ ) -> None:
143
+ from hud.utils.gateway import GatewayModelInfo, GatewayProviderInfo
144
+
145
+ model = GatewayModelInfo(
146
+ id=canonical,
147
+ model_name=canonical,
148
+ sdk_agent_type="openai_compatible",
149
+ provider=GatewayProviderInfo(name="openai"),
150
+ )
151
+ monkeypatch.setattr("hud.agents.list_gateway_models", lambda: [model])
152
+
153
+ def _build_client(_provider: str) -> object:
154
+ return object()
155
+
156
+ monkeypatch.setattr("hud.agents.build_gateway_client", _build_client)
157
+
158
+ agent = create_agent(alias)
159
+
160
+ assert isinstance(agent, OpenAIChatAgent)
161
+ assert agent.config.model == canonical
@@ -102,7 +102,7 @@ def _commands(tool: Any) -> list[str]:
102
102
 
103
103
 
104
104
  async def test_openai_shell_wraps_command_with_timeout() -> None:
105
- tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.4"), client=_ssh())
105
+ tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.5"), client=_ssh())
106
106
 
107
107
  result = await tool.execute({"commands": ["pwd"], "timeout_ms": 2500})
108
108
 
@@ -114,7 +114,7 @@ async def test_openai_shell_wraps_command_with_timeout() -> None:
114
114
 
115
115
 
116
116
  async def test_openai_shell_runs_each_command_without_timeout() -> None:
117
- tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.4"), client=_ssh())
117
+ tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.5"), client=_ssh())
118
118
 
119
119
  await tool.execute({"commands": ["echo a", "echo b"]})
120
120
 
@@ -122,7 +122,7 @@ async def test_openai_shell_runs_each_command_without_timeout() -> None:
122
122
 
123
123
 
124
124
  async def test_openai_shell_rejects_non_list_commands_without_running() -> None:
125
- tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.4"), client=_ssh())
125
+ tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.5"), client=_ssh())
126
126
 
127
127
  result = await tool.execute({"commands": 123})
128
128
 
@@ -131,7 +131,7 @@ async def test_openai_shell_rejects_non_list_commands_without_running() -> None:
131
131
 
132
132
 
133
133
  def test_openai_shell_to_params_is_shell_type() -> None:
134
- tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.4"), client=_ssh())
134
+ tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.5"), client=_ssh())
135
135
  assert tool.to_params()["type"] == "shell"
136
136
 
137
137
 
hud/agents/types.py CHANGED
@@ -99,7 +99,7 @@ class OpenAIConfig(AgentConfig):
99
99
  """Configuration for OpenAIAgent."""
100
100
 
101
101
  model_name: str = "OpenAI"
102
- model: str = Field(default="gpt-5.4", validation_alias=_model_alias)
102
+ model: str = Field(default="gpt-5.5", validation_alias=_model_alias)
103
103
  max_output_tokens: int | None = None
104
104
  temperature: float | None = None
105
105
  reasoning: Any = None # openai Reasoning
@@ -113,7 +113,7 @@ class OpenAIChatConfig(AgentConfig):
113
113
  """Configuration for OpenAIChatAgent."""
114
114
 
115
115
  model_name: str = "OpenAI Chat"
116
- model: str = Field(default="gpt-5-mini", validation_alias=_model_alias)
116
+ model: str = Field(default="gpt-5.4-mini", validation_alias=_model_alias)
117
117
  checkpoint: str | None = Field(
118
118
  default=None,
119
119
  description="Specific checkpoint name for inference routing. "
@@ -139,7 +139,7 @@ class ClaudeSDKConfig(AgentConfig):
139
139
  """
140
140
 
141
141
  model_name: str = "Claude Code"
142
- model: str = Field(default="claude-sonnet-4-5", validation_alias=_model_alias)
142
+ model: str = Field(default="claude-sonnet-4-6", validation_alias=_model_alias)
143
143
  permission_mode: str = "bypassPermissions"
144
144
  max_steps: int = -1
145
145
  allowed_tools: list[str] = Field(
@@ -222,6 +222,10 @@ class Sample(BaseModel):
222
222
  """
223
223
 
224
224
  prompt_token_ids: list[int] = Field(default_factory=list[int])
225
+ # Multimodal prompt as serialized ``ModelInput`` chunks (text + image), set by
226
+ # vision rollouts where the prompt is not a flat token list. When present it is
227
+ # the authoritative prompt for training; ``prompt_token_ids`` stays empty.
228
+ prompt_chunks: list[dict[str, Any]] | None = None
225
229
  output_token_ids: list[int] = Field(default_factory=list[int])
226
230
  output_logprobs: list[float] = Field(default_factory=list[float])
227
231
 
hud/cli/__init__.py CHANGED
@@ -35,11 +35,13 @@ from .client import client_app # noqa: E402
35
35
  from .deploy import deploy_command # noqa: E402
36
36
  from .eval import eval_command # noqa: E402
37
37
  from .init import init_command # noqa: E402
38
+ from .jobs import jobs_app # noqa: E402
38
39
  from .login import login_command # noqa: E402
39
40
  from .models import models_app # noqa: E402
40
41
  from .serve import serve_command # noqa: E402
41
42
  from .sync import sync_app # noqa: E402
42
43
  from .task import task_app # noqa: E402
44
+ from .trace import trace_app # noqa: E402
43
45
 
44
46
  app.command(name="serve")(serve_command)
45
47
  app.command(name="dev", deprecated=True, hidden=True)(serve_command) # alias for now
@@ -49,6 +51,8 @@ app.command(name="eval")(eval_command)
49
51
  app.command(name="init")(init_command)
50
52
  app.command(name="cancel")(cancel_command)
51
53
  app.add_typer(models_app, name="models")
54
+ app.add_typer(jobs_app, name="jobs")
55
+ app.add_typer(trace_app, name="trace")
52
56
 
53
57
 
54
58
  @app.command(name="set")
hud/cli/eval.py CHANGED
@@ -5,6 +5,7 @@ Config Override Order: CLI arguments > .hud_eval.toml > defaults
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
+ import ast
8
9
  import asyncio
9
10
  import logging
10
11
  import os
@@ -42,8 +43,9 @@ def _resolve_model_from_catalog(model_id: str) -> tuple[AgentType, str] | None:
42
43
  Returns None if the model isn't found or the catalog is unreachable.
43
44
  """
44
45
  try:
45
- from hud.utils.gateway import list_gateway_models
46
+ from hud.utils.gateway import list_gateway_models, normalize_gateway_model_id
46
47
 
48
+ model_id = normalize_gateway_model_id(model_id)
47
49
  models = list_gateway_models()
48
50
  except Exception:
49
51
  return None
@@ -116,8 +118,9 @@ class AgentPreset:
116
118
 
117
119
  _AGENT_PRESETS: list[AgentPreset] = [
118
120
  AgentPreset("Claude Sonnet 4.6", AgentType.CLAUDE, "claude-sonnet-4-6"),
119
- AgentPreset("GPT-5.4", AgentType.OPENAI, "gpt-5.4"),
120
- AgentPreset("Gemini 3.1 Pro (Preview)", AgentType.GEMINI, "gemini-3-1-pro"),
121
+ AgentPreset("Claude Opus 4.8", AgentType.CLAUDE, "claude-opus-4-8"),
122
+ AgentPreset("GPT-5.5", AgentType.OPENAI, "gpt-5.5"),
123
+ AgentPreset("Gemini 3.1 Pro (Preview)", AgentType.GEMINI, "gemini-3.1-pro-preview"),
121
124
  AgentPreset(
122
125
  "Grok 4-1 Fast (xAI)",
123
126
  AgentType.OPENAI_COMPATIBLE,
@@ -130,10 +133,22 @@ _AGENT_PRESETS: list[AgentPreset] = [
130
133
  },
131
134
  ),
132
135
  AgentPreset(
133
- "GLM-4.6V (Z-AI)",
136
+ "GLM 5.2 (Z.ai)",
134
137
  AgentType.OPENAI_COMPATIBLE,
135
- "z-ai/glm-4.6v",
136
- {"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "GLM-4.6V"}},
138
+ "z-ai/glm-5.2",
139
+ {"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "GLM 5.2"}},
140
+ ),
141
+ AgentPreset(
142
+ "Kimi K2.6 (Moonshot)",
143
+ AgentType.OPENAI_COMPATIBLE,
144
+ "moonshotai/kimi-k2.6",
145
+ {"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "Kimi K2.6"}},
146
+ ),
147
+ AgentPreset(
148
+ "MiniMax M3",
149
+ AgentType.OPENAI_COMPATIBLE,
150
+ "MiniMax-M3",
151
+ {"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "MiniMax M3"}},
137
152
  ),
138
153
  ]
139
154
 
@@ -161,7 +176,7 @@ _DEFAULT_CONFIG_TEMPLATE = """# HUD Eval Configuration
161
176
  # use_computer_beta = true
162
177
 
163
178
  [openai]
164
- # model = "gpt-4o"
179
+ # model = "gpt-5.5"
165
180
  # temperature = 0.7
166
181
  # max_output_tokens = 4096
167
182
 
@@ -401,6 +416,11 @@ class EvalConfig(BaseModel):
401
416
  if self.model:
402
417
  kwargs["model"] = self.model
403
418
 
419
+ if isinstance(kwargs.get("model"), str):
420
+ from hud.utils.gateway import normalize_gateway_model_id
421
+
422
+ kwargs["model"] = normalize_gateway_model_id(kwargs["model"])
423
+
404
424
  if self.agent_type == AgentType.OPENAI_COMPATIBLE and "api_key" not in kwargs:
405
425
  base_url = kwargs.get("base_url", "")
406
426
  if settings.hud_gateway_url in base_url and settings.api_key:
@@ -665,13 +685,46 @@ def _build_agent(cfg: EvalConfig) -> Any:
665
685
  return cast("Any", cfg.agent_type.cls)(config=config)
666
686
 
667
687
 
688
+ def _python_defines_environment(path: Path) -> bool:
689
+ """Return True when ``path`` constructs a v6 :class:`~hud.environment.Environment`."""
690
+ try:
691
+ tree = ast.parse(path.read_text(encoding="utf-8"))
692
+ except (OSError, SyntaxError):
693
+ return False
694
+ for node in ast.walk(tree):
695
+ if not isinstance(node, ast.Call):
696
+ continue
697
+ callee = node.func
698
+ callee_name = (
699
+ callee.id
700
+ if isinstance(callee, ast.Name)
701
+ else callee.attr
702
+ if isinstance(callee, ast.Attribute)
703
+ else None
704
+ )
705
+ if callee_name == "Environment":
706
+ return True
707
+ return False
708
+
709
+
668
710
  def _spawn_target(source: Path) -> Path:
669
- """The path the ``LocalRuntime`` provider serves: the source itself for ``.py``
670
- files and directories, the surrounding directory for JSON/JSONL data files
671
- (the env's ``.py`` source lives next to the tasks file)."""
711
+ """The path the ``LocalRuntime`` provider serves.
712
+
713
+ Directories and env-defining ``.py`` files are served as-is. Task-only
714
+ sources (``tasks.py`` importing from ``env.py``) resolve to a sibling
715
+ ``env.py`` or the containing directory. JSON/JSONL data files use the
716
+ surrounding directory (the env source lives next to the tasks file).
717
+ """
672
718
  resolved = source.resolve()
673
- if resolved.is_dir() or resolved.suffix == ".py":
719
+ if resolved.is_dir():
720
+ return resolved
721
+ if resolved.suffix != ".py":
722
+ return resolved.parent
723
+ if _python_defines_environment(resolved):
674
724
  return resolved
725
+ env_py = resolved.parent / "env.py"
726
+ if env_py.is_file():
727
+ return env_py
675
728
  return resolved.parent
676
729
 
677
730
 
hud/cli/init.py CHANGED
@@ -76,8 +76,8 @@ def init_command(
76
76
  None,
77
77
  "--preset",
78
78
  "-p",
79
- help="Starter preset to download from GitHub (e.g. blank, coding, browser, "
80
- "deepresearch, rubrics, remote-browser). Omit for an interactive picker; in a "
79
+ help="Starter preset to download from GitHub (e.g. blank, browser, "
80
+ "deepresearch, cua, autonomous-businesses, verilog). Omit for an interactive picker; in a "
81
81
  "non-interactive shell, omitting it writes the minimal local scaffold.",
82
82
  ),
83
83
  ) -> None:
@@ -89,7 +89,7 @@ def init_command(
89
89
 
90
90
  Examples:
91
91
  hud init my-env # interactive picker (or local scaffold)
92
- hud init my-env --preset coding # download the coding starter
92
+ hud init my-env --preset browser # download the browser starter
93
93
  hud init my-env --dir envs # create ./envs/my-env[/not dim]
94
94
  """
95
95
  hud_console = HUDConsole()
hud/cli/jobs.py ADDED
@@ -0,0 +1,146 @@
1
+ """``hud jobs`` — list jobs and their traces."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+
7
+ import typer
8
+ from rich.console import Console
9
+ from rich.panel import Panel
10
+ from rich.table import Table
11
+
12
+ console = Console()
13
+
14
+ jobs_app = typer.Typer(
15
+ name="jobs",
16
+ help="List jobs and their traces",
17
+ add_completion=False,
18
+ rich_markup_mode="rich",
19
+ no_args_is_help=False,
20
+ )
21
+
22
+
23
+ @jobs_app.callback(invoke_without_command=True)
24
+ def jobs_command(
25
+ ctx: typer.Context,
26
+ job_id: str | None = typer.Argument(None, help="Job ID — omit to list recent jobs"),
27
+ json_output: bool = typer.Option(False, "--json", help="Output as JSON"),
28
+ limit: int = typer.Option(20, "--limit", "-n", help="Max rows to show"),
29
+ ) -> None:
30
+ """List recent jobs, or show traces for a specific job.
31
+
32
+ Without an argument, lists the most recent jobs.
33
+ With a job id, lists all traces for that job.
34
+ """
35
+ if ctx.invoked_subcommand is not None:
36
+ return
37
+
38
+ from hud.cli.utils.api import require_api_key
39
+
40
+ require_api_key("list jobs")
41
+
42
+ if job_id:
43
+ _show_job_traces(job_id, json_output=json_output, limit=limit)
44
+ else:
45
+ _list_jobs(json_output=json_output, limit=limit)
46
+
47
+
48
+ # ── job listing ────────────────────────────────────────────────────────────────
49
+
50
+
51
+ def _list_jobs(*, json_output: bool, limit: int) -> None:
52
+ from hud.utils.platform import PlatformClient
53
+
54
+ client = PlatformClient.from_settings()
55
+ try:
56
+ data = client.get("/jobs", params={"limit": limit})
57
+ except Exception as e:
58
+ console.print(f"[red]Failed to fetch jobs: {e}[/red]")
59
+ raise typer.Exit(1) from e
60
+
61
+ items = data if isinstance(data, list) else (data.get("items") or [])
62
+
63
+ if json_output:
64
+ console.print_json(json.dumps(items, indent=2, default=str))
65
+ return
66
+
67
+ if not items:
68
+ console.print("[yellow]No jobs found.[/yellow]")
69
+ return
70
+
71
+ console.print(Panel.fit("[bold cyan]Recent Jobs[/bold cyan]", border_style="cyan"))
72
+ table = Table()
73
+ table.add_column("ID", style="blue", no_wrap=True)
74
+ table.add_column("Name", style="cyan")
75
+ table.add_column("Taskset", style="dim")
76
+ table.add_column("Status", style="yellow")
77
+ table.add_column("Created", style="dim")
78
+
79
+ from hud.settings import settings
80
+
81
+ web = settings.hud_web_url.rstrip("/")
82
+
83
+ for job in items:
84
+ jid = str(job.get("id") or "")
85
+ table.add_row(
86
+ jid,
87
+ job.get("name") or "-",
88
+ job.get("taskset_name") or "-",
89
+ job.get("status") or "-",
90
+ (str(job.get("created_at") or ""))[:19],
91
+ )
92
+ console.print(table)
93
+ console.print(f"\n[dim]View: {web}/jobs[/dim]")
94
+ console.print("[dim]Tip: hud jobs <id> to see traces for a specific job[/dim]")
95
+
96
+
97
+ # ── job traces ────────────────────────────────────────────────────────────────
98
+
99
+
100
+ def _show_job_traces(job_id: str, *, json_output: bool, limit: int) -> None:
101
+ from hud.settings import settings
102
+ from hud.utils.platform import PlatformClient
103
+
104
+ client = PlatformClient.from_settings()
105
+ try:
106
+ data = client.get(f"/jobs/{job_id}/traces", params={"limit": limit})
107
+ except Exception as e:
108
+ console.print(f"[red]Failed to fetch traces: {e}[/red]")
109
+ raise typer.Exit(1) from e
110
+
111
+ items = data if isinstance(data, list) else (data.get("items") or [])
112
+
113
+ if json_output:
114
+ console.print_json(json.dumps(items, indent=2, default=str))
115
+ return
116
+
117
+ web = settings.hud_web_url.rstrip("/")
118
+
119
+ if not items:
120
+ console.print("[yellow]No traces found for this job.[/yellow]")
121
+ console.print(f"[dim]View: {web}/jobs/{job_id}[/dim]")
122
+ return
123
+
124
+ console.print(
125
+ Panel.fit(f"[bold cyan]Job Traces[/bold cyan] [dim]{job_id}[/dim]", border_style="cyan")
126
+ )
127
+ table = Table()
128
+ table.add_column("Trace ID", style="blue", no_wrap=True)
129
+ table.add_column("Status", style="yellow")
130
+ table.add_column("Reward", style="green", justify="right")
131
+ table.add_column("Started", style="dim")
132
+ table.add_column("Error", style="red")
133
+
134
+ for tr in items:
135
+ tid = str(tr.get("id") or "")
136
+ reward = tr.get("reward")
137
+ table.add_row(
138
+ tid,
139
+ tr.get("status") or "-",
140
+ f"{reward:.3f}" if reward is not None else "-",
141
+ (str(tr.get("start_time") or tr.get("created_at") or ""))[:19],
142
+ (tr.get("error") or "")[:40],
143
+ )
144
+ console.print(table)
145
+ console.print(f"\n[dim]View: {web}/jobs/{job_id}[/dim]")
146
+ console.print("[dim]Tip: hud trace <trace_id> to inspect a specific rollout[/dim]")
hud/cli/models.py CHANGED
@@ -71,6 +71,8 @@ def list_models(
71
71
  )
72
72
  console.print(table)
73
73
  console.print(f"\n[dim]Gateway: {settings.hud_gateway_url}[/dim]")
74
+ web = settings.hud_web_url.rstrip("/")
75
+ console.print(f"[dim]View a model in the browser: {web}/models/<id>[/dim]")
74
76
 
75
77
 
76
78
  @models_app.command("fork")
@@ -116,6 +118,7 @@ def fork_model(
116
118
  )
117
119
  )
118
120
  console.print(f"\n[dim]Train it: hud.TrainingClient({slug!r})[/dim]")
121
+ console.print(f"[dim]View: {_model_url(model['id'])}[/dim]")
119
122
 
120
123
 
121
124
  @models_app.command("checkpoints")
@@ -127,13 +130,15 @@ def list_checkpoints(
127
130
  from hud.cli.utils.api import require_api_key
128
131
 
129
132
  require_api_key("list checkpoints")
130
- checkpoints = _get_checkpoints(model)
133
+ model_id = _resolve_model_id(model)
134
+ checkpoints = _get_checkpoints(model_id)
131
135
 
132
136
  if json_output:
133
137
  console.print_json(json.dumps(checkpoints, indent=2))
134
138
  return
135
139
  if not checkpoints:
136
140
  console.print("[yellow]No checkpoints yet — this model serves its base weights[/yellow]")
141
+ console.print(f"[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
137
142
  return
138
143
 
139
144
  checkpoints = sorted(checkpoints, key=lambda c: c.get("created_at") or "")
@@ -155,6 +160,7 @@ def list_checkpoints(
155
160
  (ckpt.get("created_at") or "")[:19],
156
161
  )
157
162
  console.print(table)
163
+ console.print(f"\n[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
158
164
 
159
165
 
160
166
  @models_app.command("head")
@@ -170,19 +176,22 @@ def show_head(
170
176
  from hud.cli.utils.api import require_api_key
171
177
 
172
178
  require_api_key("manage head")
179
+ model_id = _resolve_model_id(model)
173
180
 
174
181
  if set_to is not None:
175
- _set_head(model, set_to)
182
+ _set_head(model_id, set_to)
176
183
  console.print(f"[green]Head set to[/green] [cyan]{set_to}[/cyan]")
184
+ console.print(f"[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
177
185
  return
178
186
 
179
- head = next((c for c in _get_checkpoints(model) if c.get("is_active")), None)
187
+ head = next((c for c in _get_checkpoints(model_id) if c.get("is_active")), None)
180
188
 
181
189
  if json_output:
182
190
  console.print_json(json.dumps(head, indent=2))
183
191
  return
184
192
  if head is None:
185
193
  console.print("[yellow]No active checkpoint — this model serves its base weights[/yellow]")
194
+ console.print(f"[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
186
195
  return
187
196
 
188
197
  reward = head.get("mean_reward")
@@ -196,6 +205,15 @@ def show_head(
196
205
  border_style="green",
197
206
  )
198
207
  )
208
+ console.print(f"[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
209
+
210
+
211
+ def _model_url(model_id: str, *, tab: str | None = None) -> str:
212
+ """Web app URL for a model (optionally a specific tab, e.g. ``checkpoints``)."""
213
+ from hud.settings import settings
214
+
215
+ url = f"{settings.hud_web_url.rstrip('/')}/models/{model_id}"
216
+ return f"{url}?tab={tab}" if tab else url
199
217
 
200
218
 
201
219
  def _resolve_model_id(model: str) -> str:
hud/cli/templates.py CHANGED
@@ -13,7 +13,7 @@ COPY . .
13
13
 
14
14
  # Serve the Environment's control channel (tcp JSON-RPC) on 8765.
15
15
  EXPOSE 8765
16
- CMD ["uv", "run", "python", "-m", "hud", "dev", "env:env", "--host", "0.0.0.0", "--port", "8765"]
16
+ CMD ["uv", "run", "hud", "serve", "env:env", "--host", "0.0.0.0", "--port", "8765"]
17
17
  """
18
18
 
19
19
  # fmt: off
@@ -78,7 +78,7 @@ async def count(sentence: str, letter: str):
78
78
 
79
79
 
80
80
  # =============================================================================
81
- # TEST - run with: python env.py
81
+ # TEST - run with: uv run python env.py
82
82
  # =============================================================================
83
83
 
84
84
  async def test():
@@ -136,7 +136,6 @@ version = "0.1.0"
136
136
  requires-python = ">=3.11"
137
137
  dependencies = ["hud-python"]
138
138
 
139
- [build-system]
140
- requires = ["hatchling"]
141
- build-backend = "hatchling.build"
139
+ [tool.uv]
140
+ package = false
142
141
  """
@@ -48,7 +48,7 @@ class TestResolveEnvironmentName:
48
48
 
49
49
  def test_entrypoint_disambiguates_subagent(self, tmp_path: Path) -> None:
50
50
  (tmp_path / "Dockerfile").write_text(
51
- 'CMD ["hud", "dev", "env:env", "--port", "8765"]\n', encoding="utf-8"
51
+ 'CMD ["hud", "serve", "env:env", "--port", "8765"]\n', encoding="utf-8"
52
52
  )
53
53
  (tmp_path / "env.py").write_text('env = Environment("trace-explorer")\n', encoding="utf-8")
54
54
  (tmp_path / "verify.py").write_text(