PyPI - hud-python - Versions diffs - 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl - Mend

hud-python 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

hud/agents/__init__.py +11 -3
hud/agents/openai_compatible/agent.py +15 -4
hud/agents/tests/test_base.py +38 -2
hud/agents/tests/test_provider_native_tools.py +4 -4
hud/agents/types.py +7 -3
hud/cli/__init__.py +4 -0
hud/cli/eval.py +64 -11
hud/cli/init.py +3 -3
hud/cli/jobs.py +146 -0
hud/cli/models.py +21 -3
hud/cli/templates.py +4 -5
hud/cli/tests/test_deploy.py +1 -1
hud/cli/tests/test_eval_config.py +69 -0
hud/cli/tests/test_init.py +8 -0
hud/cli/trace.py +215 -0
hud/eval/job.py +33 -9
hud/eval/run.py +31 -6
hud/eval/runtime.py +51 -8
hud/eval/taskset.py +18 -2
hud/eval/tests/test_hosted.py +48 -0
hud/eval/tests/test_rollout.py +26 -1
hud/settings.py +2 -2
hud/train/__init__.py +2 -0
hud/train/base.py +68 -11
hud/train/client.py +41 -17
hud/train/types.py +38 -4
hud/utils/gateway.py +23 -0
hud/version.py +1 -1
{hud_python-0.6.2.dist-info → hud_python-0.6.4.dist-info}/METADATA +1 -1
{hud_python-0.6.2.dist-info → hud_python-0.6.4.dist-info}/RECORD +33 -31
{hud_python-0.6.2.dist-info → hud_python-0.6.4.dist-info}/WHEEL +0 -0
{hud_python-0.6.2.dist-info → hud_python-0.6.4.dist-info}/entry_points.txt +0 -0
{hud_python-0.6.2.dist-info → hud_python-0.6.4.dist-info}/licenses/LICENSE +0 -0

hud/agents/__init__.py CHANGED Viewed

@@ -8,7 +8,12 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Any, cast
 from hud.types import AgentType
-from hud.utils.gateway import build_gateway_client, list_gateway_models
+from hud.utils.gateway import (
+    build_gateway_client,
+    gateway_model_aliases,
+    list_gateway_models,
+    normalize_gateway_model_id,
+)
 if TYPE_CHECKING:
     from typing import TypeAlias
@@ -27,6 +32,8 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
     For direct API access with provider API keys, instantiate the agent classes directly.
     """
+    requested_model = model
+    model = normalize_gateway_model_id(model)
     agent_type = next((candidate for candidate in AgentType if candidate.value == model), None)
     if agent_type is not None:
         model_id = model
@@ -73,7 +80,8 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
                 for n in (gm.id, gm.name, gm.model_name)
                 if isinstance(n, str)
             ]
-            near = difflib.get_close_matches(model, known, n=3, cutoff=0.5)
+            known.extend(gateway_model_aliases())
+            near = difflib.get_close_matches(requested_model, known, n=3, cutoff=0.5)
             hint = (
                 f" Did you mean: {', '.join(near)}?"
                 if near
@@ -84,7 +92,7 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
                 if gateway_models
                 else "the HUD gateway registry (empty — is HUD_API_KEY set?)"
             )
-            raise ValueError(f"Model {model!r} not found in {source}.{hint}")
+            raise ValueError(f"Model {requested_model!r} not found in {source}.{hint}")
     kwargs.setdefault("model", model_id)
     kwargs.setdefault("model_client", build_gateway_client(provider_name))

hud/agents/openai_compatible/agent.py CHANGED Viewed

@@ -193,16 +193,27 @@ class OpenAIChatAgent(ToolAgent[ChatCompletionMessageParam, OpenAIChatConfig]):
         sample: Sample | None = None
         if return_token_ids:
             prompt_token_ids = getattr(choice, "prompt_token_ids", None)
+            # Multimodal prompt (text + image chunks): the only prompt representation
+            # that survives image inputs; flat prompt_token_ids is null in that case.
+            prompt_chunks = getattr(choice, "prompt_chunks", None)
             token_ids = getattr(choice, "token_ids", None)
-            if prompt_token_ids is not None and token_ids is not None:
-                chat_state.continuation_token_ids = list(prompt_token_ids) + list(token_ids)
-                chat_state.continuation_message_count = len(messages)
+            has_prompt = prompt_token_ids is not None or prompt_chunks is not None
+            if token_ids is not None and has_prompt:
                 content_lp = choice.logprobs.content if choice.logprobs else None
                 sample = Sample(
-                    prompt_token_ids=list(prompt_token_ids),
+                    prompt_token_ids=list(prompt_token_ids) if prompt_token_ids is not None else [],
+                    prompt_chunks=list(prompt_chunks) if prompt_chunks is not None else None,
                     output_token_ids=list(token_ids),
                     output_logprobs=[tok.logprob for tok in content_lp] if content_lp else [],
                 )
+                # KV-cache continuation only applies to flat text prompts; clear any
+                # stale state when the gateway returns chunks-only (multimodal turn).
+                if prompt_token_ids is not None:
+                    chat_state.continuation_token_ids = list(prompt_token_ids) + list(token_ids)
+                    chat_state.continuation_message_count = len(messages)
+                else:
+                    chat_state.continuation_token_ids = None
+                    chat_state.continuation_message_count = None
         tool_calls: list[MCPToolCall] = []
         for tc in function_calls:

hud/agents/tests/test_base.py CHANGED Viewed

@@ -108,7 +108,7 @@ def test_create_agent_resolves_gateway_model_metadata(
     model = GatewayModelInfo(
         id="ft:custom-123",
-        model_name="gpt-5.4",
+        model_name="gpt-5.5",
         sdk_agent_type="openai_compatible",
         provider=GatewayProviderInfo(name="openai"),
     )
@@ -122,4 +122,40 @@ def test_create_agent_resolves_gateway_model_metadata(
     agent = create_agent("ft:custom-123")
     assert isinstance(agent, OpenAIChatAgent)
-    assert agent.config.model == "gpt-5.4"  # resolved to the model's real name
+    assert agent.config.model == "gpt-5.5"  # resolved to the model's real name
+@pytest.mark.parametrize(
+    ("alias", "canonical"),
+    [
+        ("deepseek-v4", "deepseek/deepseek-v4-pro"),
+        ("deepseek-v4-flash", "deepseek/deepseek-v4-flash"),
+        ("glm-5.2", "z-ai/glm-5.2"),
+        ("kimi-k2.6", "moonshotai/kimi-k2.6"),
+        ("minimax-m3", "MiniMax-M3"),
+    ],
+)
+def test_create_agent_accepts_gateway_model_aliases(
+    alias: str,
+    canonical: str,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    from hud.utils.gateway import GatewayModelInfo, GatewayProviderInfo
+    model = GatewayModelInfo(
+        id=canonical,
+        model_name=canonical,
+        sdk_agent_type="openai_compatible",
+        provider=GatewayProviderInfo(name="openai"),
+    )
+    monkeypatch.setattr("hud.agents.list_gateway_models", lambda: [model])
+    def _build_client(_provider: str) -> object:
+        return object()
+    monkeypatch.setattr("hud.agents.build_gateway_client", _build_client)
+    agent = create_agent(alias)
+    assert isinstance(agent, OpenAIChatAgent)
+    assert agent.config.model == canonical

hud/agents/tests/test_provider_native_tools.py CHANGED Viewed

@@ -102,7 +102,7 @@ def _commands(tool: Any) -> list[str]:
 async def test_openai_shell_wraps_command_with_timeout() -> None:
-    tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.4"), client=_ssh())
+    tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.5"), client=_ssh())
     result = await tool.execute({"commands": ["pwd"], "timeout_ms": 2500})
@@ -114,7 +114,7 @@ async def test_openai_shell_wraps_command_with_timeout() -> None:
 async def test_openai_shell_runs_each_command_without_timeout() -> None:
-    tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.4"), client=_ssh())
+    tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.5"), client=_ssh())
     await tool.execute({"commands": ["echo a", "echo b"]})
@@ -122,7 +122,7 @@ async def test_openai_shell_runs_each_command_without_timeout() -> None:
 async def test_openai_shell_rejects_non_list_commands_without_running() -> None:
-    tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.4"), client=_ssh())
+    tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.5"), client=_ssh())
     result = await tool.execute({"commands": 123})
@@ -131,7 +131,7 @@ async def test_openai_shell_rejects_non_list_commands_without_running() -> None:
 def test_openai_shell_to_params_is_shell_type() -> None:
-    tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.4"), client=_ssh())
+    tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.5"), client=_ssh())
     assert tool.to_params()["type"] == "shell"

hud/agents/types.py CHANGED Viewed

@@ -99,7 +99,7 @@ class OpenAIConfig(AgentConfig):
     """Configuration for OpenAIAgent."""
     model_name: str = "OpenAI"
-    model: str = Field(default="gpt-5.4", validation_alias=_model_alias)
+    model: str = Field(default="gpt-5.5", validation_alias=_model_alias)
     max_output_tokens: int | None = None
     temperature: float | None = None
     reasoning: Any = None  # openai Reasoning
@@ -113,7 +113,7 @@ class OpenAIChatConfig(AgentConfig):
     """Configuration for OpenAIChatAgent."""
     model_name: str = "OpenAI Chat"
-    model: str = Field(default="gpt-5-mini", validation_alias=_model_alias)
+    model: str = Field(default="gpt-5.4-mini", validation_alias=_model_alias)
     checkpoint: str | None = Field(
         default=None,
         description="Specific checkpoint name for inference routing. "
@@ -139,7 +139,7 @@ class ClaudeSDKConfig(AgentConfig):
     """
     model_name: str = "Claude Code"
-    model: str = Field(default="claude-sonnet-4-5", validation_alias=_model_alias)
+    model: str = Field(default="claude-sonnet-4-6", validation_alias=_model_alias)
     permission_mode: str = "bypassPermissions"
     max_steps: int = -1
     allowed_tools: list[str] = Field(
@@ -222,6 +222,10 @@ class Sample(BaseModel):
     """
     prompt_token_ids: list[int] = Field(default_factory=list[int])
+    # Multimodal prompt as serialized ``ModelInput`` chunks (text + image), set by
+    # vision rollouts where the prompt is not a flat token list. When present it is
+    # the authoritative prompt for training; ``prompt_token_ids`` stays empty.
+    prompt_chunks: list[dict[str, Any]] | None = None
     output_token_ids: list[int] = Field(default_factory=list[int])
     output_logprobs: list[float] = Field(default_factory=list[float])

hud/cli/__init__.py CHANGED Viewed

@@ -35,11 +35,13 @@ from .client import client_app  # noqa: E402
 from .deploy import deploy_command  # noqa: E402
 from .eval import eval_command  # noqa: E402
 from .init import init_command  # noqa: E402
+from .jobs import jobs_app  # noqa: E402
 from .login import login_command  # noqa: E402
 from .models import models_app  # noqa: E402
 from .serve import serve_command  # noqa: E402
 from .sync import sync_app  # noqa: E402
 from .task import task_app  # noqa: E402
+from .trace import trace_app  # noqa: E402
 app.command(name="serve")(serve_command)
 app.command(name="dev", deprecated=True, hidden=True)(serve_command)  # alias for now
@@ -49,6 +51,8 @@ app.command(name="eval")(eval_command)
 app.command(name="init")(init_command)
 app.command(name="cancel")(cancel_command)
 app.add_typer(models_app, name="models")
+app.add_typer(jobs_app, name="jobs")
+app.add_typer(trace_app, name="trace")
 @app.command(name="set")

hud/cli/eval.py CHANGED Viewed

@@ -5,6 +5,7 @@ Config Override Order: CLI arguments > .hud_eval.toml > defaults
 from __future__ import annotations
+import ast
 import asyncio
 import logging
 import os
@@ -42,8 +43,9 @@ def _resolve_model_from_catalog(model_id: str) -> tuple[AgentType, str] | None:
     Returns None if the model isn't found or the catalog is unreachable.
     """
     try:
-        from hud.utils.gateway import list_gateway_models
+        from hud.utils.gateway import list_gateway_models, normalize_gateway_model_id
+        model_id = normalize_gateway_model_id(model_id)
         models = list_gateway_models()
     except Exception:
         return None
@@ -116,8 +118,9 @@ class AgentPreset:
 _AGENT_PRESETS: list[AgentPreset] = [
     AgentPreset("Claude Sonnet 4.6", AgentType.CLAUDE, "claude-sonnet-4-6"),
-    AgentPreset("GPT-5.4", AgentType.OPENAI, "gpt-5.4"),
-    AgentPreset("Gemini 3.1 Pro (Preview)", AgentType.GEMINI, "gemini-3-1-pro"),
+    AgentPreset("Claude Opus 4.8", AgentType.CLAUDE, "claude-opus-4-8"),
+    AgentPreset("GPT-5.5", AgentType.OPENAI, "gpt-5.5"),
+    AgentPreset("Gemini 3.1 Pro (Preview)", AgentType.GEMINI, "gemini-3.1-pro-preview"),
     AgentPreset(
         "Grok 4-1 Fast (xAI)",
         AgentType.OPENAI_COMPATIBLE,
@@ -130,10 +133,22 @@ _AGENT_PRESETS: list[AgentPreset] = [
         },
     ),
     AgentPreset(
-        "GLM-4.6V (Z-AI)",
+        "GLM 5.2 (Z.ai)",
         AgentType.OPENAI_COMPATIBLE,
-        "z-ai/glm-4.6v",
-        {"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "GLM-4.6V"}},
+        "z-ai/glm-5.2",
+        {"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "GLM 5.2"}},
+    ),
+    AgentPreset(
+        "Kimi K2.6 (Moonshot)",
+        AgentType.OPENAI_COMPATIBLE,
+        "moonshotai/kimi-k2.6",
+        {"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "Kimi K2.6"}},
+    ),
+    AgentPreset(
+        "MiniMax M3",
+        AgentType.OPENAI_COMPATIBLE,
+        "MiniMax-M3",
+        {"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "MiniMax M3"}},
     ),
 ]
@@ -161,7 +176,7 @@ _DEFAULT_CONFIG_TEMPLATE = """# HUD Eval Configuration
 # use_computer_beta = true
 [openai]
-# model = "gpt-4o"
+# model = "gpt-5.5"
 # temperature = 0.7
 # max_output_tokens = 4096
@@ -401,6 +416,11 @@ class EvalConfig(BaseModel):
         if self.model:
             kwargs["model"] = self.model
+        if isinstance(kwargs.get("model"), str):
+            from hud.utils.gateway import normalize_gateway_model_id
+            kwargs["model"] = normalize_gateway_model_id(kwargs["model"])
         if self.agent_type == AgentType.OPENAI_COMPATIBLE and "api_key" not in kwargs:
             base_url = kwargs.get("base_url", "")
             if settings.hud_gateway_url in base_url and settings.api_key:
@@ -665,13 +685,46 @@ def _build_agent(cfg: EvalConfig) -> Any:
     return cast("Any", cfg.agent_type.cls)(config=config)
+def _python_defines_environment(path: Path) -> bool:
+    """Return True when ``path`` constructs a v6 :class:`~hud.environment.Environment`."""
+    try:
+        tree = ast.parse(path.read_text(encoding="utf-8"))
+    except (OSError, SyntaxError):
+        return False
+    for node in ast.walk(tree):
+        if not isinstance(node, ast.Call):
+            continue
+        callee = node.func
+        callee_name = (
+            callee.id
+            if isinstance(callee, ast.Name)
+            else callee.attr
+            if isinstance(callee, ast.Attribute)
+            else None
+        )
+        if callee_name == "Environment":
+            return True
+    return False
 def _spawn_target(source: Path) -> Path:
-    """The path the ``LocalRuntime`` provider serves: the source itself for ``.py``
-    files and directories, the surrounding directory for JSON/JSONL data files
-    (the env's ``.py`` source lives next to the tasks file)."""
+    """The path the ``LocalRuntime`` provider serves.
+    Directories and env-defining ``.py`` files are served as-is. Task-only
+    sources (``tasks.py`` importing from ``env.py``) resolve to a sibling
+    ``env.py`` or the containing directory. JSON/JSONL data files use the
+    surrounding directory (the env source lives next to the tasks file).
+    """
     resolved = source.resolve()
-    if resolved.is_dir() or resolved.suffix == ".py":
+    if resolved.is_dir():
+        return resolved
+    if resolved.suffix != ".py":
+        return resolved.parent
+    if _python_defines_environment(resolved):
         return resolved
+    env_py = resolved.parent / "env.py"
+    if env_py.is_file():
+        return env_py
     return resolved.parent

hud/cli/init.py CHANGED Viewed

@@ -76,8 +76,8 @@ def init_command(
         None,
         "--preset",
         "-p",
-        help="Starter preset to download from GitHub (e.g. blank, coding, browser, "
-        "deepresearch, rubrics, remote-browser). Omit for an interactive picker; in a "
+        help="Starter preset to download from GitHub (e.g. blank, browser, "
+        "deepresearch, cua, autonomous-businesses, verilog). Omit for an interactive picker; in a "
         "non-interactive shell, omitting it writes the minimal local scaffold.",
     ),
 ) -> None:
@@ -89,7 +89,7 @@ def init_command(
     Examples:
         hud init my-env                  # interactive picker (or local scaffold)
-        hud init my-env --preset coding  # download the coding starter
+        hud init my-env --preset browser  # download the browser starter
         hud init my-env --dir envs       # create ./envs/my-env[/not dim]
     """
     hud_console = HUDConsole()

hud/cli/jobs.py ADDED Viewed

@@ -0,0 +1,146 @@
+"""``hud jobs`` — list jobs and their traces."""
+from __future__ import annotations
+import json
+import typer
+from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
+console = Console()
+jobs_app = typer.Typer(
+    name="jobs",
+    help="List jobs and their traces",
+    add_completion=False,
+    rich_markup_mode="rich",
+    no_args_is_help=False,
+)
+@jobs_app.callback(invoke_without_command=True)
+def jobs_command(
+    ctx: typer.Context,
+    job_id: str | None = typer.Argument(None, help="Job ID — omit to list recent jobs"),
+    json_output: bool = typer.Option(False, "--json", help="Output as JSON"),
+    limit: int = typer.Option(20, "--limit", "-n", help="Max rows to show"),
+) -> None:
+    """List recent jobs, or show traces for a specific job.
+    Without an argument, lists the most recent jobs.
+    With a job id, lists all traces for that job.
+    """
+    if ctx.invoked_subcommand is not None:
+        return
+    from hud.cli.utils.api import require_api_key
+    require_api_key("list jobs")
+    if job_id:
+        _show_job_traces(job_id, json_output=json_output, limit=limit)
+    else:
+        _list_jobs(json_output=json_output, limit=limit)
+# ── job listing ────────────────────────────────────────────────────────────────
+def _list_jobs(*, json_output: bool, limit: int) -> None:
+    from hud.utils.platform import PlatformClient
+    client = PlatformClient.from_settings()
+    try:
+        data = client.get("/jobs", params={"limit": limit})
+    except Exception as e:
+        console.print(f"[red]Failed to fetch jobs: {e}[/red]")
+        raise typer.Exit(1) from e
+    items = data if isinstance(data, list) else (data.get("items") or [])
+    if json_output:
+        console.print_json(json.dumps(items, indent=2, default=str))
+        return
+    if not items:
+        console.print("[yellow]No jobs found.[/yellow]")
+        return
+    console.print(Panel.fit("[bold cyan]Recent Jobs[/bold cyan]", border_style="cyan"))
+    table = Table()
+    table.add_column("ID", style="blue", no_wrap=True)
+    table.add_column("Name", style="cyan")
+    table.add_column("Taskset", style="dim")
+    table.add_column("Status", style="yellow")
+    table.add_column("Created", style="dim")
+    from hud.settings import settings
+    web = settings.hud_web_url.rstrip("/")
+    for job in items:
+        jid = str(job.get("id") or "")
+        table.add_row(
+            jid,
+            job.get("name") or "-",
+            job.get("taskset_name") or "-",
+            job.get("status") or "-",
+            (str(job.get("created_at") or ""))[:19],
+        )
+    console.print(table)
+    console.print(f"\n[dim]View: {web}/jobs[/dim]")
+    console.print("[dim]Tip: hud jobs <id> to see traces for a specific job[/dim]")
+# ── job traces ────────────────────────────────────────────────────────────────
+def _show_job_traces(job_id: str, *, json_output: bool, limit: int) -> None:
+    from hud.settings import settings
+    from hud.utils.platform import PlatformClient
+    client = PlatformClient.from_settings()
+    try:
+        data = client.get(f"/jobs/{job_id}/traces", params={"limit": limit})
+    except Exception as e:
+        console.print(f"[red]Failed to fetch traces: {e}[/red]")
+        raise typer.Exit(1) from e
+    items = data if isinstance(data, list) else (data.get("items") or [])
+    if json_output:
+        console.print_json(json.dumps(items, indent=2, default=str))
+        return
+    web = settings.hud_web_url.rstrip("/")
+    if not items:
+        console.print("[yellow]No traces found for this job.[/yellow]")
+        console.print(f"[dim]View: {web}/jobs/{job_id}[/dim]")
+        return
+    console.print(
+        Panel.fit(f"[bold cyan]Job Traces[/bold cyan] [dim]{job_id}[/dim]", border_style="cyan")
+    )
+    table = Table()
+    table.add_column("Trace ID", style="blue", no_wrap=True)
+    table.add_column("Status", style="yellow")
+    table.add_column("Reward", style="green", justify="right")
+    table.add_column("Started", style="dim")
+    table.add_column("Error", style="red")
+    for tr in items:
+        tid = str(tr.get("id") or "")
+        reward = tr.get("reward")
+        table.add_row(
+            tid,
+            tr.get("status") or "-",
+            f"{reward:.3f}" if reward is not None else "-",
+            (str(tr.get("start_time") or tr.get("created_at") or ""))[:19],
+            (tr.get("error") or "")[:40],
+        )
+    console.print(table)
+    console.print(f"\n[dim]View: {web}/jobs/{job_id}[/dim]")
+    console.print("[dim]Tip: hud trace <trace_id> to inspect a specific rollout[/dim]")

hud/cli/models.py CHANGED Viewed

@@ -71,6 +71,8 @@ def list_models(
         )
     console.print(table)
     console.print(f"\n[dim]Gateway: {settings.hud_gateway_url}[/dim]")
+    web = settings.hud_web_url.rstrip("/")
+    console.print(f"[dim]View a model in the browser: {web}/models/<id>[/dim]")
 @models_app.command("fork")
@@ -116,6 +118,7 @@ def fork_model(
         )
     )
     console.print(f"\n[dim]Train it: hud.TrainingClient({slug!r})[/dim]")
+    console.print(f"[dim]View: {_model_url(model['id'])}[/dim]")
 @models_app.command("checkpoints")
@@ -127,13 +130,15 @@ def list_checkpoints(
     from hud.cli.utils.api import require_api_key
     require_api_key("list checkpoints")
-    checkpoints = _get_checkpoints(model)
+    model_id = _resolve_model_id(model)
+    checkpoints = _get_checkpoints(model_id)
     if json_output:
         console.print_json(json.dumps(checkpoints, indent=2))
         return
     if not checkpoints:
         console.print("[yellow]No checkpoints yet — this model serves its base weights[/yellow]")
+        console.print(f"[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
         return
     checkpoints = sorted(checkpoints, key=lambda c: c.get("created_at") or "")
@@ -155,6 +160,7 @@ def list_checkpoints(
             (ckpt.get("created_at") or "")[:19],
         )
     console.print(table)
+    console.print(f"\n[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
 @models_app.command("head")
@@ -170,19 +176,22 @@ def show_head(
     from hud.cli.utils.api import require_api_key
     require_api_key("manage head")
+    model_id = _resolve_model_id(model)
     if set_to is not None:
-        _set_head(model, set_to)
+        _set_head(model_id, set_to)
         console.print(f"[green]Head set to[/green] [cyan]{set_to}[/cyan]")
+        console.print(f"[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
         return
-    head = next((c for c in _get_checkpoints(model) if c.get("is_active")), None)
+    head = next((c for c in _get_checkpoints(model_id) if c.get("is_active")), None)
     if json_output:
         console.print_json(json.dumps(head, indent=2))
         return
     if head is None:
         console.print("[yellow]No active checkpoint — this model serves its base weights[/yellow]")
+        console.print(f"[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
         return
     reward = head.get("mean_reward")
@@ -196,6 +205,15 @@ def show_head(
             border_style="green",
         )
     )
+    console.print(f"[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
+def _model_url(model_id: str, *, tab: str | None = None) -> str:
+    """Web app URL for a model (optionally a specific tab, e.g. ``checkpoints``)."""
+    from hud.settings import settings
+    url = f"{settings.hud_web_url.rstrip('/')}/models/{model_id}"
+    return f"{url}?tab={tab}" if tab else url
 def _resolve_model_id(model: str) -> str:

hud/cli/templates.py CHANGED Viewed

@@ -13,7 +13,7 @@ COPY . .
 # Serve the Environment's control channel (tcp JSON-RPC) on 8765.
 EXPOSE 8765
-CMD ["uv", "run", "python", "-m", "hud", "dev", "env:env", "--host", "0.0.0.0", "--port", "8765"]
+CMD ["uv", "run", "hud", "serve", "env:env", "--host", "0.0.0.0", "--port", "8765"]
 """
 # fmt: off
@@ -78,7 +78,7 @@ async def count(sentence: str, letter: str):
 # =============================================================================
-# TEST - run with: python env.py
+# TEST - run with: uv run python env.py
 # =============================================================================
 async def test():
@@ -136,7 +136,6 @@ version = "0.1.0"
 requires-python = ">=3.11"
 dependencies = ["hud-python"]
-[build-system]
-requires = ["hatchling"]
-build-backend = "hatchling.build"
+[tool.uv]
+package = false
 """

hud/cli/tests/test_deploy.py CHANGED Viewed

@@ -48,7 +48,7 @@ class TestResolveEnvironmentName:
     def test_entrypoint_disambiguates_subagent(self, tmp_path: Path) -> None:
         (tmp_path / "Dockerfile").write_text(
-            'CMD ["hud", "dev", "env:env", "--port", "8765"]\n', encoding="utf-8"
+            'CMD ["hud", "serve", "env:env", "--port", "8765"]\n', encoding="utf-8"
         )
         (tmp_path / "env.py").write_text('env = Environment("trace-explorer")\n', encoding="utf-8")
         (tmp_path / "verify.py").write_text(

hud-python 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl

hud-python 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl