hud-python 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/agents/__init__.py +11 -3
- hud/agents/openai_compatible/agent.py +15 -4
- hud/agents/tests/test_base.py +38 -2
- hud/agents/tests/test_provider_native_tools.py +4 -4
- hud/agents/types.py +7 -3
- hud/cli/__init__.py +4 -0
- hud/cli/eval.py +64 -11
- hud/cli/init.py +3 -3
- hud/cli/jobs.py +146 -0
- hud/cli/models.py +21 -3
- hud/cli/templates.py +4 -5
- hud/cli/tests/test_deploy.py +1 -1
- hud/cli/tests/test_eval_config.py +69 -0
- hud/cli/tests/test_init.py +8 -0
- hud/cli/trace.py +215 -0
- hud/eval/job.py +33 -9
- hud/eval/run.py +31 -6
- hud/eval/runtime.py +51 -8
- hud/eval/taskset.py +18 -2
- hud/eval/tests/test_hosted.py +48 -0
- hud/eval/tests/test_rollout.py +26 -1
- hud/settings.py +2 -2
- hud/train/__init__.py +2 -0
- hud/train/base.py +68 -11
- hud/train/client.py +41 -17
- hud/train/types.py +38 -4
- hud/utils/gateway.py +23 -0
- hud/version.py +1 -1
- {hud_python-0.6.2.dist-info → hud_python-0.6.4.dist-info}/METADATA +1 -1
- {hud_python-0.6.2.dist-info → hud_python-0.6.4.dist-info}/RECORD +33 -31
- {hud_python-0.6.2.dist-info → hud_python-0.6.4.dist-info}/WHEEL +0 -0
- {hud_python-0.6.2.dist-info → hud_python-0.6.4.dist-info}/entry_points.txt +0 -0
- {hud_python-0.6.2.dist-info → hud_python-0.6.4.dist-info}/licenses/LICENSE +0 -0
hud/agents/__init__.py
CHANGED
|
@@ -8,7 +8,12 @@ from __future__ import annotations
|
|
|
8
8
|
from typing import TYPE_CHECKING, Any, cast
|
|
9
9
|
|
|
10
10
|
from hud.types import AgentType
|
|
11
|
-
from hud.utils.gateway import
|
|
11
|
+
from hud.utils.gateway import (
|
|
12
|
+
build_gateway_client,
|
|
13
|
+
gateway_model_aliases,
|
|
14
|
+
list_gateway_models,
|
|
15
|
+
normalize_gateway_model_id,
|
|
16
|
+
)
|
|
12
17
|
|
|
13
18
|
if TYPE_CHECKING:
|
|
14
19
|
from typing import TypeAlias
|
|
@@ -27,6 +32,8 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
|
|
|
27
32
|
|
|
28
33
|
For direct API access with provider API keys, instantiate the agent classes directly.
|
|
29
34
|
"""
|
|
35
|
+
requested_model = model
|
|
36
|
+
model = normalize_gateway_model_id(model)
|
|
30
37
|
agent_type = next((candidate for candidate in AgentType if candidate.value == model), None)
|
|
31
38
|
if agent_type is not None:
|
|
32
39
|
model_id = model
|
|
@@ -73,7 +80,8 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
|
|
|
73
80
|
for n in (gm.id, gm.name, gm.model_name)
|
|
74
81
|
if isinstance(n, str)
|
|
75
82
|
]
|
|
76
|
-
|
|
83
|
+
known.extend(gateway_model_aliases())
|
|
84
|
+
near = difflib.get_close_matches(requested_model, known, n=3, cutoff=0.5)
|
|
77
85
|
hint = (
|
|
78
86
|
f" Did you mean: {', '.join(near)}?"
|
|
79
87
|
if near
|
|
@@ -84,7 +92,7 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
|
|
|
84
92
|
if gateway_models
|
|
85
93
|
else "the HUD gateway registry (empty — is HUD_API_KEY set?)"
|
|
86
94
|
)
|
|
87
|
-
raise ValueError(f"Model {
|
|
95
|
+
raise ValueError(f"Model {requested_model!r} not found in {source}.{hint}")
|
|
88
96
|
|
|
89
97
|
kwargs.setdefault("model", model_id)
|
|
90
98
|
kwargs.setdefault("model_client", build_gateway_client(provider_name))
|
|
@@ -193,16 +193,27 @@ class OpenAIChatAgent(ToolAgent[ChatCompletionMessageParam, OpenAIChatConfig]):
|
|
|
193
193
|
sample: Sample | None = None
|
|
194
194
|
if return_token_ids:
|
|
195
195
|
prompt_token_ids = getattr(choice, "prompt_token_ids", None)
|
|
196
|
+
# Multimodal prompt (text + image chunks): the only prompt representation
|
|
197
|
+
# that survives image inputs; flat prompt_token_ids is null in that case.
|
|
198
|
+
prompt_chunks = getattr(choice, "prompt_chunks", None)
|
|
196
199
|
token_ids = getattr(choice, "token_ids", None)
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
chat_state.continuation_message_count = len(messages)
|
|
200
|
+
has_prompt = prompt_token_ids is not None or prompt_chunks is not None
|
|
201
|
+
if token_ids is not None and has_prompt:
|
|
200
202
|
content_lp = choice.logprobs.content if choice.logprobs else None
|
|
201
203
|
sample = Sample(
|
|
202
|
-
prompt_token_ids=list(prompt_token_ids),
|
|
204
|
+
prompt_token_ids=list(prompt_token_ids) if prompt_token_ids is not None else [],
|
|
205
|
+
prompt_chunks=list(prompt_chunks) if prompt_chunks is not None else None,
|
|
203
206
|
output_token_ids=list(token_ids),
|
|
204
207
|
output_logprobs=[tok.logprob for tok in content_lp] if content_lp else [],
|
|
205
208
|
)
|
|
209
|
+
# KV-cache continuation only applies to flat text prompts; clear any
|
|
210
|
+
# stale state when the gateway returns chunks-only (multimodal turn).
|
|
211
|
+
if prompt_token_ids is not None:
|
|
212
|
+
chat_state.continuation_token_ids = list(prompt_token_ids) + list(token_ids)
|
|
213
|
+
chat_state.continuation_message_count = len(messages)
|
|
214
|
+
else:
|
|
215
|
+
chat_state.continuation_token_ids = None
|
|
216
|
+
chat_state.continuation_message_count = None
|
|
206
217
|
|
|
207
218
|
tool_calls: list[MCPToolCall] = []
|
|
208
219
|
for tc in function_calls:
|
hud/agents/tests/test_base.py
CHANGED
|
@@ -108,7 +108,7 @@ def test_create_agent_resolves_gateway_model_metadata(
|
|
|
108
108
|
|
|
109
109
|
model = GatewayModelInfo(
|
|
110
110
|
id="ft:custom-123",
|
|
111
|
-
model_name="gpt-5.
|
|
111
|
+
model_name="gpt-5.5",
|
|
112
112
|
sdk_agent_type="openai_compatible",
|
|
113
113
|
provider=GatewayProviderInfo(name="openai"),
|
|
114
114
|
)
|
|
@@ -122,4 +122,40 @@ def test_create_agent_resolves_gateway_model_metadata(
|
|
|
122
122
|
agent = create_agent("ft:custom-123")
|
|
123
123
|
|
|
124
124
|
assert isinstance(agent, OpenAIChatAgent)
|
|
125
|
-
assert agent.config.model == "gpt-5.
|
|
125
|
+
assert agent.config.model == "gpt-5.5" # resolved to the model's real name
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@pytest.mark.parametrize(
    ("alias", "canonical"),
    [
        ("deepseek-v4", "deepseek/deepseek-v4-pro"),
        ("deepseek-v4-flash", "deepseek/deepseek-v4-flash"),
        ("glm-5.2", "z-ai/glm-5.2"),
        ("kimi-k2.6", "moonshotai/kimi-k2.6"),
        ("minimax-m3", "MiniMax-M3"),
    ],
)
def test_create_agent_accepts_gateway_model_aliases(
    alias: str,
    canonical: str,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Passing a short alias yields an agent configured with the canonical model id."""
    from hud.utils.gateway import GatewayModelInfo, GatewayProviderInfo

    # The patched catalog holds one entry whose id and model_name are both
    # the canonical id for the alias under test.
    entry = GatewayModelInfo(
        id=canonical,
        model_name=canonical,
        sdk_agent_type="openai_compatible",
        provider=GatewayProviderInfo(name="openai"),
    )
    monkeypatch.setattr("hud.agents.list_gateway_models", lambda: [entry])
    # Replace client construction with a stub that returns a bare object.
    monkeypatch.setattr("hud.agents.build_gateway_client", lambda _provider: object())

    resolved = create_agent(alias)

    assert isinstance(resolved, OpenAIChatAgent)
    assert resolved.config.model == canonical
|
|
@@ -102,7 +102,7 @@ def _commands(tool: Any) -> list[str]:
|
|
|
102
102
|
|
|
103
103
|
|
|
104
104
|
async def test_openai_shell_wraps_command_with_timeout() -> None:
|
|
105
|
-
tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.
|
|
105
|
+
tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.5"), client=_ssh())
|
|
106
106
|
|
|
107
107
|
result = await tool.execute({"commands": ["pwd"], "timeout_ms": 2500})
|
|
108
108
|
|
|
@@ -114,7 +114,7 @@ async def test_openai_shell_wraps_command_with_timeout() -> None:
|
|
|
114
114
|
|
|
115
115
|
|
|
116
116
|
async def test_openai_shell_runs_each_command_without_timeout() -> None:
|
|
117
|
-
tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.
|
|
117
|
+
tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.5"), client=_ssh())
|
|
118
118
|
|
|
119
119
|
await tool.execute({"commands": ["echo a", "echo b"]})
|
|
120
120
|
|
|
@@ -122,7 +122,7 @@ async def test_openai_shell_runs_each_command_without_timeout() -> None:
|
|
|
122
122
|
|
|
123
123
|
|
|
124
124
|
async def test_openai_shell_rejects_non_list_commands_without_running() -> None:
|
|
125
|
-
tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.
|
|
125
|
+
tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.5"), client=_ssh())
|
|
126
126
|
|
|
127
127
|
result = await tool.execute({"commands": 123})
|
|
128
128
|
|
|
@@ -131,7 +131,7 @@ async def test_openai_shell_rejects_non_list_commands_without_running() -> None:
|
|
|
131
131
|
|
|
132
132
|
|
|
133
133
|
def test_openai_shell_to_params_is_shell_type() -> None:
|
|
134
|
-
tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.
|
|
134
|
+
tool = OpenAIShellTool(spec=OpenAIShellTool.default_spec("gpt-5.5"), client=_ssh())
|
|
135
135
|
assert tool.to_params()["type"] == "shell"
|
|
136
136
|
|
|
137
137
|
|
hud/agents/types.py
CHANGED
|
@@ -99,7 +99,7 @@ class OpenAIConfig(AgentConfig):
|
|
|
99
99
|
"""Configuration for OpenAIAgent."""
|
|
100
100
|
|
|
101
101
|
model_name: str = "OpenAI"
|
|
102
|
-
model: str = Field(default="gpt-5.
|
|
102
|
+
model: str = Field(default="gpt-5.5", validation_alias=_model_alias)
|
|
103
103
|
max_output_tokens: int | None = None
|
|
104
104
|
temperature: float | None = None
|
|
105
105
|
reasoning: Any = None # openai Reasoning
|
|
@@ -113,7 +113,7 @@ class OpenAIChatConfig(AgentConfig):
|
|
|
113
113
|
"""Configuration for OpenAIChatAgent."""
|
|
114
114
|
|
|
115
115
|
model_name: str = "OpenAI Chat"
|
|
116
|
-
model: str = Field(default="gpt-5-mini", validation_alias=_model_alias)
|
|
116
|
+
model: str = Field(default="gpt-5.4-mini", validation_alias=_model_alias)
|
|
117
117
|
checkpoint: str | None = Field(
|
|
118
118
|
default=None,
|
|
119
119
|
description="Specific checkpoint name for inference routing. "
|
|
@@ -139,7 +139,7 @@ class ClaudeSDKConfig(AgentConfig):
|
|
|
139
139
|
"""
|
|
140
140
|
|
|
141
141
|
model_name: str = "Claude Code"
|
|
142
|
-
model: str = Field(default="claude-sonnet-4-
|
|
142
|
+
model: str = Field(default="claude-sonnet-4-6", validation_alias=_model_alias)
|
|
143
143
|
permission_mode: str = "bypassPermissions"
|
|
144
144
|
max_steps: int = -1
|
|
145
145
|
allowed_tools: list[str] = Field(
|
|
@@ -222,6 +222,10 @@ class Sample(BaseModel):
|
|
|
222
222
|
"""
|
|
223
223
|
|
|
224
224
|
prompt_token_ids: list[int] = Field(default_factory=list[int])
|
|
225
|
+
# Multimodal prompt as serialized ``ModelInput`` chunks (text + image), set by
|
|
226
|
+
# vision rollouts where the prompt is not a flat token list. When present it is
|
|
227
|
+
# the authoritative prompt for training; ``prompt_token_ids`` stays empty.
|
|
228
|
+
prompt_chunks: list[dict[str, Any]] | None = None
|
|
225
229
|
output_token_ids: list[int] = Field(default_factory=list[int])
|
|
226
230
|
output_logprobs: list[float] = Field(default_factory=list[float])
|
|
227
231
|
|
hud/cli/__init__.py
CHANGED
|
@@ -35,11 +35,13 @@ from .client import client_app # noqa: E402
|
|
|
35
35
|
from .deploy import deploy_command # noqa: E402
|
|
36
36
|
from .eval import eval_command # noqa: E402
|
|
37
37
|
from .init import init_command # noqa: E402
|
|
38
|
+
from .jobs import jobs_app # noqa: E402
|
|
38
39
|
from .login import login_command # noqa: E402
|
|
39
40
|
from .models import models_app # noqa: E402
|
|
40
41
|
from .serve import serve_command # noqa: E402
|
|
41
42
|
from .sync import sync_app # noqa: E402
|
|
42
43
|
from .task import task_app # noqa: E402
|
|
44
|
+
from .trace import trace_app # noqa: E402
|
|
43
45
|
|
|
44
46
|
app.command(name="serve")(serve_command)
|
|
45
47
|
app.command(name="dev", deprecated=True, hidden=True)(serve_command) # alias for now
|
|
@@ -49,6 +51,8 @@ app.command(name="eval")(eval_command)
|
|
|
49
51
|
app.command(name="init")(init_command)
|
|
50
52
|
app.command(name="cancel")(cancel_command)
|
|
51
53
|
app.add_typer(models_app, name="models")
|
|
54
|
+
app.add_typer(jobs_app, name="jobs")
|
|
55
|
+
app.add_typer(trace_app, name="trace")
|
|
52
56
|
|
|
53
57
|
|
|
54
58
|
@app.command(name="set")
|
hud/cli/eval.py
CHANGED
|
@@ -5,6 +5,7 @@ Config Override Order: CLI arguments > .hud_eval.toml > defaults
|
|
|
5
5
|
|
|
6
6
|
from __future__ import annotations
|
|
7
7
|
|
|
8
|
+
import ast
|
|
8
9
|
import asyncio
|
|
9
10
|
import logging
|
|
10
11
|
import os
|
|
@@ -42,8 +43,9 @@ def _resolve_model_from_catalog(model_id: str) -> tuple[AgentType, str] | None:
|
|
|
42
43
|
Returns None if the model isn't found or the catalog is unreachable.
|
|
43
44
|
"""
|
|
44
45
|
try:
|
|
45
|
-
from hud.utils.gateway import list_gateway_models
|
|
46
|
+
from hud.utils.gateway import list_gateway_models, normalize_gateway_model_id
|
|
46
47
|
|
|
48
|
+
model_id = normalize_gateway_model_id(model_id)
|
|
47
49
|
models = list_gateway_models()
|
|
48
50
|
except Exception:
|
|
49
51
|
return None
|
|
@@ -116,8 +118,9 @@ class AgentPreset:
|
|
|
116
118
|
|
|
117
119
|
_AGENT_PRESETS: list[AgentPreset] = [
|
|
118
120
|
AgentPreset("Claude Sonnet 4.6", AgentType.CLAUDE, "claude-sonnet-4-6"),
|
|
119
|
-
AgentPreset("
|
|
120
|
-
AgentPreset("
|
|
121
|
+
AgentPreset("Claude Opus 4.8", AgentType.CLAUDE, "claude-opus-4-8"),
|
|
122
|
+
AgentPreset("GPT-5.5", AgentType.OPENAI, "gpt-5.5"),
|
|
123
|
+
AgentPreset("Gemini 3.1 Pro (Preview)", AgentType.GEMINI, "gemini-3.1-pro-preview"),
|
|
121
124
|
AgentPreset(
|
|
122
125
|
"Grok 4-1 Fast (xAI)",
|
|
123
126
|
AgentType.OPENAI_COMPATIBLE,
|
|
@@ -130,10 +133,22 @@ _AGENT_PRESETS: list[AgentPreset] = [
|
|
|
130
133
|
},
|
|
131
134
|
),
|
|
132
135
|
AgentPreset(
|
|
133
|
-
"GLM
|
|
136
|
+
"GLM 5.2 (Z.ai)",
|
|
134
137
|
AgentType.OPENAI_COMPATIBLE,
|
|
135
|
-
"z-ai/glm-
|
|
136
|
-
{"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "GLM
|
|
138
|
+
"z-ai/glm-5.2",
|
|
139
|
+
{"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "GLM 5.2"}},
|
|
140
|
+
),
|
|
141
|
+
AgentPreset(
|
|
142
|
+
"Kimi K2.6 (Moonshot)",
|
|
143
|
+
AgentType.OPENAI_COMPATIBLE,
|
|
144
|
+
"moonshotai/kimi-k2.6",
|
|
145
|
+
{"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "Kimi K2.6"}},
|
|
146
|
+
),
|
|
147
|
+
AgentPreset(
|
|
148
|
+
"MiniMax M3",
|
|
149
|
+
AgentType.OPENAI_COMPATIBLE,
|
|
150
|
+
"MiniMax-M3",
|
|
151
|
+
{"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "MiniMax M3"}},
|
|
137
152
|
),
|
|
138
153
|
]
|
|
139
154
|
|
|
@@ -161,7 +176,7 @@ _DEFAULT_CONFIG_TEMPLATE = """# HUD Eval Configuration
|
|
|
161
176
|
# use_computer_beta = true
|
|
162
177
|
|
|
163
178
|
[openai]
|
|
164
|
-
# model = "gpt-
|
|
179
|
+
# model = "gpt-5.5"
|
|
165
180
|
# temperature = 0.7
|
|
166
181
|
# max_output_tokens = 4096
|
|
167
182
|
|
|
@@ -401,6 +416,11 @@ class EvalConfig(BaseModel):
|
|
|
401
416
|
if self.model:
|
|
402
417
|
kwargs["model"] = self.model
|
|
403
418
|
|
|
419
|
+
if isinstance(kwargs.get("model"), str):
|
|
420
|
+
from hud.utils.gateway import normalize_gateway_model_id
|
|
421
|
+
|
|
422
|
+
kwargs["model"] = normalize_gateway_model_id(kwargs["model"])
|
|
423
|
+
|
|
404
424
|
if self.agent_type == AgentType.OPENAI_COMPATIBLE and "api_key" not in kwargs:
|
|
405
425
|
base_url = kwargs.get("base_url", "")
|
|
406
426
|
if settings.hud_gateway_url in base_url and settings.api_key:
|
|
@@ -665,13 +685,46 @@ def _build_agent(cfg: EvalConfig) -> Any:
|
|
|
665
685
|
return cast("Any", cfg.agent_type.cls)(config=config)
|
|
666
686
|
|
|
667
687
|
|
|
688
|
+
def _python_defines_environment(path: Path) -> bool:
|
|
689
|
+
"""Return True when ``path`` constructs a v6 :class:`~hud.environment.Environment`."""
|
|
690
|
+
try:
|
|
691
|
+
tree = ast.parse(path.read_text(encoding="utf-8"))
|
|
692
|
+
except (OSError, SyntaxError):
|
|
693
|
+
return False
|
|
694
|
+
for node in ast.walk(tree):
|
|
695
|
+
if not isinstance(node, ast.Call):
|
|
696
|
+
continue
|
|
697
|
+
callee = node.func
|
|
698
|
+
callee_name = (
|
|
699
|
+
callee.id
|
|
700
|
+
if isinstance(callee, ast.Name)
|
|
701
|
+
else callee.attr
|
|
702
|
+
if isinstance(callee, ast.Attribute)
|
|
703
|
+
else None
|
|
704
|
+
)
|
|
705
|
+
if callee_name == "Environment":
|
|
706
|
+
return True
|
|
707
|
+
return False
|
|
708
|
+
|
|
709
|
+
|
|
668
710
|
def _spawn_target(source: Path) -> Path:
    """Pick the path the ``LocalRuntime`` provider serves for ``source``.

    Resolution order, after resolving ``source`` to an absolute path:

    1. A directory is returned unchanged.
    2. A non-``.py`` file (e.g. JSON/JSONL task data) maps to its parent
       directory.
    3. A ``.py`` file that itself constructs an ``Environment`` is returned
       unchanged.
    4. Any other ``.py`` file (a task-only module) maps to a sibling
       ``env.py`` when that file exists, otherwise to its parent directory.
    """
    target = source.resolve()
    if target.is_dir():
        return target

    folder = target.parent
    if target.suffix != ".py":
        return folder
    if _python_defines_environment(target):
        return target

    sibling_env = folder / "env.py"
    return sibling_env if sibling_env.is_file() else folder
|
|
676
729
|
|
|
677
730
|
|
hud/cli/init.py
CHANGED
|
@@ -76,8 +76,8 @@ def init_command(
|
|
|
76
76
|
None,
|
|
77
77
|
"--preset",
|
|
78
78
|
"-p",
|
|
79
|
-
help="Starter preset to download from GitHub (e.g. blank,
|
|
80
|
-
"deepresearch,
|
|
79
|
+
help="Starter preset to download from GitHub (e.g. blank, browser, "
|
|
80
|
+
"deepresearch, cua, autonomous-businesses, verilog). Omit for an interactive picker; in a "
|
|
81
81
|
"non-interactive shell, omitting it writes the minimal local scaffold.",
|
|
82
82
|
),
|
|
83
83
|
) -> None:
|
|
@@ -89,7 +89,7 @@ def init_command(
|
|
|
89
89
|
|
|
90
90
|
Examples:
|
|
91
91
|
hud init my-env # interactive picker (or local scaffold)
|
|
92
|
-
hud init my-env --preset
|
|
92
|
+
hud init my-env --preset browser # download the browser starter
|
|
93
93
|
hud init my-env --dir envs # create ./envs/my-env[/not dim]
|
|
94
94
|
"""
|
|
95
95
|
hud_console = HUDConsole()
|
hud/cli/jobs.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""``hud jobs`` — list jobs and their traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
from rich.panel import Panel
|
|
10
|
+
from rich.table import Table
|
|
11
|
+
|
|
12
|
+
console = Console()
|
|
13
|
+
|
|
14
|
+
jobs_app = typer.Typer(
|
|
15
|
+
name="jobs",
|
|
16
|
+
help="List jobs and their traces",
|
|
17
|
+
add_completion=False,
|
|
18
|
+
rich_markup_mode="rich",
|
|
19
|
+
no_args_is_help=False,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@jobs_app.callback(invoke_without_command=True)
def jobs_command(
    ctx: typer.Context,
    job_id: str | None = typer.Argument(None, help="Job ID — omit to list recent jobs"),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON"),
    limit: int = typer.Option(20, "--limit", "-n", help="Max rows to show"),
) -> None:
    """List recent jobs, or show traces for a specific job.

    Without an argument, lists the most recent jobs.
    With a job id, lists all traces for that job.
    """
    # When a subcommand was invoked, this callback has nothing to do.
    if ctx.invoked_subcommand is not None:
        return

    from hud.cli.utils.api import require_api_key

    require_api_key("list jobs")

    # An empty or omitted job id means "list jobs"; otherwise show that job's traces.
    if not job_id:
        _list_jobs(json_output=json_output, limit=limit)
        return
    _show_job_traces(job_id, json_output=json_output, limit=limit)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ── job listing ────────────────────────────────────────────────────────────────
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _list_jobs(*, json_output: bool, limit: int) -> None:
    """Fetch recent jobs from the platform and print them.

    Args:
        json_output: Print the job records as JSON instead of a table.
        limit: Value sent as the ``limit`` query parameter of ``GET /jobs``.

    Raises:
        typer.Exit: With exit code 1 when the request fails.
    """
    from rich.markup import escape

    from hud.utils.platform import PlatformClient

    client = PlatformClient.from_settings()
    try:
        data = client.get("/jobs", params={"limit": limit})
    except Exception as e:
        # Escape the message: square brackets in it would otherwise be parsed
        # as Rich markup and either vanish or raise a MarkupError.
        console.print(f"[red]Failed to fetch jobs: {escape(str(e))}[/red]")
        raise typer.Exit(1) from e

    # Accept either a bare list or a mapping carrying the rows under "items".
    items = data if isinstance(data, list) else (data.get("items") or [])

    if json_output:
        console.print_json(json.dumps(items, indent=2, default=str))
        return

    if not items:
        console.print("[yellow]No jobs found.[/yellow]")
        return

    console.print(Panel.fit("[bold cyan]Recent Jobs[/bold cyan]", border_style="cyan"))
    table = Table()
    table.add_column("ID", style="blue", no_wrap=True)
    table.add_column("Name", style="cyan")
    table.add_column("Taskset", style="dim")
    table.add_column("Status", style="yellow")
    table.add_column("Created", style="dim")

    from hud.settings import settings

    web = settings.hud_web_url.rstrip("/")

    for job in items:
        # Every cell is stringified (values may not be str) and escaped so that
        # server-provided text such as "run [v2]" is shown literally.
        table.add_row(
            escape(str(job.get("id") or "")),
            escape(str(job.get("name") or "-")),
            escape(str(job.get("taskset_name") or "-")),
            escape(str(job.get("status") or "-")),
            # Keep the first 19 characters of the timestamp text.
            escape(str(job.get("created_at") or "")[:19]),
        )
    console.print(table)
    console.print(f"\n[dim]View: {web}/jobs[/dim]")
    console.print("[dim]Tip: hud jobs <id> to see traces for a specific job[/dim]")
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# ── job traces ────────────────────────────────────────────────────────────────
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _show_job_traces(job_id: str, *, json_output: bool, limit: int) -> None:
    """Fetch the traces of one job from the platform and print them.

    Args:
        job_id: Job whose traces are requested (``GET /jobs/{job_id}/traces``).
        json_output: Print the trace records as JSON instead of a table.
        limit: Value sent as the ``limit`` query parameter.

    Raises:
        typer.Exit: With exit code 1 when the request fails.
    """
    from rich.markup import escape

    from hud.settings import settings
    from hud.utils.platform import PlatformClient

    client = PlatformClient.from_settings()
    try:
        data = client.get(f"/jobs/{job_id}/traces", params={"limit": limit})
    except Exception as e:
        # Escape the message: square brackets in it would otherwise be parsed
        # as Rich markup and either vanish or raise a MarkupError.
        console.print(f"[red]Failed to fetch traces: {escape(str(e))}[/red]")
        raise typer.Exit(1) from e

    # Accept either a bare list or a mapping carrying the rows under "items".
    items = data if isinstance(data, list) else (data.get("items") or [])

    if json_output:
        console.print_json(json.dumps(items, indent=2, default=str))
        return

    web = settings.hud_web_url.rstrip("/")

    if not items:
        console.print("[yellow]No traces found for this job.[/yellow]")
        console.print(f"[dim]View: {web}/jobs/{job_id}[/dim]")
        return

    console.print(
        Panel.fit(f"[bold cyan]Job Traces[/bold cyan] [dim]{job_id}[/dim]", border_style="cyan")
    )
    table = Table()
    table.add_column("Trace ID", style="blue", no_wrap=True)
    table.add_column("Status", style="yellow")
    table.add_column("Reward", style="green", justify="right")
    table.add_column("Started", style="dim")
    table.add_column("Error", style="red")

    for tr in items:
        reward = tr.get("reward")
        if reward is None:
            reward_text = "-"
        elif isinstance(reward, (int, float)):
            reward_text = f"{reward:.3f}"
        else:
            # A non-numeric reward cannot take the float format; show it as-is.
            reward_text = escape(str(reward))

        # Every cell is stringified (values may not be str) and escaped so that
        # server-provided text, error messages in particular, is shown literally.
        table.add_row(
            escape(str(tr.get("id") or "")),
            escape(str(tr.get("status") or "-")),
            reward_text,
            # Keep the first 19 characters of the timestamp text.
            escape(str(tr.get("start_time") or tr.get("created_at") or "")[:19]),
            # Truncate before escaping so an escape sequence is never cut in half.
            escape(str(tr.get("error") or "")[:40]),
        )
    console.print(table)
    console.print(f"\n[dim]View: {web}/jobs/{job_id}[/dim]")
    console.print("[dim]Tip: hud trace <trace_id> to inspect a specific rollout[/dim]")
|
hud/cli/models.py
CHANGED
|
@@ -71,6 +71,8 @@ def list_models(
|
|
|
71
71
|
)
|
|
72
72
|
console.print(table)
|
|
73
73
|
console.print(f"\n[dim]Gateway: {settings.hud_gateway_url}[/dim]")
|
|
74
|
+
web = settings.hud_web_url.rstrip("/")
|
|
75
|
+
console.print(f"[dim]View a model in the browser: {web}/models/<id>[/dim]")
|
|
74
76
|
|
|
75
77
|
|
|
76
78
|
@models_app.command("fork")
|
|
@@ -116,6 +118,7 @@ def fork_model(
|
|
|
116
118
|
)
|
|
117
119
|
)
|
|
118
120
|
console.print(f"\n[dim]Train it: hud.TrainingClient({slug!r})[/dim]")
|
|
121
|
+
console.print(f"[dim]View: {_model_url(model['id'])}[/dim]")
|
|
119
122
|
|
|
120
123
|
|
|
121
124
|
@models_app.command("checkpoints")
|
|
@@ -127,13 +130,15 @@ def list_checkpoints(
|
|
|
127
130
|
from hud.cli.utils.api import require_api_key
|
|
128
131
|
|
|
129
132
|
require_api_key("list checkpoints")
|
|
130
|
-
|
|
133
|
+
model_id = _resolve_model_id(model)
|
|
134
|
+
checkpoints = _get_checkpoints(model_id)
|
|
131
135
|
|
|
132
136
|
if json_output:
|
|
133
137
|
console.print_json(json.dumps(checkpoints, indent=2))
|
|
134
138
|
return
|
|
135
139
|
if not checkpoints:
|
|
136
140
|
console.print("[yellow]No checkpoints yet — this model serves its base weights[/yellow]")
|
|
141
|
+
console.print(f"[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
|
|
137
142
|
return
|
|
138
143
|
|
|
139
144
|
checkpoints = sorted(checkpoints, key=lambda c: c.get("created_at") or "")
|
|
@@ -155,6 +160,7 @@ def list_checkpoints(
|
|
|
155
160
|
(ckpt.get("created_at") or "")[:19],
|
|
156
161
|
)
|
|
157
162
|
console.print(table)
|
|
163
|
+
console.print(f"\n[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
|
|
158
164
|
|
|
159
165
|
|
|
160
166
|
@models_app.command("head")
|
|
@@ -170,19 +176,22 @@ def show_head(
|
|
|
170
176
|
from hud.cli.utils.api import require_api_key
|
|
171
177
|
|
|
172
178
|
require_api_key("manage head")
|
|
179
|
+
model_id = _resolve_model_id(model)
|
|
173
180
|
|
|
174
181
|
if set_to is not None:
|
|
175
|
-
_set_head(
|
|
182
|
+
_set_head(model_id, set_to)
|
|
176
183
|
console.print(f"[green]Head set to[/green] [cyan]{set_to}[/cyan]")
|
|
184
|
+
console.print(f"[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
|
|
177
185
|
return
|
|
178
186
|
|
|
179
|
-
head = next((c for c in _get_checkpoints(
|
|
187
|
+
head = next((c for c in _get_checkpoints(model_id) if c.get("is_active")), None)
|
|
180
188
|
|
|
181
189
|
if json_output:
|
|
182
190
|
console.print_json(json.dumps(head, indent=2))
|
|
183
191
|
return
|
|
184
192
|
if head is None:
|
|
185
193
|
console.print("[yellow]No active checkpoint — this model serves its base weights[/yellow]")
|
|
194
|
+
console.print(f"[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
|
|
186
195
|
return
|
|
187
196
|
|
|
188
197
|
reward = head.get("mean_reward")
|
|
@@ -196,6 +205,15 @@ def show_head(
|
|
|
196
205
|
border_style="green",
|
|
197
206
|
)
|
|
198
207
|
)
|
|
208
|
+
console.print(f"[dim]View: {_model_url(model_id, tab='checkpoints')}[/dim]")
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _model_url(model_id: str, *, tab: str | None = None) -> str:
    """Build the web app URL for a model.

    Args:
        model_id: Identifier placed after ``/models/`` in the URL.
        tab: Optional tab name (e.g. ``checkpoints``) appended as ``?tab=<tab>``;
            omitted from the URL when ``None`` or empty.

    Returns:
        The URL under ``settings.hud_web_url`` with any trailing slash stripped.
    """
    from hud.settings import settings

    base = settings.hud_web_url.rstrip("/")
    page = f"{base}/models/{model_id}"
    if not tab:
        return page
    return f"{page}?tab={tab}"
|
|
199
217
|
|
|
200
218
|
|
|
201
219
|
def _resolve_model_id(model: str) -> str:
|
hud/cli/templates.py
CHANGED
|
@@ -13,7 +13,7 @@ COPY . .
|
|
|
13
13
|
|
|
14
14
|
# Serve the Environment's control channel (tcp JSON-RPC) on 8765.
|
|
15
15
|
EXPOSE 8765
|
|
16
|
-
CMD ["uv", "run", "
|
|
16
|
+
CMD ["uv", "run", "hud", "serve", "env:env", "--host", "0.0.0.0", "--port", "8765"]
|
|
17
17
|
"""
|
|
18
18
|
|
|
19
19
|
# fmt: off
|
|
@@ -78,7 +78,7 @@ async def count(sentence: str, letter: str):
|
|
|
78
78
|
|
|
79
79
|
|
|
80
80
|
# =============================================================================
|
|
81
|
-
# TEST - run with: python env.py
|
|
81
|
+
# TEST - run with: uv run python env.py
|
|
82
82
|
# =============================================================================
|
|
83
83
|
|
|
84
84
|
async def test():
|
|
@@ -136,7 +136,6 @@ version = "0.1.0"
|
|
|
136
136
|
requires-python = ">=3.11"
|
|
137
137
|
dependencies = ["hud-python"]
|
|
138
138
|
|
|
139
|
-
[
|
|
140
|
-
|
|
141
|
-
build-backend = "hatchling.build"
|
|
139
|
+
[tool.uv]
|
|
140
|
+
package = false
|
|
142
141
|
"""
|
hud/cli/tests/test_deploy.py
CHANGED
|
@@ -48,7 +48,7 @@ class TestResolveEnvironmentName:
|
|
|
48
48
|
|
|
49
49
|
def test_entrypoint_disambiguates_subagent(self, tmp_path: Path) -> None:
|
|
50
50
|
(tmp_path / "Dockerfile").write_text(
|
|
51
|
-
'CMD ["hud", "
|
|
51
|
+
'CMD ["hud", "serve", "env:env", "--port", "8765"]\n', encoding="utf-8"
|
|
52
52
|
)
|
|
53
53
|
(tmp_path / "env.py").write_text('env = Environment("trace-explorer")\n', encoding="utf-8")
|
|
54
54
|
(tmp_path / "verify.py").write_text(
|