hud-python 0.4.53__py3-none-any.whl → 0.4.55__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/base.py +8 -0
- hud/agents/claude.py +4 -3
- hud/agents/openai.py +2 -1
- hud/agents/openai_chat_generic.py +3 -2
- hud/agents/tests/test_claude.py +2 -2
- hud/agents/tests/test_openai.py +1 -1
- hud/agents/utils.py +50 -0
- hud/cli/__init__.py +65 -9
- hud/cli/build.py +185 -25
- hud/cli/dev.py +130 -40
- hud/cli/eval.py +123 -24
- hud/cli/flows/dev.py +155 -0
- hud/cli/flows/tasks.py +29 -9
- hud/cli/tests/test_eval.py +6 -6
- hud/cli/utils/docker.py +6 -3
- hud/clients/base.py +2 -2
- hud/otel/context.py +42 -1
- hud/server/server.py +29 -3
- hud/settings.py +6 -0
- hud/telemetry/async_context.py +16 -2
- hud/telemetry/trace.py +6 -1
- hud/types.py +10 -0
- hud/utils/group_eval.py +14 -2
- hud/utils/tests/test_agent_factories.py +2 -1
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/METADATA +8 -7
- {hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/RECORD +31 -29
- {hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/WHEEL +0 -0
- {hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/licenses/LICENSE +0 -0
hud/cli/flows/dev.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import contextlib
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from hud.settings import settings
|
|
10
|
+
from hud.shared.requests import make_request
|
|
11
|
+
from hud.utils.hud_console import hud_console
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _extract_trace_url(resp: Any) -> str | None:
    """Pull the live-trace URL out of a dynamic-trace API response.

    Accepts either response shape handled by the caller: an ``id`` (top-level
    or nested under ``data``) that is turned into ``https://hud.so/trace/{id}``,
    or, for backward compatibility, a ready-made ``url`` (top-level or under
    ``data``). Never raises; returns None when nothing usable is present.
    """
    if not isinstance(resp, dict):
        return None

    # Normalise ``data`` once so both the id and url lookups are safe even when
    # the backend sends a non-dict value (list, string, ...) under that key.
    data = resp.get("data")
    if not isinstance(data, dict):
        data = {}

    trace_id = resp.get("id")
    if trace_id is None:
        trace_id = data.get("id")

    # Backcompat: if url is provided directly
    if not trace_id:
        direct_url = resp.get("url") or data.get("url")
        if isinstance(direct_url, str) and direct_url:
            return direct_url

    if isinstance(trace_id, str) and trace_id:
        return f"https://hud.so/trace/{trace_id}"
    return None


async def create_dynamic_trace(
    *,
    mcp_config: dict[str, dict[str, Any]],
    build_status: bool,
    environment_name: str,
) -> str | None:
    """
    Create a dynamic trace for HUD dev sessions when running in HTTP mode.

    Sends a POST to ``{settings.hud_api_url}/dev/dynamic-traces`` with:
    - mcp_config: points to the local MCP config (same as Cursor)
    - build_status: True if Docker mode (built image), False if basic Python mode
    - environment_name: Name of the environment/server/image

    This is best-effort: a missing API key or any request failure is logged as
    a warning and never raised, so the dev flow is not interrupted.

    Returns the full URL to the live trace when successful, otherwise None.
    """
    api_base = settings.hud_api_url.rstrip("/")
    # Endpoint TBD; use a sensible default path that the backend can wire up
    url = f"{api_base}/dev/dynamic-traces"

    payload = {
        "mcp_config": mcp_config,
        "build_status": bool(build_status),
        "environment_name": environment_name,
    }

    # Best-effort; if missing API key, log and continue
    api_key = settings.api_key
    if not api_key:
        logger.warning("Skipping dynamic trace creation; missing HUD_API_KEY")
        return None

    try:
        resp = await make_request("POST", url=url, json=payload, api_key=api_key)
    except Exception as e:
        # Do not interrupt dev flow
        try:
            # Truncated payload preview helps debugging without flooding logs
            preview = json.dumps(payload)[:500]
            logger.warning("Failed to create dynamic dev trace: %s | payload=%s", e, preview)
        except Exception:
            # Payload itself was not JSON-serializable; log without it
            logger.warning("Failed to create dynamic dev trace: %s", e)
        return None

    # Response parsing cannot raise, so it lives outside the try block and a
    # malformed response is no longer misreported as a failed request.
    return _extract_trace_url(resp)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def show_dev_ui(
    *,
    live_trace_url: str,
    server_name: str,
    port: int,
    cursor_deeplink: str,
    is_docker: bool = False,
) -> None:
    """
    Render the minimal dev UI: a header, a panel with the live trace link,
    and a short summary of the running server.

    Intended for the case where a trace URL was obtained successfully; also
    attempts to open that URL in the default browser (failures are ignored).

    Args:
        live_trace_url: URL to the live trace
        server_name: Name of the server/image
        port: Port the server is running on (not used in the output below)
        cursor_deeplink: Pre-generated Cursor deeplink URL
        is_docker: Whether this is Docker mode (changes the label and adds a
            hot-reload note)
    """
    import webbrowser

    from rich.align import Align
    from rich.panel import Panel

    # Header goes out before anything else
    hud_console.header("HUD Development Server", icon="🚀")

    # Best-effort browser launch; new=2 asks for a new tab where supported
    with contextlib.suppress(Exception):
        webbrowser.open(live_trace_url, new=2)

    # Clickable link rendered blue, bold and underlined, centred in the panel
    link_style = "bold underline rgb(108,113,196)"
    styled_link = f"[{link_style}][link={live_trace_url}]{live_trace_url}[/link][/{link_style}]"
    hud_console.console.print(
        Panel(
            Align.center(styled_link),
            title="🔗 Live Dev Trace",
            border_style="rgb(192,150,12)",  # HUD gold
            padding=(1, 2),
        )
    )

    # Remaining details are listed under the panel
    if is_docker:
        label = "Base image"
    else:
        label = "Server"
    bullet = hud_console.sym.ITEM

    hud_console.info("")
    hud_console.info(f"{bullet} {label}: {server_name}")
    hud_console.info(f"{bullet} Cursor: {cursor_deeplink}")
    hud_console.info("")
    hud_console.info(f"{hud_console.sym.SUCCESS} Hot-reload enabled")
    if is_docker:
        hud_console.dim_info(
            "",
            "Container restarts on file changes (mounted volumes), "
            "if changing tools run hud dev again",
        )
    hud_console.info("")
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def generate_cursor_deeplink(server_name: str, port: int) -> str:
    """Generate a Cursor deeplink for the MCP server.

    The server config ``{"url": "http://localhost:<port>/mcp"}`` is serialized
    to JSON, base64-encoded, and placed together with the server name in the
    query string of a ``cursor://`` install URL.

    Args:
        server_name: Name of the server
        port: Port the server is running on

    Returns:
        Cursor deeplink URL with both query values percent-encoded
    """
    # Function-scope import, matching the local-import style used elsewhere
    # in this module.
    from urllib.parse import quote

    server_config = {"url": f"http://localhost:{port}/mcp"}
    config_json = json.dumps(server_config, indent=2)
    config_base64 = base64.b64encode(config_json.encode()).decode()

    # Percent-encode both values: a server name may contain spaces, '&' or '#',
    # and standard base64 can contain '+', '/' and '=' which are not safe to
    # place verbatim in a query string ('+' may be decoded as a space).
    name_param = quote(server_name, safe="")
    config_param = quote(config_base64, safe="")
    return (
        "cursor://anysphere.cursor-deeplink/mcp/install"
        f"?name={name_param}&config={config_param}"
    )
|
hud/cli/flows/tasks.py
CHANGED
|
@@ -11,7 +11,7 @@ import yaml
|
|
|
11
11
|
|
|
12
12
|
from hud.cli.push import push_environment
|
|
13
13
|
from hud.cli.utils.docker import require_docker_running
|
|
14
|
-
from hud.cli.utils.env_check import
|
|
14
|
+
from hud.cli.utils.env_check import find_environment_dir
|
|
15
15
|
from hud.cli.utils.registry import extract_name_and_tag
|
|
16
16
|
from hud.utils.hud_console import hud_console
|
|
17
17
|
from hud.utils.tasks import load_tasks
|
|
@@ -56,7 +56,9 @@ def _validate_tasks(tasks: list[Task]) -> bool:
|
|
|
56
56
|
return True
|
|
57
57
|
|
|
58
58
|
|
|
59
|
-
def _ensure_pushed(
|
|
59
|
+
def _ensure_pushed(
|
|
60
|
+
env_dir: Path, lock_data: dict[str, Any], check_docker: bool = True
|
|
61
|
+
) -> dict[str, Any]:
|
|
60
62
|
"""Ensure the environment is pushed to a registry; return updated lock data."""
|
|
61
63
|
pushed = bool(lock_data.get("push"))
|
|
62
64
|
if not pushed:
|
|
@@ -64,7 +66,8 @@ def _ensure_pushed(env_dir: Path, lock_data: dict[str, Any]) -> dict[str, Any]:
|
|
|
64
66
|
if not hud_console.confirm("Push to a registry now (runs 'hud push')?", default=True):
|
|
65
67
|
raise typer.Exit(1)
|
|
66
68
|
# Check Docker availability before attempting a push
|
|
67
|
-
|
|
69
|
+
if check_docker:
|
|
70
|
+
require_docker_running()
|
|
68
71
|
|
|
69
72
|
# If Docker or login is not configured, the push function will fail and halt.
|
|
70
73
|
push_environment(str(env_dir), yes=True)
|
|
@@ -293,9 +296,24 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
|
|
|
293
296
|
hud_console.hint("Ensure you're in or near your environment folder before running 'hud rl'")
|
|
294
297
|
raise typer.Exit(1)
|
|
295
298
|
|
|
296
|
-
#
|
|
297
|
-
|
|
298
|
-
|
|
299
|
+
# For convert command, we don't need Docker running - just check for lock file
|
|
300
|
+
# This avoids showing Docker-related messages during conversion
|
|
301
|
+
lock_path = env_dir / "hud.lock.yaml"
|
|
302
|
+
if not lock_path.exists():
|
|
303
|
+
hud_console.error("No hud.lock.yaml found. The environment needs to be built first.")
|
|
304
|
+
hud_console.info("Run 'hud build' in the environment directory to build it.")
|
|
305
|
+
raise typer.Exit(1)
|
|
306
|
+
|
|
307
|
+
# Load lock data directly
|
|
308
|
+
try:
|
|
309
|
+
with open(lock_path) as f:
|
|
310
|
+
lock_data: dict[str, Any] = yaml.safe_load(f) or {}
|
|
311
|
+
except Exception as e:
|
|
312
|
+
hud_console.error(f"Failed to read hud.lock.yaml: {e}")
|
|
313
|
+
raise typer.Exit(1) from e
|
|
314
|
+
|
|
315
|
+
# Check if pushed - don't check Docker for convert command
|
|
316
|
+
lock_data = _ensure_pushed(env_dir, lock_data, check_docker=False)
|
|
299
317
|
|
|
300
318
|
# Derive remote image name org/name:tag
|
|
301
319
|
remote_image = _derive_remote_image(lock_data)
|
|
@@ -387,8 +405,11 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
|
|
|
387
405
|
f"Detected env vars in .env that look like API keys: {names_preview}.\n"
|
|
388
406
|
"Include them as remote headers (values will be ${VAR} placeholders)?"
|
|
389
407
|
)
|
|
390
|
-
if hud_console.confirm(prompt, default=True):
|
|
391
|
-
|
|
408
|
+
if not hud_console.confirm(prompt, default=True):
|
|
409
|
+
# User cancelled - exit without creating the file
|
|
410
|
+
hud_console.info("Conversion cancelled by user")
|
|
411
|
+
raise typer.Exit(0)
|
|
412
|
+
all_detected.update(missing)
|
|
392
413
|
|
|
393
414
|
# Final set of env vars to convert to headers
|
|
394
415
|
provided_keys = all_detected
|
|
@@ -461,6 +482,5 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
|
|
|
461
482
|
f.write("\n")
|
|
462
483
|
|
|
463
484
|
hud_console.success(f"Created remote tasks file: {remote_path.name}")
|
|
464
|
-
hud_console.hint("Proceeding with RL training on the remote environment")
|
|
465
485
|
|
|
466
486
|
return str(remote_path)
|
hud/cli/tests/test_eval.py
CHANGED
|
@@ -11,7 +11,7 @@ from hud.cli.eval import (
|
|
|
11
11
|
build_agent,
|
|
12
12
|
run_single_task,
|
|
13
13
|
)
|
|
14
|
-
from hud.types import Task, Trace
|
|
14
|
+
from hud.types import AgentType, Task, Trace
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class TestBuildAgent:
|
|
@@ -26,7 +26,7 @@ class TestBuildAgent:
|
|
|
26
26
|
mock_runner.return_value = mock_instance
|
|
27
27
|
|
|
28
28
|
# Test with verbose=False
|
|
29
|
-
result = build_agent(
|
|
29
|
+
result = build_agent(AgentType.INTEGRATION_TEST, verbose=False)
|
|
30
30
|
|
|
31
31
|
mock_runner.assert_called_once_with(verbose=False)
|
|
32
32
|
assert result == mock_instance
|
|
@@ -40,7 +40,7 @@ class TestBuildAgent:
|
|
|
40
40
|
mock_runner.return_value = mock_instance
|
|
41
41
|
|
|
42
42
|
# Test with verbose=False
|
|
43
|
-
result = build_agent(
|
|
43
|
+
result = build_agent(AgentType.CLAUDE, verbose=False)
|
|
44
44
|
|
|
45
45
|
mock_runner.assert_called_once_with(model="claude-sonnet-4-20250514", verbose=False)
|
|
46
46
|
assert result == mock_instance
|
|
@@ -55,7 +55,7 @@ class TestBuildAgent:
|
|
|
55
55
|
|
|
56
56
|
# Test with verbose=False
|
|
57
57
|
result = build_agent(
|
|
58
|
-
|
|
58
|
+
AgentType.CLAUDE,
|
|
59
59
|
model="claude-sonnet-4-20250514",
|
|
60
60
|
allowed_tools=["act"],
|
|
61
61
|
verbose=True,
|
|
@@ -97,7 +97,7 @@ class TestRunSingleTask:
|
|
|
97
97
|
patch("hud.cli.eval.find_environment_dir", return_value=None),
|
|
98
98
|
patch("hud.cli.eval.hud.trace"),
|
|
99
99
|
):
|
|
100
|
-
await run_single_task("test.json", agent_type=
|
|
100
|
+
await run_single_task("test.json", agent_type=AgentType.INTEGRATION_TEST, max_steps=10)
|
|
101
101
|
|
|
102
102
|
# Verify agent.run was called with the task containing agent_config
|
|
103
103
|
mock_agent.run.assert_called_once()
|
|
@@ -119,7 +119,7 @@ class TestRunSingleTask:
|
|
|
119
119
|
mock_grouped.return_value = [{"task": mock_task, "rewards": [1.0, 0.5]}]
|
|
120
120
|
|
|
121
121
|
await run_single_task(
|
|
122
|
-
"test.json", agent_type=
|
|
122
|
+
"test.json", agent_type=AgentType.INTEGRATION_TEST, group_size=3, max_steps=10
|
|
123
123
|
)
|
|
124
124
|
|
|
125
125
|
# Verify run_tasks_grouped was called with correct group_size
|
hud/cli/utils/docker.py
CHANGED
|
@@ -308,7 +308,10 @@ def require_docker_running() -> None:
|
|
|
308
308
|
"Is Docker running? Open Docker Desktop and wait until it reports 'Running'"
|
|
309
309
|
)
|
|
310
310
|
raise typer.Exit(1) from e
|
|
311
|
-
except
|
|
312
|
-
|
|
311
|
+
except typer.Exit:
|
|
312
|
+
# Propagate cleanly without extra noise; hints already printed above
|
|
313
|
+
raise
|
|
314
|
+
except Exception:
|
|
315
|
+
# Unknown failure - keep output minimal and avoid stack traces
|
|
313
316
|
hud_console.hint("Is the Docker daemon running?")
|
|
314
|
-
raise typer.Exit(1)
|
|
317
|
+
raise typer.Exit(1) # noqa: B904
|
hud/clients/base.py
CHANGED
|
@@ -146,7 +146,7 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
146
146
|
except HudException:
|
|
147
147
|
raise
|
|
148
148
|
except Exception as e:
|
|
149
|
-
|
|
149
|
+
hud_console.error(f"Failed to initialize MCP client: {e}")
|
|
150
150
|
raise HudException from e
|
|
151
151
|
|
|
152
152
|
# Common hud behavior - fetch telemetry
|
|
@@ -333,7 +333,7 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
333
333
|
tool_info = {
|
|
334
334
|
"name": tool.name,
|
|
335
335
|
"description": tool.description,
|
|
336
|
-
"
|
|
336
|
+
"inputSchema": tool.inputSchema,
|
|
337
337
|
}
|
|
338
338
|
analysis["tools"].append(tool_info)
|
|
339
339
|
|
hud/otel/context.py
CHANGED
|
@@ -6,6 +6,7 @@ User-facing APIs are in hud.telemetry.
|
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
|
+
import contextlib
|
|
9
10
|
import contextvars
|
|
10
11
|
import logging
|
|
11
12
|
from contextlib import contextmanager
|
|
@@ -232,6 +233,8 @@ async def _update_task_status_async(
|
|
|
232
233
|
error_message: str | None = None,
|
|
233
234
|
trace_name: str | None = None,
|
|
234
235
|
task_id: str | None = None,
|
|
236
|
+
group_id: str | None = None,
|
|
237
|
+
extra_metadata: dict[str, Any] | None = None,
|
|
235
238
|
) -> None:
|
|
236
239
|
"""Async task status update."""
|
|
237
240
|
if not settings.telemetry_enabled:
|
|
@@ -271,12 +274,20 @@ async def _update_task_status_async(
|
|
|
271
274
|
metadata["mcp_tool_steps"] = get_mcp_tool_steps()
|
|
272
275
|
metadata["agent_steps"] = get_agent_steps()
|
|
273
276
|
|
|
277
|
+
# Merge any extra metadata provided by callers (e.g., task config summaries)
|
|
278
|
+
if extra_metadata:
|
|
279
|
+
with contextlib.suppress(Exception):
|
|
280
|
+
metadata.update(extra_metadata)
|
|
281
|
+
|
|
274
282
|
if metadata:
|
|
275
283
|
data["metadata"] = metadata
|
|
276
284
|
|
|
277
285
|
if task_id:
|
|
278
286
|
data["task_id"] = task_id
|
|
279
287
|
|
|
288
|
+
if group_id:
|
|
289
|
+
data["group_id"] = group_id
|
|
290
|
+
|
|
280
291
|
await make_request(
|
|
281
292
|
method="POST",
|
|
282
293
|
url=f"{settings.hud_telemetry_url}/trace/{task_run_id}/status",
|
|
@@ -297,10 +308,21 @@ def _fire_and_forget_status_update(
|
|
|
297
308
|
error_message: str | None = None,
|
|
298
309
|
trace_name: str | None = None,
|
|
299
310
|
task_id: str | None = None,
|
|
311
|
+
group_id: str | None = None,
|
|
312
|
+
extra_metadata: dict[str, Any] | None = None,
|
|
300
313
|
) -> None:
|
|
301
314
|
"""Fire and forget status update - works in any context including Jupyter."""
|
|
302
315
|
fire_and_forget(
|
|
303
|
-
_update_task_status_async(
|
|
316
|
+
_update_task_status_async(
|
|
317
|
+
task_run_id,
|
|
318
|
+
status,
|
|
319
|
+
job_id,
|
|
320
|
+
error_message,
|
|
321
|
+
trace_name,
|
|
322
|
+
task_id,
|
|
323
|
+
group_id,
|
|
324
|
+
extra_metadata,
|
|
325
|
+
),
|
|
304
326
|
f"update task {task_run_id} status to {status}",
|
|
305
327
|
)
|
|
306
328
|
|
|
@@ -312,6 +334,8 @@ def _update_task_status_sync(
|
|
|
312
334
|
error_message: str | None = None,
|
|
313
335
|
trace_name: str | None = None,
|
|
314
336
|
task_id: str | None = None,
|
|
337
|
+
group_id: str | None = None,
|
|
338
|
+
extra_metadata: dict[str, Any] | None = None,
|
|
315
339
|
) -> None:
|
|
316
340
|
"""Synchronous task status update."""
|
|
317
341
|
if not settings.telemetry_enabled:
|
|
@@ -351,12 +375,20 @@ def _update_task_status_sync(
|
|
|
351
375
|
metadata["mcp_tool_steps"] = get_mcp_tool_steps()
|
|
352
376
|
metadata["agent_steps"] = get_agent_steps()
|
|
353
377
|
|
|
378
|
+
# Merge any extra metadata provided by callers
|
|
379
|
+
if extra_metadata:
|
|
380
|
+
with contextlib.suppress(Exception):
|
|
381
|
+
metadata.update(extra_metadata)
|
|
382
|
+
|
|
354
383
|
if metadata:
|
|
355
384
|
data["metadata"] = metadata
|
|
356
385
|
|
|
357
386
|
if task_id:
|
|
358
387
|
data["task_id"] = task_id
|
|
359
388
|
|
|
389
|
+
if group_id:
|
|
390
|
+
data["group_id"] = group_id
|
|
391
|
+
|
|
360
392
|
make_request_sync(
|
|
361
393
|
method="POST",
|
|
362
394
|
url=f"{settings.hud_telemetry_url}/trace/{task_run_id}/status",
|
|
@@ -447,10 +479,12 @@ class trace:
|
|
|
447
479
|
attributes: dict[str, Any] | None = None,
|
|
448
480
|
job_id: str | None = None,
|
|
449
481
|
task_id: str | None = None,
|
|
482
|
+
group_id: str | None = None,
|
|
450
483
|
) -> None:
|
|
451
484
|
self.task_run_id = task_run_id
|
|
452
485
|
self.job_id = job_id
|
|
453
486
|
self.task_id = task_id
|
|
487
|
+
self.group_id = group_id
|
|
454
488
|
self.is_root = is_root
|
|
455
489
|
self.span_name = span_name
|
|
456
490
|
self.attributes = attributes or {}
|
|
@@ -473,6 +507,8 @@ class trace:
|
|
|
473
507
|
ctx = baggage.set_baggage("hud.job_id", self.job_id, context=ctx)
|
|
474
508
|
if self.task_id:
|
|
475
509
|
ctx = baggage.set_baggage("hud.task_id", self.task_id, context=ctx)
|
|
510
|
+
if self.group_id:
|
|
511
|
+
ctx = baggage.set_baggage("hud.group_id", self.group_id, context=ctx)
|
|
476
512
|
self._otel_token = context.attach(ctx)
|
|
477
513
|
|
|
478
514
|
# Start a span as current
|
|
@@ -486,6 +522,8 @@ class trace:
|
|
|
486
522
|
span_attrs["hud.job_id"] = self.job_id
|
|
487
523
|
if self.task_id:
|
|
488
524
|
span_attrs["hud.task_id"] = self.task_id
|
|
525
|
+
if self.group_id:
|
|
526
|
+
span_attrs["hud.group_id"] = self.group_id
|
|
489
527
|
|
|
490
528
|
# Use start_as_current_span context manager
|
|
491
529
|
self._span_manager = tracer.start_as_current_span(
|
|
@@ -502,6 +540,7 @@ class trace:
|
|
|
502
540
|
job_id=self.job_id,
|
|
503
541
|
trace_name=self.span_name,
|
|
504
542
|
task_id=self.task_id,
|
|
543
|
+
group_id=self.group_id,
|
|
505
544
|
)
|
|
506
545
|
# Print the nice trace URL box (only if not part of a job)
|
|
507
546
|
if not self.job_id:
|
|
@@ -528,6 +567,7 @@ class trace:
|
|
|
528
567
|
error_message=str(exc_val),
|
|
529
568
|
trace_name=self.span_name,
|
|
530
569
|
task_id=self.task_id,
|
|
570
|
+
group_id=self.group_id,
|
|
531
571
|
)
|
|
532
572
|
# Print error completion message (only if not part of a job)
|
|
533
573
|
if not self.job_id:
|
|
@@ -540,6 +580,7 @@ class trace:
|
|
|
540
580
|
job_id=self.job_id,
|
|
541
581
|
trace_name=self.span_name,
|
|
542
582
|
task_id=self.task_id,
|
|
583
|
+
group_id=self.group_id,
|
|
543
584
|
)
|
|
544
585
|
# Print success completion message (only if not part of a job)
|
|
545
586
|
if not self.job_id:
|
hud/server/server.py
CHANGED
|
@@ -311,11 +311,35 @@ class MCPServer(FastMCP):
|
|
|
311
311
|
if transport is None:
|
|
312
312
|
transport = "stdio"
|
|
313
313
|
|
|
314
|
-
# Register HTTP helpers for HTTP transport
|
|
314
|
+
# Register HTTP helpers and CORS for HTTP transport
|
|
315
315
|
if transport in ("http", "sse"):
|
|
316
316
|
self._register_hud_helpers()
|
|
317
317
|
logger.info("Registered HUD helper endpoints at /hud/*")
|
|
318
318
|
|
|
319
|
+
# Add CORS middleware if not already provided
|
|
320
|
+
from starlette.middleware import Middleware
|
|
321
|
+
from starlette.middleware.cors import CORSMiddleware
|
|
322
|
+
|
|
323
|
+
# Get or create middleware list
|
|
324
|
+
middleware = transport_kwargs.get("middleware", [])
|
|
325
|
+
if isinstance(middleware, list):
|
|
326
|
+
# Check if CORS is already configured
|
|
327
|
+
has_cors = any(
|
|
328
|
+
isinstance(m, Middleware) and m.cls == CORSMiddleware for m in middleware
|
|
329
|
+
)
|
|
330
|
+
if not has_cors:
|
|
331
|
+
# Add CORS with permissive defaults for dev
|
|
332
|
+
cors_middleware = Middleware(
|
|
333
|
+
CORSMiddleware,
|
|
334
|
+
allow_origins=["*"],
|
|
335
|
+
allow_methods=["GET", "POST", "DELETE", "OPTIONS"],
|
|
336
|
+
allow_headers=["*"],
|
|
337
|
+
expose_headers=["Mcp-Session-Id"],
|
|
338
|
+
)
|
|
339
|
+
middleware = [cors_middleware, *middleware]
|
|
340
|
+
transport_kwargs["middleware"] = middleware
|
|
341
|
+
logger.info("Added CORS middleware for browser compatibility")
|
|
342
|
+
|
|
319
343
|
try:
|
|
320
344
|
await super().run_async(
|
|
321
345
|
transport=transport, show_banner=show_banner, **transport_kwargs
|
|
@@ -506,9 +530,11 @@ class MCPServer(FastMCP):
|
|
|
506
530
|
return str(obj)
|
|
507
531
|
|
|
508
532
|
serialized = serialize_obj(result)
|
|
509
|
-
|
|
533
|
+
# Return the serialized CallToolResult directly (no wrapper)
|
|
534
|
+
return JSONResponse(serialized)
|
|
510
535
|
except Exception as e:
|
|
511
|
-
|
|
536
|
+
# Return a simple error object
|
|
537
|
+
return JSONResponse({"error": str(e)}, status_code=400)
|
|
512
538
|
|
|
513
539
|
return tool_endpoint
|
|
514
540
|
|
hud/settings.py
CHANGED
|
@@ -70,6 +70,12 @@ class Settings(BaseSettings):
|
|
|
70
70
|
validation_alias="HUD_RL_URL",
|
|
71
71
|
)
|
|
72
72
|
|
|
73
|
+
hud_api_url: str = Field(
|
|
74
|
+
default="https://api.hud.so",
|
|
75
|
+
description="Base URL for the HUD API server",
|
|
76
|
+
validation_alias="HUD_API_URL",
|
|
77
|
+
)
|
|
78
|
+
|
|
73
79
|
api_key: str | None = Field(
|
|
74
80
|
default=None,
|
|
75
81
|
description="API key for authentication with the HUD API",
|
hud/telemetry/async_context.py
CHANGED
|
@@ -81,14 +81,16 @@ class AsyncTrace:
|
|
|
81
81
|
attrs: dict[str, Any] | None = None,
|
|
82
82
|
job_id: str | None = None,
|
|
83
83
|
task_id: str | None = None,
|
|
84
|
+
group_id: str | None = None,
|
|
84
85
|
) -> None:
|
|
85
86
|
self.name = name
|
|
86
87
|
self.root = root
|
|
87
88
|
self.attrs = attrs or {}
|
|
88
89
|
self.job_id = job_id
|
|
89
90
|
self.task_id = task_id
|
|
91
|
+
self.group_id = group_id
|
|
90
92
|
self.task_run_id = str(uuid.uuid4())
|
|
91
|
-
self.trace_obj = Trace(self.task_run_id, name, job_id, task_id)
|
|
93
|
+
self.trace_obj = Trace(self.task_run_id, name, job_id, task_id, group_id)
|
|
92
94
|
self._otel_trace = None
|
|
93
95
|
|
|
94
96
|
async def __aenter__(self) -> Trace:
|
|
@@ -104,6 +106,7 @@ class AsyncTrace:
|
|
|
104
106
|
attributes=self.attrs,
|
|
105
107
|
job_id=self.job_id,
|
|
106
108
|
task_id=self.task_id,
|
|
109
|
+
group_id=self.group_id,
|
|
107
110
|
)
|
|
108
111
|
self._otel_trace.__enter__()
|
|
109
112
|
|
|
@@ -116,6 +119,7 @@ class AsyncTrace:
|
|
|
116
119
|
job_id=self.job_id,
|
|
117
120
|
trace_name=self.name,
|
|
118
121
|
task_id=self.task_id,
|
|
122
|
+
group_id=self.group_id,
|
|
119
123
|
),
|
|
120
124
|
name=f"trace-status-{self.task_run_id[:8]}",
|
|
121
125
|
)
|
|
@@ -146,6 +150,7 @@ class AsyncTrace:
|
|
|
146
150
|
error_message=str(exc_val) if exc_val else None,
|
|
147
151
|
trace_name=self.name,
|
|
148
152
|
task_id=self.task_id,
|
|
153
|
+
group_id=self.group_id,
|
|
149
154
|
),
|
|
150
155
|
name=f"trace-status-{self.task_run_id[:8]}-{status}",
|
|
151
156
|
)
|
|
@@ -264,6 +269,7 @@ def async_trace(
|
|
|
264
269
|
attrs: dict[str, Any] | None = None,
|
|
265
270
|
job_id: str | None = None,
|
|
266
271
|
task_id: str | None = None,
|
|
272
|
+
group_id: str | None = None,
|
|
267
273
|
) -> AsyncTrace:
|
|
268
274
|
"""Create an async trace context for telemetry tracking.
|
|
269
275
|
|
|
@@ -277,6 +283,7 @@ def async_trace(
|
|
|
277
283
|
attrs: Additional attributes to attach to the trace
|
|
278
284
|
job_id: Optional job ID to associate with this trace
|
|
279
285
|
task_id: Optional task ID for custom task identifiers
|
|
286
|
+
group_id: Optional group ID to associate with this trace
|
|
280
287
|
|
|
281
288
|
Returns:
|
|
282
289
|
AsyncTrace context manager
|
|
@@ -292,7 +299,14 @@ def async_trace(
|
|
|
292
299
|
Use this async version only in high-concurrency scenarios (200+ parallel
|
|
293
300
|
tasks) or when writing custom async evaluation frameworks.
|
|
294
301
|
"""
|
|
295
|
-
return AsyncTrace(
|
|
302
|
+
return AsyncTrace(
|
|
303
|
+
name,
|
|
304
|
+
root=root,
|
|
305
|
+
attrs=attrs,
|
|
306
|
+
job_id=job_id,
|
|
307
|
+
task_id=task_id,
|
|
308
|
+
group_id=group_id if group_id else str(uuid.uuid4()),
|
|
309
|
+
)
|
|
296
310
|
|
|
297
311
|
|
|
298
312
|
def async_job(
|
hud/telemetry/trace.py
CHANGED
|
@@ -34,11 +34,13 @@ class Trace:
|
|
|
34
34
|
name: str,
|
|
35
35
|
job_id: str | None = None,
|
|
36
36
|
task_id: str | None = None,
|
|
37
|
+
group_id: str | None = None,
|
|
37
38
|
) -> None:
|
|
38
39
|
self.id = trace_id
|
|
39
40
|
self.name = name
|
|
40
41
|
self.job_id = job_id
|
|
41
42
|
self.task_id = task_id
|
|
43
|
+
self.group_id = group_id
|
|
42
44
|
self.created_at = datetime.now(UTC)
|
|
43
45
|
|
|
44
46
|
async def log(self, metrics: dict[str, Any]) -> None:
|
|
@@ -93,6 +95,7 @@ def trace(
|
|
|
93
95
|
attrs: dict[str, Any] | None = None,
|
|
94
96
|
job_id: str | None = None,
|
|
95
97
|
task_id: str | None = None,
|
|
98
|
+
group_id: str | None = None,
|
|
96
99
|
) -> Generator[Trace, None, None]:
|
|
97
100
|
"""Start a HUD trace context for telemetry tracking.
|
|
98
101
|
|
|
@@ -104,6 +107,7 @@ def trace(
|
|
|
104
107
|
attrs: Additional attributes to attach to the trace
|
|
105
108
|
job_id: Optional job ID to associate with this trace
|
|
106
109
|
task_id: Optional task ID (for custom task identifiers)
|
|
110
|
+
group_id: Optional group ID to associate with this trace
|
|
107
111
|
|
|
108
112
|
Yields:
|
|
109
113
|
Trace: The trace object with logging capabilities
|
|
@@ -143,7 +147,7 @@ def trace(
|
|
|
143
147
|
task_run_id = str(uuid.uuid4())
|
|
144
148
|
|
|
145
149
|
# Create trace object
|
|
146
|
-
trace_obj = Trace(task_run_id, name, job_id, task_id)
|
|
150
|
+
trace_obj = Trace(task_run_id, name, job_id, task_id, group_id)
|
|
147
151
|
|
|
148
152
|
# Delegate to OpenTelemetry implementation
|
|
149
153
|
with OtelTrace(
|
|
@@ -153,5 +157,6 @@ def trace(
|
|
|
153
157
|
attributes=attrs or {},
|
|
154
158
|
job_id=job_id,
|
|
155
159
|
task_id=task_id,
|
|
160
|
+
group_id=group_id,
|
|
156
161
|
):
|
|
157
162
|
yield trace_obj
|
hud/types.py
CHANGED
|
@@ -5,6 +5,7 @@ import json
|
|
|
5
5
|
import logging
|
|
6
6
|
import uuid
|
|
7
7
|
from collections import defaultdict
|
|
8
|
+
from enum import Enum
|
|
8
9
|
from string import Template
|
|
9
10
|
from typing import Any, Literal
|
|
10
11
|
|
|
@@ -21,6 +22,14 @@ logger = logging.getLogger(__name__)
|
|
|
21
22
|
_missing_api_key_error_logged: bool = False
|
|
22
23
|
|
|
23
24
|
|
|
25
|
+
class AgentType(str, Enum):
    """Closed set of agent identifiers selectable by name.

    Subclasses ``str`` so each member compares equal to its plain string
    value (e.g. ``AgentType.CLAUDE == "claude"``). Members are passed as the
    agent selector to ``build_agent`` and as ``agent_type`` to
    ``run_single_task`` in the CLI eval tests.
    """

    CLAUDE = "claude"
    OPENAI = "openai"
    VLLM = "vllm"
    LITELLM = "litellm"
    INTEGRATION_TEST = "integration_test"
|
|
31
|
+
|
|
32
|
+
|
|
24
33
|
class Task(BaseModel):
|
|
25
34
|
"""
|
|
26
35
|
A task configuration that can be used to create a task.
|
|
@@ -325,6 +334,7 @@ class Trace(BaseModel):
|
|
|
325
334
|
|
|
326
335
|
__all__ = [
|
|
327
336
|
"AgentResponse",
|
|
337
|
+
"AgentType",
|
|
328
338
|
"MCPToolCall",
|
|
329
339
|
"MCPToolResult",
|
|
330
340
|
"Trace",
|