hud-python 0.4.53__py3-none-any.whl → 0.4.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/cli/flows/dev.py ADDED
@@ -0,0 +1,155 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import contextlib
5
+ import json
6
+ import logging
7
+ from typing import Any
8
+
9
+ from hud.settings import settings
10
+ from hud.shared.requests import make_request
11
+ from hud.utils.hud_console import hud_console
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
async def create_dynamic_trace(
    *,
    mcp_config: dict[str, dict[str, Any]],
    build_status: bool,
    environment_name: str,
) -> str | None:
    """Create a dynamic trace for HUD dev sessions when running in HTTP mode.

    Sends a POST to ``{settings.hud_api_url}/dev/dynamic-traces`` with:
      - mcp_config: points to the local MCP config (same as Cursor)
      - build_status: True if Docker mode (built image), False if basic Python mode
      - environment_name: Name of the environment/server/image

    This is best-effort: a missing API key or any error raised while sending
    the request or reading the response is logged as a warning and never
    propagated, so the dev flow is not interrupted.

    Args:
        mcp_config: MCP configuration to register with the trace.
        build_status: Coerced to ``bool`` before being sent.
        environment_name: Name reported for the environment.

    Returns:
        The full URL to the live trace when successful, otherwise None.
    """
    api_base = settings.hud_api_url.rstrip("/")
    # Endpoint TBD; use a sensible default path that the backend can wire up
    url = f"{api_base}/dev/dynamic-traces"

    payload = {
        "mcp_config": mcp_config,
        "build_status": bool(build_status),
        "environment_name": environment_name,
    }

    # Best-effort; if missing API key, log and continue
    api_key = settings.api_key
    if not api_key:
        logger.warning("Skipping dynamic trace creation; missing HUD_API_KEY")
        return None

    try:
        resp = await make_request("POST", url=url, json=payload, api_key=api_key)
        return _extract_trace_url(resp)
    except Exception as e:
        # Do not interrupt dev flow
        # NOTE(review): the preview includes mcp_config, which may carry
        # headers/credentials - confirm that is acceptable in warning logs.
        try:
            preview = json.dumps(payload)[:500]
            logger.warning("Failed to create dynamic dev trace: %s | payload=%s", e, preview)
        except Exception:
            # Payload was not JSON-serialisable; log without the preview.
            logger.warning("Failed to create dynamic dev trace: %s", e)
        return None


def _extract_trace_url(resp: Any) -> str | None:
    """Return the live trace URL described by a dynamic-trace API response, or None."""
    if not isinstance(resp, dict):
        return None

    # ``data`` is an optional nested envelope; treat anything that is not a
    # dict (None, list, str, ...) as empty so both lookups below are safe.
    data = resp.get("data")
    if not isinstance(data, dict):
        data = {}

    # New API returns an id; construct the URL as https://hud.so/trace/{id}
    trace_id = resp.get("id")
    if trace_id is None:
        trace_id = data.get("id")

    # Backcompat: if url is provided directly
    if not trace_id:
        direct_url = resp.get("url") or data.get("url")
        if isinstance(direct_url, str) and direct_url:
            return direct_url

    if isinstance(trace_id, str) and trace_id:
        return f"https://hud.so/trace/{trace_id}"
    return None
75
+
76
+
77
def show_dev_ui(
    *,
    live_trace_url: str,
    server_name: str,
    port: int,
    cursor_deeplink: str,
    is_docker: bool = False,
) -> None:
    """Show the minimal dev UI with live trace link.

    This is called only when we have a successful trace URL.
    For full UI mode, the caller should use show_dev_server_info() directly.

    Args:
        live_trace_url: URL to the live trace
        server_name: Name of the server/image
        port: Port the server is running on (not used by this function;
            kept in the signature for callers)
        cursor_deeplink: Pre-generated Cursor deeplink URL
        is_docker: Whether this is Docker mode (affects the server label and
            the hot-reload message)
    """
    import webbrowser

    from rich.align import Align
    from rich.panel import Panel
    from rich.style import Style
    from rich.text import Text

    # Show header first
    hud_console.header("HUD Development Server", icon="🚀")

    # Try to open the live trace in the default browser; failure to do so
    # must never break the dev session.
    with contextlib.suppress(Exception):
        # new=2 -> open in a new tab, if possible
        webbrowser.open(live_trace_url, new=2)

    # Show panel with just the link: blue, bold, underlined and clickable.
    # Built as a Text with an explicit Style (not a markup string) so that
    # characters such as "[" or "]" in the URL are rendered literally instead
    # of being parsed as console markup.
    link_text = Text(
        live_trace_url,
        style=Style(
            color="rgb(108,113,196)",
            bold=True,
            underline=True,
            link=live_trace_url,
        ),
    )
    panel = Panel(
        Align.center(link_text),
        title="🔗 Live Dev Trace",
        border_style="rgb(192,150,12)",  # HUD gold
        padding=(1, 2),
    )
    hud_console.console.print(panel)

    # Show other info below
    label = "Base image" if is_docker else "Server"
    hud_console.info("")
    hud_console.info(f"{hud_console.sym.ITEM} {label}: {server_name}")
    hud_console.info(f"{hud_console.sym.ITEM} Cursor: {cursor_deeplink}")
    hud_console.info("")
    hud_console.info(f"{hud_console.sym.SUCCESS} Hot-reload enabled")
    if is_docker:
        hud_console.dim_info(
            "",
            "Container restarts on file changes (mounted volumes), "
            "if changing tools run hud dev again",
        )
    hud_console.info("")
138
+
139
+
140
def generate_cursor_deeplink(server_name: str, port: int) -> str:
    """Generate a Cursor deeplink for the MCP server.

    The server config (``{"url": "http://localhost:<port>/mcp"}``) is
    serialised to JSON, base64-encoded, and placed in the ``config`` query
    parameter. Both query values are percent-encoded so that characters with
    special meaning in a query string (``&``, ``#``, spaces in the name;
    ``+``, ``/``, ``=`` from the base64 alphabet) survive URL parsing intact.

    Args:
        server_name: Name of the server
        port: Port the server is running on

    Returns:
        Cursor deeplink URL
    """
    from urllib.parse import quote

    server_config = {"url": f"http://localhost:{port}/mcp"}
    config_json = json.dumps(server_config, indent=2)
    config_base64 = base64.b64encode(config_json.encode()).decode()

    # safe="" so that "/" is escaped too; it is part of the base64 alphabet.
    name_param = quote(server_name, safe="")
    config_param = quote(config_base64, safe="")
    return (
        "cursor://anysphere.cursor-deeplink/mcp/install"
        f"?name={name_param}&config={config_param}"
    )
hud/cli/flows/tasks.py CHANGED
@@ -11,7 +11,7 @@ import yaml
11
11
 
12
12
  from hud.cli.push import push_environment
13
13
  from hud.cli.utils.docker import require_docker_running
14
- from hud.cli.utils.env_check import ensure_built, find_environment_dir
14
+ from hud.cli.utils.env_check import find_environment_dir
15
15
  from hud.cli.utils.registry import extract_name_and_tag
16
16
  from hud.utils.hud_console import hud_console
17
17
  from hud.utils.tasks import load_tasks
@@ -56,7 +56,9 @@ def _validate_tasks(tasks: list[Task]) -> bool:
56
56
  return True
57
57
 
58
58
 
59
- def _ensure_pushed(env_dir: Path, lock_data: dict[str, Any]) -> dict[str, Any]:
59
+ def _ensure_pushed(
60
+ env_dir: Path, lock_data: dict[str, Any], check_docker: bool = True
61
+ ) -> dict[str, Any]:
60
62
  """Ensure the environment is pushed to a registry; return updated lock data."""
61
63
  pushed = bool(lock_data.get("push"))
62
64
  if not pushed:
@@ -64,7 +66,8 @@ def _ensure_pushed(env_dir: Path, lock_data: dict[str, Any]) -> dict[str, Any]:
64
66
  if not hud_console.confirm("Push to a registry now (runs 'hud push')?", default=True):
65
67
  raise typer.Exit(1)
66
68
  # Check Docker availability before attempting a push
67
- require_docker_running()
69
+ if check_docker:
70
+ require_docker_running()
68
71
 
69
72
  # If Docker or login is not configured, the push function will fail and halt.
70
73
  push_environment(str(env_dir), yes=True)
@@ -293,9 +296,24 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
293
296
  hud_console.hint("Ensure you're in or near your environment folder before running 'hud rl'")
294
297
  raise typer.Exit(1)
295
298
 
296
- # Ensure built and pushed
297
- lock_data = ensure_built(env_dir, interactive=True)
298
- lock_data = _ensure_pushed(env_dir, lock_data)
299
+ # For convert command, we don't need Docker running - just check for lock file
300
+ # This avoids showing Docker-related messages during conversion
301
+ lock_path = env_dir / "hud.lock.yaml"
302
+ if not lock_path.exists():
303
+ hud_console.error("No hud.lock.yaml found. The environment needs to be built first.")
304
+ hud_console.info("Run 'hud build' in the environment directory to build it.")
305
+ raise typer.Exit(1)
306
+
307
+ # Load lock data directly
308
+ try:
309
+ with open(lock_path) as f:
310
+ lock_data: dict[str, Any] = yaml.safe_load(f) or {}
311
+ except Exception as e:
312
+ hud_console.error(f"Failed to read hud.lock.yaml: {e}")
313
+ raise typer.Exit(1) from e
314
+
315
+ # Check if pushed - don't check Docker for convert command
316
+ lock_data = _ensure_pushed(env_dir, lock_data, check_docker=False)
299
317
 
300
318
  # Derive remote image name org/name:tag
301
319
  remote_image = _derive_remote_image(lock_data)
@@ -387,8 +405,11 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
387
405
  f"Detected env vars in .env that look like API keys: {names_preview}.\n"
388
406
  "Include them as remote headers (values will be ${VAR} placeholders)?"
389
407
  )
390
- if hud_console.confirm(prompt, default=True):
391
- all_detected.update(missing)
408
+ if not hud_console.confirm(prompt, default=True):
409
+ # User cancelled - exit without creating the file
410
+ hud_console.info("Conversion cancelled by user")
411
+ raise typer.Exit(0)
412
+ all_detected.update(missing)
392
413
 
393
414
  # Final set of env vars to convert to headers
394
415
  provided_keys = all_detected
@@ -461,6 +482,5 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
461
482
  f.write("\n")
462
483
 
463
484
  hud_console.success(f"Created remote tasks file: {remote_path.name}")
464
- hud_console.hint("Proceeding with RL training on the remote environment")
465
485
 
466
486
  return str(remote_path)
@@ -11,7 +11,7 @@ from hud.cli.eval import (
11
11
  build_agent,
12
12
  run_single_task,
13
13
  )
14
- from hud.types import Task, Trace
14
+ from hud.types import AgentType, Task, Trace
15
15
 
16
16
 
17
17
  class TestBuildAgent:
@@ -26,7 +26,7 @@ class TestBuildAgent:
26
26
  mock_runner.return_value = mock_instance
27
27
 
28
28
  # Test with verbose=False
29
- result = build_agent("integration_test", verbose=False)
29
+ result = build_agent(AgentType.INTEGRATION_TEST, verbose=False)
30
30
 
31
31
  mock_runner.assert_called_once_with(verbose=False)
32
32
  assert result == mock_instance
@@ -40,7 +40,7 @@ class TestBuildAgent:
40
40
  mock_runner.return_value = mock_instance
41
41
 
42
42
  # Test with verbose=False
43
- result = build_agent("claude", verbose=False)
43
+ result = build_agent(AgentType.CLAUDE, verbose=False)
44
44
 
45
45
  mock_runner.assert_called_once_with(model="claude-sonnet-4-20250514", verbose=False)
46
46
  assert result == mock_instance
@@ -55,7 +55,7 @@ class TestBuildAgent:
55
55
 
56
56
  # Test with verbose=False
57
57
  result = build_agent(
58
- "claude",
58
+ AgentType.CLAUDE,
59
59
  model="claude-sonnet-4-20250514",
60
60
  allowed_tools=["act"],
61
61
  verbose=True,
@@ -97,7 +97,7 @@ class TestRunSingleTask:
97
97
  patch("hud.cli.eval.find_environment_dir", return_value=None),
98
98
  patch("hud.cli.eval.hud.trace"),
99
99
  ):
100
- await run_single_task("test.json", agent_type="integration_test", max_steps=10)
100
+ await run_single_task("test.json", agent_type=AgentType.INTEGRATION_TEST, max_steps=10)
101
101
 
102
102
  # Verify agent.run was called with the task containing agent_config
103
103
  mock_agent.run.assert_called_once()
@@ -119,7 +119,7 @@ class TestRunSingleTask:
119
119
  mock_grouped.return_value = [{"task": mock_task, "rewards": [1.0, 0.5]}]
120
120
 
121
121
  await run_single_task(
122
- "test.json", agent_type="integration_test", group_size=3, max_steps=10
122
+ "test.json", agent_type=AgentType.INTEGRATION_TEST, group_size=3, max_steps=10
123
123
  )
124
124
 
125
125
  # Verify run_tasks_grouped was called with correct group_size
hud/cli/utils/docker.py CHANGED
@@ -308,7 +308,10 @@ def require_docker_running() -> None:
308
308
  "Is Docker running? Open Docker Desktop and wait until it reports 'Running'"
309
309
  )
310
310
  raise typer.Exit(1) from e
311
- except Exception as e:
312
- hud_console.error(f"Docker check failed: {e}")
311
+ except typer.Exit:
312
+ # Propagate cleanly without extra noise; hints already printed above
313
+ raise
314
+ except Exception:
315
+ # Unknown failure - keep output minimal and avoid stack traces
313
316
  hud_console.hint("Is the Docker daemon running?")
314
- raise typer.Exit(1) from e
317
+ raise typer.Exit(1) # noqa: B904
hud/clients/base.py CHANGED
@@ -146,7 +146,7 @@ class BaseHUDClient(AgentMCPClient):
146
146
  except HudException:
147
147
  raise
148
148
  except Exception as e:
149
- # Auto-converts to appropriate HUD exception type with hints
149
+ hud_console.error(f"Failed to initialize MCP client: {e}")
150
150
  raise HudException from e
151
151
 
152
152
  # Common hud behavior - fetch telemetry
@@ -333,7 +333,7 @@ class BaseHUDClient(AgentMCPClient):
333
333
  tool_info = {
334
334
  "name": tool.name,
335
335
  "description": tool.description,
336
- "input_schema": tool.inputSchema,
336
+ "inputSchema": tool.inputSchema,
337
337
  }
338
338
  analysis["tools"].append(tool_info)
339
339
 
hud/otel/context.py CHANGED
@@ -6,6 +6,7 @@ User-facing APIs are in hud.telemetry.
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
+ import contextlib
9
10
  import contextvars
10
11
  import logging
11
12
  from contextlib import contextmanager
@@ -232,6 +233,8 @@ async def _update_task_status_async(
232
233
  error_message: str | None = None,
233
234
  trace_name: str | None = None,
234
235
  task_id: str | None = None,
236
+ group_id: str | None = None,
237
+ extra_metadata: dict[str, Any] | None = None,
235
238
  ) -> None:
236
239
  """Async task status update."""
237
240
  if not settings.telemetry_enabled:
@@ -271,12 +274,20 @@ async def _update_task_status_async(
271
274
  metadata["mcp_tool_steps"] = get_mcp_tool_steps()
272
275
  metadata["agent_steps"] = get_agent_steps()
273
276
 
277
+ # Merge any extra metadata provided by callers (e.g., task config summaries)
278
+ if extra_metadata:
279
+ with contextlib.suppress(Exception):
280
+ metadata.update(extra_metadata)
281
+
274
282
  if metadata:
275
283
  data["metadata"] = metadata
276
284
 
277
285
  if task_id:
278
286
  data["task_id"] = task_id
279
287
 
288
+ if group_id:
289
+ data["group_id"] = group_id
290
+
280
291
  await make_request(
281
292
  method="POST",
282
293
  url=f"{settings.hud_telemetry_url}/trace/{task_run_id}/status",
@@ -297,10 +308,21 @@ def _fire_and_forget_status_update(
297
308
  error_message: str | None = None,
298
309
  trace_name: str | None = None,
299
310
  task_id: str | None = None,
311
+ group_id: str | None = None,
312
+ extra_metadata: dict[str, Any] | None = None,
300
313
  ) -> None:
301
314
  """Fire and forget status update - works in any context including Jupyter."""
302
315
  fire_and_forget(
303
- _update_task_status_async(task_run_id, status, job_id, error_message, trace_name, task_id),
316
+ _update_task_status_async(
317
+ task_run_id,
318
+ status,
319
+ job_id,
320
+ error_message,
321
+ trace_name,
322
+ task_id,
323
+ group_id,
324
+ extra_metadata,
325
+ ),
304
326
  f"update task {task_run_id} status to {status}",
305
327
  )
306
328
 
@@ -312,6 +334,8 @@ def _update_task_status_sync(
312
334
  error_message: str | None = None,
313
335
  trace_name: str | None = None,
314
336
  task_id: str | None = None,
337
+ group_id: str | None = None,
338
+ extra_metadata: dict[str, Any] | None = None,
315
339
  ) -> None:
316
340
  """Synchronous task status update."""
317
341
  if not settings.telemetry_enabled:
@@ -351,12 +375,20 @@ def _update_task_status_sync(
351
375
  metadata["mcp_tool_steps"] = get_mcp_tool_steps()
352
376
  metadata["agent_steps"] = get_agent_steps()
353
377
 
378
+ # Merge any extra metadata provided by callers
379
+ if extra_metadata:
380
+ with contextlib.suppress(Exception):
381
+ metadata.update(extra_metadata)
382
+
354
383
  if metadata:
355
384
  data["metadata"] = metadata
356
385
 
357
386
  if task_id:
358
387
  data["task_id"] = task_id
359
388
 
389
+ if group_id:
390
+ data["group_id"] = group_id
391
+
360
392
  make_request_sync(
361
393
  method="POST",
362
394
  url=f"{settings.hud_telemetry_url}/trace/{task_run_id}/status",
@@ -447,10 +479,12 @@ class trace:
447
479
  attributes: dict[str, Any] | None = None,
448
480
  job_id: str | None = None,
449
481
  task_id: str | None = None,
482
+ group_id: str | None = None,
450
483
  ) -> None:
451
484
  self.task_run_id = task_run_id
452
485
  self.job_id = job_id
453
486
  self.task_id = task_id
487
+ self.group_id = group_id
454
488
  self.is_root = is_root
455
489
  self.span_name = span_name
456
490
  self.attributes = attributes or {}
@@ -473,6 +507,8 @@ class trace:
473
507
  ctx = baggage.set_baggage("hud.job_id", self.job_id, context=ctx)
474
508
  if self.task_id:
475
509
  ctx = baggage.set_baggage("hud.task_id", self.task_id, context=ctx)
510
+ if self.group_id:
511
+ ctx = baggage.set_baggage("hud.group_id", self.group_id, context=ctx)
476
512
  self._otel_token = context.attach(ctx)
477
513
 
478
514
  # Start a span as current
@@ -486,6 +522,8 @@ class trace:
486
522
  span_attrs["hud.job_id"] = self.job_id
487
523
  if self.task_id:
488
524
  span_attrs["hud.task_id"] = self.task_id
525
+ if self.group_id:
526
+ span_attrs["hud.group_id"] = self.group_id
489
527
 
490
528
  # Use start_as_current_span context manager
491
529
  self._span_manager = tracer.start_as_current_span(
@@ -502,6 +540,7 @@ class trace:
502
540
  job_id=self.job_id,
503
541
  trace_name=self.span_name,
504
542
  task_id=self.task_id,
543
+ group_id=self.group_id,
505
544
  )
506
545
  # Print the nice trace URL box (only if not part of a job)
507
546
  if not self.job_id:
@@ -528,6 +567,7 @@ class trace:
528
567
  error_message=str(exc_val),
529
568
  trace_name=self.span_name,
530
569
  task_id=self.task_id,
570
+ group_id=self.group_id,
531
571
  )
532
572
  # Print error completion message (only if not part of a job)
533
573
  if not self.job_id:
@@ -540,6 +580,7 @@ class trace:
540
580
  job_id=self.job_id,
541
581
  trace_name=self.span_name,
542
582
  task_id=self.task_id,
583
+ group_id=self.group_id,
543
584
  )
544
585
  # Print success completion message (only if not part of a job)
545
586
  if not self.job_id:
hud/server/server.py CHANGED
@@ -311,11 +311,35 @@ class MCPServer(FastMCP):
311
311
  if transport is None:
312
312
  transport = "stdio"
313
313
 
314
- # Register HTTP helpers for HTTP transport
314
+ # Register HTTP helpers and CORS for HTTP transport
315
315
  if transport in ("http", "sse"):
316
316
  self._register_hud_helpers()
317
317
  logger.info("Registered HUD helper endpoints at /hud/*")
318
318
 
319
+ # Add CORS middleware if not already provided
320
+ from starlette.middleware import Middleware
321
+ from starlette.middleware.cors import CORSMiddleware
322
+
323
+ # Get or create middleware list
324
+ middleware = transport_kwargs.get("middleware", [])
325
+ if isinstance(middleware, list):
326
+ # Check if CORS is already configured
327
+ has_cors = any(
328
+ isinstance(m, Middleware) and m.cls == CORSMiddleware for m in middleware
329
+ )
330
+ if not has_cors:
331
+ # Add CORS with permissive defaults for dev
332
+ cors_middleware = Middleware(
333
+ CORSMiddleware,
334
+ allow_origins=["*"],
335
+ allow_methods=["GET", "POST", "DELETE", "OPTIONS"],
336
+ allow_headers=["*"],
337
+ expose_headers=["Mcp-Session-Id"],
338
+ )
339
+ middleware = [cors_middleware, *middleware]
340
+ transport_kwargs["middleware"] = middleware
341
+ logger.info("Added CORS middleware for browser compatibility")
342
+
319
343
  try:
320
344
  await super().run_async(
321
345
  transport=transport, show_banner=show_banner, **transport_kwargs
@@ -506,9 +530,11 @@ class MCPServer(FastMCP):
506
530
  return str(obj)
507
531
 
508
532
  serialized = serialize_obj(result)
509
- return JSONResponse({"success": True, "result": serialized})
533
+ # Return the serialized CallToolResult directly (no wrapper)
534
+ return JSONResponse(serialized)
510
535
  except Exception as e:
511
- return JSONResponse({"success": False, "error": str(e)}, status_code=400)
536
+ # Return a simple error object
537
+ return JSONResponse({"error": str(e)}, status_code=400)
512
538
 
513
539
  return tool_endpoint
514
540
 
hud/settings.py CHANGED
@@ -70,6 +70,12 @@ class Settings(BaseSettings):
70
70
  validation_alias="HUD_RL_URL",
71
71
  )
72
72
 
73
+ hud_api_url: str = Field(
74
+ default="https://api.hud.so",
75
+ description="Base URL for the HUD API server",
76
+ validation_alias="HUD_API_URL",
77
+ )
78
+
73
79
  api_key: str | None = Field(
74
80
  default=None,
75
81
  description="API key for authentication with the HUD API",
@@ -81,14 +81,16 @@ class AsyncTrace:
81
81
  attrs: dict[str, Any] | None = None,
82
82
  job_id: str | None = None,
83
83
  task_id: str | None = None,
84
+ group_id: str | None = None,
84
85
  ) -> None:
85
86
  self.name = name
86
87
  self.root = root
87
88
  self.attrs = attrs or {}
88
89
  self.job_id = job_id
89
90
  self.task_id = task_id
91
+ self.group_id = group_id
90
92
  self.task_run_id = str(uuid.uuid4())
91
- self.trace_obj = Trace(self.task_run_id, name, job_id, task_id)
93
+ self.trace_obj = Trace(self.task_run_id, name, job_id, task_id, group_id)
92
94
  self._otel_trace = None
93
95
 
94
96
  async def __aenter__(self) -> Trace:
@@ -104,6 +106,7 @@ class AsyncTrace:
104
106
  attributes=self.attrs,
105
107
  job_id=self.job_id,
106
108
  task_id=self.task_id,
109
+ group_id=self.group_id,
107
110
  )
108
111
  self._otel_trace.__enter__()
109
112
 
@@ -116,6 +119,7 @@ class AsyncTrace:
116
119
  job_id=self.job_id,
117
120
  trace_name=self.name,
118
121
  task_id=self.task_id,
122
+ group_id=self.group_id,
119
123
  ),
120
124
  name=f"trace-status-{self.task_run_id[:8]}",
121
125
  )
@@ -146,6 +150,7 @@ class AsyncTrace:
146
150
  error_message=str(exc_val) if exc_val else None,
147
151
  trace_name=self.name,
148
152
  task_id=self.task_id,
153
+ group_id=self.group_id,
149
154
  ),
150
155
  name=f"trace-status-{self.task_run_id[:8]}-{status}",
151
156
  )
@@ -264,6 +269,7 @@ def async_trace(
264
269
  attrs: dict[str, Any] | None = None,
265
270
  job_id: str | None = None,
266
271
  task_id: str | None = None,
272
+ group_id: str | None = None,
267
273
  ) -> AsyncTrace:
268
274
  """Create an async trace context for telemetry tracking.
269
275
 
@@ -277,6 +283,7 @@ def async_trace(
277
283
  attrs: Additional attributes to attach to the trace
278
284
  job_id: Optional job ID to associate with this trace
279
285
  task_id: Optional task ID for custom task identifiers
286
+ group_id: Optional group ID to associate with this trace
280
287
 
281
288
  Returns:
282
289
  AsyncTrace context manager
@@ -292,7 +299,14 @@ def async_trace(
292
299
  Use this async version only in high-concurrency scenarios (200+ parallel
293
300
  tasks) or when writing custom async evaluation frameworks.
294
301
  """
295
- return AsyncTrace(name, root=root, attrs=attrs, job_id=job_id, task_id=task_id)
302
+ return AsyncTrace(
303
+ name,
304
+ root=root,
305
+ attrs=attrs,
306
+ job_id=job_id,
307
+ task_id=task_id,
308
+ group_id=group_id if group_id else str(uuid.uuid4()),
309
+ )
296
310
 
297
311
 
298
312
  def async_job(
hud/telemetry/trace.py CHANGED
@@ -34,11 +34,13 @@ class Trace:
34
34
  name: str,
35
35
  job_id: str | None = None,
36
36
  task_id: str | None = None,
37
+ group_id: str | None = None,
37
38
  ) -> None:
38
39
  self.id = trace_id
39
40
  self.name = name
40
41
  self.job_id = job_id
41
42
  self.task_id = task_id
43
+ self.group_id = group_id
42
44
  self.created_at = datetime.now(UTC)
43
45
 
44
46
  async def log(self, metrics: dict[str, Any]) -> None:
@@ -93,6 +95,7 @@ def trace(
93
95
  attrs: dict[str, Any] | None = None,
94
96
  job_id: str | None = None,
95
97
  task_id: str | None = None,
98
+ group_id: str | None = None,
96
99
  ) -> Generator[Trace, None, None]:
97
100
  """Start a HUD trace context for telemetry tracking.
98
101
 
@@ -104,6 +107,7 @@ def trace(
104
107
  attrs: Additional attributes to attach to the trace
105
108
  job_id: Optional job ID to associate with this trace
106
109
  task_id: Optional task ID (for custom task identifiers)
110
+ group_id: Optional group ID to associate with this trace
107
111
 
108
112
  Yields:
109
113
  Trace: The trace object with logging capabilities
@@ -143,7 +147,7 @@ def trace(
143
147
  task_run_id = str(uuid.uuid4())
144
148
 
145
149
  # Create trace object
146
- trace_obj = Trace(task_run_id, name, job_id, task_id)
150
+ trace_obj = Trace(task_run_id, name, job_id, task_id, group_id)
147
151
 
148
152
  # Delegate to OpenTelemetry implementation
149
153
  with OtelTrace(
@@ -153,5 +157,6 @@ def trace(
153
157
  attributes=attrs or {},
154
158
  job_id=job_id,
155
159
  task_id=task_id,
160
+ group_id=group_id,
156
161
  ):
157
162
  yield trace_obj
hud/types.py CHANGED
@@ -5,6 +5,7 @@ import json
5
5
  import logging
6
6
  import uuid
7
7
  from collections import defaultdict
8
+ from enum import Enum
8
9
  from string import Template
9
10
  from typing import Any, Literal
10
11
 
@@ -21,6 +22,14 @@ logger = logging.getLogger(__name__)
21
22
  _missing_api_key_error_logged: bool = False
22
23
 
23
24
 
25
+ class AgentType(str, Enum):
26
+ CLAUDE = "claude"
27
+ OPENAI = "openai"
28
+ VLLM = "vllm"
29
+ LITELLM = "litellm"
30
+ INTEGRATION_TEST = "integration_test"
31
+
32
+
24
33
  class Task(BaseModel):
25
34
  """
26
35
  A task configuration that can be used to create a task.
@@ -325,6 +334,7 @@ class Trace(BaseModel):
325
334
 
326
335
  __all__ = [
327
336
  "AgentResponse",
337
+ "AgentType",
328
338
  "MCPToolCall",
329
339
  "MCPToolResult",
330
340
  "Trace",