hud-python 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (51)
  1. hud/__init__.py +22 -2
  2. hud/adapters/claude/adapter.py +9 -2
  3. hud/adapters/claude/tests/__init__.py +1 -0
  4. hud/adapters/claude/tests/test_adapter.py +519 -0
  5. hud/adapters/common/types.py +5 -1
  6. hud/adapters/operator/adapter.py +4 -0
  7. hud/adapters/operator/tests/__init__.py +1 -0
  8. hud/adapters/operator/tests/test_adapter.py +370 -0
  9. hud/agent/__init__.py +4 -0
  10. hud/agent/base.py +18 -2
  11. hud/agent/claude.py +20 -17
  12. hud/agent/claude_plays_pokemon.py +283 -0
  13. hud/agent/langchain.py +12 -7
  14. hud/agent/misc/__init__.py +3 -0
  15. hud/agent/misc/response_agent.py +80 -0
  16. hud/agent/operator.py +27 -19
  17. hud/agent/tests/__init__.py +1 -0
  18. hud/agent/tests/test_base.py +202 -0
  19. hud/env/docker_client.py +28 -18
  20. hud/env/environment.py +32 -16
  21. hud/env/local_docker_client.py +83 -42
  22. hud/env/remote_client.py +1 -3
  23. hud/env/remote_docker_client.py +71 -14
  24. hud/exceptions.py +12 -0
  25. hud/gym.py +71 -53
  26. hud/job.py +59 -14
  27. hud/server/requests.py +26 -4
  28. hud/settings.py +7 -1
  29. hud/task.py +45 -33
  30. hud/taskset.py +56 -4
  31. hud/telemetry/__init__.py +21 -0
  32. hud/telemetry/_trace.py +173 -0
  33. hud/telemetry/context.py +169 -0
  34. hud/telemetry/exporter.py +417 -0
  35. hud/telemetry/instrumentation/__init__.py +3 -0
  36. hud/telemetry/instrumentation/mcp.py +495 -0
  37. hud/telemetry/instrumentation/registry.py +59 -0
  38. hud/telemetry/mcp_models.py +331 -0
  39. hud/telemetry/tests/__init__.py +1 -0
  40. hud/telemetry/tests/test_context.py +207 -0
  41. hud/telemetry/tests/test_trace.py +270 -0
  42. hud/types.py +11 -27
  43. hud/utils/common.py +22 -2
  44. hud/utils/misc.py +53 -0
  45. hud/utils/tests/test_version.py +1 -1
  46. hud/version.py +7 -0
  47. {hud_python-0.2.4.dist-info → hud_python-0.2.6.dist-info}/METADATA +98 -30
  48. hud_python-0.2.6.dist-info/RECORD +84 -0
  49. hud_python-0.2.4.dist-info/RECORD +0 -62
  50. {hud_python-0.2.4.dist-info → hud_python-0.2.6.dist-info}/WHEEL +0 -0
  51. {hud_python-0.2.4.dist-info → hud_python-0.2.6.dist-info}/licenses/LICENSE +0 -0
hud/task.py CHANGED
@@ -1,7 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import tempfile
4
+ from pathlib import Path
3
5
  from typing import TYPE_CHECKING, Any
4
6
 
7
+ from inspect_ai.util._sandbox import SandboxEnvironmentSpec
5
8
  from pydantic import BaseModel
6
9
 
7
10
  from hud.types import CustomGym, Gym
@@ -10,11 +13,7 @@ from hud.utils.common import FunctionConfig, FunctionConfigs
10
13
  if TYPE_CHECKING:
11
14
  from inspect_ai.dataset import Sample
12
15
 
13
- # Environment specifications:
14
- # These represent the environment as a whole, including both the controller
15
- # and the environment type (eg, what os, which services are running)
16
-
17
- UBUNTU_DOCKERFILE = "ubuntu:latest"
16
+ from hud.agent import Agent
18
17
 
19
18
 
20
19
  def convert_inspect_setup(setup: str) -> list[FunctionConfig]:
@@ -57,6 +56,12 @@ class Task(BaseModel):
57
56
  gym: Gym | None = None
58
57
  config: dict[str, Any] | None = None
59
58
 
59
+ description: str | None = None
60
+
61
@classmethod
def from_dict(cls, data: dict[str, Any]) -> Task:
    """Build an instance by passing every key of ``data`` as a keyword argument.

    Args:
        data: Mapping of constructor keyword names to values.

    Returns:
        The object produced by calling ``cls`` with ``data`` unpacked.
    """
    instance = cls(**data)
    return instance
64
+
60
65
  @classmethod
61
66
  def from_inspect_sample(cls, sample: Sample) -> Task:
62
67
  """Create a Task from an Inspect dataset sample.
@@ -91,38 +96,37 @@ class Task(BaseModel):
91
96
  evaluate_config = None
92
97
  if sample.target:
93
98
  if isinstance(sample.target, str):
94
- evaluate_config = ("response_includes", [sample.target])
99
+ evaluate_config = FunctionConfig(function="response_includes", args=[sample.target])
95
100
  elif isinstance(sample.target, list):
96
- evaluate_config = ("match_all", sample.target)
101
+ evaluate_config = FunctionConfig(function="match_all", args=sample.target)
97
102
 
98
- task_gym: Gym | None = None
99
- task_setup: FunctionConfigs | None = None
103
+ task_setup: FunctionConfigs | None = (
104
+ convert_inspect_setup(sample.setup) if sample.setup else None
105
+ )
100
106
 
101
107
  sandbox = sample.sandbox
102
- dockerfile = None
103
- use_qa_gym = True
104
-
105
- if sandbox:
106
- if isinstance(sandbox, str):
107
- if sandbox == "docker":
108
- dockerfile = UBUNTU_DOCKERFILE
109
- use_qa_gym = False
110
- elif isinstance(sandbox, tuple) and len(sandbox) == 2:
111
- sandbox_type, sandbox_config = sandbox
112
- if sandbox_type == "docker":
113
- dockerfile = sandbox_config
114
- use_qa_gym = False
115
-
116
- if use_qa_gym:
117
- task_gym = "qa"
118
- task_setup = None
119
- else:
120
- task_gym = CustomGym(
121
- dockerfile=dockerfile or UBUNTU_DOCKERFILE,
122
- location="local",
123
- )
124
- task_setup = [x for x in convert_inspect_setup(sample.setup)] if sample.setup else None
125
- # TODO: Handle sample.files for CustomGym case if needed
108
+
109
+ match sandbox:
110
+ case "docker":
111
+ task_gym = CustomGym(
112
+ image_or_build_context="ubuntu:latest",
113
+ location="local",
114
+ )
115
+ case SandboxEnvironmentSpec(type="docker", config=str()):
116
+ # create temp dir and put dockerfile there, then use that path
117
+ temp_dir = tempfile.mkdtemp()
118
+ temp_dir_path = Path(temp_dir)
119
+ dockerfile_path = temp_dir_path / "Dockerfile"
120
+ dockerfile_path.write_text(sandbox.config)
121
+ task_gym = CustomGym(
122
+ image_or_build_context=temp_dir_path,
123
+ location="local",
124
+ )
125
+ case None:
126
+ task_gym = "qa"
127
+ task_setup = None
128
+ case _:
129
+ raise ValueError(f"Unsupported sandbox type: {sandbox}")
126
130
 
127
131
  return cls(
128
132
  id=None,
@@ -132,3 +136,11 @@ class Task(BaseModel):
132
136
  gym=task_gym,
133
137
  # files=sample.files, # TODO: Decide how/if to handle files
134
138
  )
139
+
140
async def fit(self, agent: Agent | type[Agent]) -> None:
    """Swap this task's gym for the agent's preferred equivalent, if it has one.

    Args:
        agent: An agent instance, or an agent class, which is instantiated
            with no arguments before use.

    The gym is looked up in ``agent.transfer_gyms``; when no entry exists the
    gym is left unchanged. A task whose gym is ``None`` is not touched.
    """
    resolved_agent = agent() if isinstance(agent, type) else agent

    current_gym = self.gym
    if current_gym is not None:
        self.gym = resolved_agent.transfer_gyms.get(current_gym, current_gym)
hud/taskset.py CHANGED
@@ -5,15 +5,19 @@ from venv import logger
5
5
 
6
6
  from pydantic import BaseModel
7
7
 
8
+ from hud.env.environment import create_remote_config
8
9
  from hud.server import make_request
9
10
  from hud.settings import settings
10
11
  from hud.task import Task
12
+ from hud.utils.config import REMOTE_EVALUATE, REMOTE_SETUP
11
13
 
12
14
  if TYPE_CHECKING:
13
15
  from collections.abc import Iterator
14
16
 
15
17
  from inspect_ai.dataset import Dataset
16
18
 
19
+ from hud.agent import Agent
20
+
17
21
 
18
22
  class TaskSet(BaseModel):
19
23
  """
@@ -21,11 +25,13 @@ class TaskSet(BaseModel):
21
25
 
22
26
  Attributes:
23
27
  id: Unique identifier for the taskset
28
+ name: Name of the taskset
24
29
  description: Description of the taskset
25
30
  tasks: List of Task objects in the taskset
26
31
  """
27
32
 
28
33
  id: str | None = None
34
+ name: str | None = None
29
35
  description: str | None = None
30
36
  tasks: list[Task] = []
31
37
 
@@ -61,16 +67,50 @@ class TaskSet(BaseModel):
61
67
 
62
68
  async def upload(
63
69
  self,
64
- name: str,
70
+ name: str | None = None,
65
71
  description: str | None = None,
66
72
  api_key: str | None = None,
67
73
  ) -> None:
68
74
  """
69
75
  Uploads the taskset to the server.
70
76
  """
77
+ if name is None:
78
+ name = self.name
79
+
80
+ if name is None:
81
+ raise ValueError("Taskset name is required")
82
+
71
83
  if api_key is None:
72
84
  api_key = settings.api_key
73
85
 
86
+ # Convert all tasks to expanded configs
87
+ processed_tasks = []
88
+ for task in self.tasks:
89
+ if task.setup is not None:
90
+ setup_config = (
91
+ create_remote_config(None, task.setup, REMOTE_SETUP)[0].args[0].model_dump()
92
+ )
93
+ else:
94
+ setup_config = None
95
+ if task.evaluate is not None:
96
+ evaluate_config = (
97
+ create_remote_config(None, task.evaluate, REMOTE_EVALUATE)[0]
98
+ .args[0]
99
+ .model_dump()
100
+ )
101
+ else:
102
+ evaluate_config = None
103
+
104
+ processed_tasks.append(
105
+ {
106
+ "prompt": task.prompt,
107
+ "gym": task.gym,
108
+ "setup": setup_config,
109
+ "evaluate": evaluate_config,
110
+ "config": task.config,
111
+ }
112
+ )
113
+
74
114
  await make_request(
75
115
  method="POST",
76
116
  url=f"{settings.base_url}/v2/tasksets",
@@ -78,13 +118,25 @@ class TaskSet(BaseModel):
78
118
  json={
79
119
  "name": name,
80
120
  "description": description,
81
- "tasks": [task.model_dump() for task in self.tasks],
121
+ "tasks": processed_tasks,
82
122
  },
83
123
  )
84
124
  logger.info(
85
- "[HUD] Taskset %s uploaded successfully, see it on app.hud.so/tasksets/%s", name, name
125
+ "Taskset %s uploaded successfully, see it on app.hud.so/evalsets/%s", name, name
86
126
  )
87
127
 
128
async def fit(self, agent: Agent | type[Agent]) -> None:
    """
    Adapt every task in the taskset to the agent's transfer_gyms.

    Args:
        agent: An agent instance, or an agent class, which is instantiated
            once with no arguments before the tasks are processed.

    Each task whose gym is not ``None`` has that gym replaced by the matching
    entry in ``agent.transfer_gyms``; gyms without an entry are kept as they are.
    """
    resolved_agent = agent() if isinstance(agent, type) else agent

    for task in self.tasks:
        current_gym = task.gym
        if current_gym is not None:
            task.gym = resolved_agent.transfer_gyms.get(current_gym, current_gym)
139
+
88
140
 
89
141
  async def load_taskset(taskset_id: str, api_key: str | None = None) -> TaskSet:
90
142
  """
@@ -107,7 +159,7 @@ async def load_taskset(taskset_id: str, api_key: str | None = None) -> TaskSet:
107
159
  api_key=api_key,
108
160
  )
109
161
 
110
- logger.info(f"[HUD] Taskset {taskset_id} loaded successfully")
162
+ logger.info(f"Taskset {taskset_id} loaded successfully")
111
163
 
112
164
  return TaskSet.model_validate(
113
165
  {
@@ -0,0 +1,21 @@
1
+ """
2
+ HUD telemetry module for capturing and reporting telemetry data from MCP calls.
3
+
4
+ This module provides functionality to trace MCP calls and export telemetry data
5
+ to the HUD platform for analysis.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from hud.telemetry._trace import init_telemetry, register_trace, trace
11
+ from hud.telemetry.context import get_current_task_run_id, set_current_task_run_id
12
+ from hud.telemetry.exporter import flush
13
+
14
+ __all__ = [
15
+ "flush",
16
+ "get_current_task_run_id",
17
+ "init_telemetry",
18
+ "register_trace",
19
+ "set_current_task_run_id",
20
+ "trace",
21
+ ]
@@ -0,0 +1,173 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+ import time
6
+ import uuid
7
+ from contextlib import contextmanager
8
+ from functools import wraps
9
+ from typing import (
10
+ TYPE_CHECKING,
11
+ Any,
12
+ ParamSpec,
13
+ TypeVar,
14
+ overload,
15
+ )
16
+
17
+ from hud.telemetry import exporter
18
+ from hud.telemetry.context import (
19
+ flush_buffer,
20
+ get_current_task_run_id,
21
+ is_root_trace,
22
+ set_current_task_run_id,
23
+ )
24
+ from hud.telemetry.exporter import submit_to_worker_loop
25
+ from hud.telemetry.instrumentation.registry import registry
26
+
27
+ if TYPE_CHECKING:
28
+ from collections.abc import (
29
+ Callable,
30
+ Coroutine,
31
+ Generator,
32
+ )
33
+
34
+ from hud.telemetry.mcp_models import BaseMCPCall
35
+
36
+ logger = logging.getLogger("hud.telemetry")
37
+ T = TypeVar("T")
38
+
39
+
40
def init_telemetry() -> None:
    """Install all instrumentors known to the telemetry registry and log completion."""
    # The registry owns the list of instrumentors; this function only triggers it.
    registry.install_all()
    logger.info("Telemetry initialized.")
44
+
45
+
46
@contextmanager
def trace(
    name: str | None = None,
    attributes: dict[str, Any] | None = None,
) -> Generator[str, None, None]:
    """
    Context manager for tracing a block of code.

    A fresh UUID string is generated as the task_run_id and installed as the
    current trace context for the duration of the block. On exit, the MCP calls
    buffered for the current context are flushed. If no other trace was active
    when this one was entered (a root trace) and at least one call was buffered,
    the calls are handed to ``submit_to_worker_loop`` for export. The previous
    trace context is always restored afterwards.

    Args:
        name: Optional name for this trace; stored in the trace attributes
            under the "trace_name" key.
        attributes: Optional dictionary of attributes to associate with this
            trace. It is copied, so the caller's dictionary is never mutated.

    Yields:
        The generated task run ID (UUID string) used for this trace.
    """
    task_run_id = str(uuid.uuid4())

    local_attributes = attributes.copy() if attributes is not None else {}
    if name is not None:
        local_attributes["trace_name"] = name

    start_time = time.time()
    logger.debug("Starting trace %s (Name: %s)", task_run_id, name if name else "Unnamed")

    # Remember the enclosing context so it can be restored when this trace ends.
    previous_task_id = get_current_task_run_id()
    was_root = is_root_trace.get()

    set_current_task_run_id(task_run_id)
    is_root = previous_task_id is None
    is_root_trace.set(is_root)

    try:
        yield task_run_id
    finally:
        end_time = time.time()
        duration = end_time - start_time

        try:
            # Must run while this trace is still the current context:
            # flush_buffer() looks the buffer up by the current task_run_id.
            mcp_calls: list[BaseMCPCall] = flush_buffer()

            trace_attributes_final = {
                **local_attributes,
                "start_time": start_time,
                "end_time": end_time,
                "duration": duration,
                "is_root": is_root,
            }

            # Only root traces that actually recorded calls are exported.
            if is_root and mcp_calls:
                try:
                    coro_to_submit = exporter.export_telemetry(
                        task_run_id=task_run_id,
                        trace_attributes=trace_attributes_final,
                        mcp_calls=mcp_calls,
                    )
                    future = submit_to_worker_loop(coro_to_submit)
                    if future:
                        logger.debug(
                            "Telemetry for trace %s submitted to background worker.", task_run_id
                        )
                    else:
                        # Fixed: the two literal fragments previously joined
                        # into "tobackground" because the space was missing.
                        logger.warning(
                            "Failed to submit telemetry for trace %s to "
                            "background worker (loop not available).",
                            task_run_id,
                        )
                except Exception as e:
                    # Telemetry is best-effort: never let export problems
                    # escape into the traced code.
                    logger.warning("Failed to submit telemetry for trace %s: %s", task_run_id, e)
        finally:
            # Restore the enclosing context even if flushing failed, so a
            # finished trace id is never left installed as the current one.
            set_current_task_run_id(previous_task_id)
            is_root_trace.set(was_root)

        logger.debug(
            "Ended trace %s (Name: %s) with %d MCP call(s)",
            task_run_id,
            name if name else "Unnamed",
            len(mcp_calls),
        )

        logger.info("View trace at https://app.hud.so/jobs/traces/%s", task_run_id)
127
+
128
+
129
+ P = ParamSpec("P")
130
+ R = TypeVar("R")
131
+
132
+
133
def register_trace(
    name: str | None = None, attributes: dict[str, Any] | None = None
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """
    Build a decorator that runs each call of the wrapped function inside
    a ``trace`` context. Works for both synchronous and coroutine functions.

    Args:
        name: Optional name for the trace. When omitted (or empty), the
            wrapped function's ``__name__`` is used.
        attributes: Optional dictionary of attributes for the trace.

    Returns:
        A decorator producing a wrapper of the same kind (sync or async)
        as the function it is applied to.
    """

    @overload
    def decorator(
        func: Callable[P, Coroutine[Any, Any, R]],
    ) -> Callable[P, Coroutine[Any, Any, R]]: ...

    @overload
    def decorator(func: Callable[P, R]) -> Callable[P, R]: ...

    def decorator(func: Callable[P, Any]) -> Callable[P, Any]:
        # Plain functions get a plain wrapper; handle that case first.
        if not asyncio.iscoroutinefunction(func):

            @wraps(func)
            def sync_wrapper(*args: P.args, **kwargs: P.kwargs) -> Any:
                with trace(name=name or func.__name__, attributes=attributes):
                    return func(*args, **kwargs)

            return sync_wrapper

        # Coroutine functions are awaited inside the trace so that the
        # context stays open until the coroutine has finished.
        @wraps(func)
        async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> Any:
            with trace(name=name or func.__name__, attributes=attributes):
                return await func(*args, **kwargs)

        return async_wrapper

    return decorator
@@ -0,0 +1,169 @@
1
+ from __future__ import annotations
2
+
3
+ import contextvars
4
+ import logging
5
+ from collections import defaultdict
6
+ from datetime import datetime
7
+ from typing import Any, TypeVar
8
+
9
+ from hud.telemetry.mcp_models import (
10
+ BaseMCPCall,
11
+ MCPManualTestCall,
12
+ MCPNotificationCall,
13
+ MCPRequestCall,
14
+ MCPResponseCall,
15
+ StatusType,
16
+ )
17
+
18
+ logger = logging.getLogger("hud.telemetry")
19
+
20
+ # Context variables for tracing
21
+ current_task_run_id: contextvars.ContextVar[str | None] = contextvars.ContextVar(
22
+ "current_task_run_id", default=None
23
+ )
24
+ # NEW: Global dictionary for buffering, keyed by task_run_id
25
+ _GLOBAL_MCP_CALL_BUFFERS: defaultdict[str, list[BaseMCPCall]] = defaultdict(list)
26
+ is_root_trace: contextvars.ContextVar[bool] = contextvars.ContextVar("is_root_trace", default=False)
27
+
28
+ # Maximum buffer size before automatic flush
29
+ MAX_BUFFER_SIZE = 100
30
+
31
+ # Type variable for record factories
32
+ T = TypeVar("T", bound=BaseMCPCall)
33
+
34
+
35
def get_current_task_run_id() -> str | None:
    """Return the task_run_id stored in the current trace context, or None if unset."""
    active_id = current_task_run_id.get()
    return active_id
38
+
39
+
40
def set_current_task_run_id(task_run_id: str | None) -> None:
    """Store ``task_run_id`` (or None to clear it) in the current trace context."""
    # The token returned by ContextVar.set() is intentionally not kept;
    # callers restore an earlier value by calling this function again.
    current_task_run_id.set(task_run_id)
43
+
44
+
45
def buffer_mcp_call(record: BaseMCPCall | dict[str, Any]) -> None:
    """
    Append an MCP call record to the buffer of the current trace.

    Args:
        record: The record to buffer, either a ``BaseMCPCall`` instance or a
            dict that ``BaseMCPCall.from_dict`` can convert into one.

    The record is skipped (with a log message) when no trace is active or when
    a dict cannot be converted. Records stay in the buffer until it is flushed
    for this task_run_id.
    """
    task_run_id = get_current_task_run_id()

    if not task_run_id:
        logger.warning(
            "BUFFER_MCP_CALL: No task_run_id. Skipping buffer for %s", type(record).__name__
        )
        return

    # Ensure 'record' is a Pydantic model instance from here
    if isinstance(record, dict):
        try:
            record = BaseMCPCall.from_dict(record)
        except Exception as e_conv:
            logger.exception("BUFFER_MCP_CALL: Failed to convert dict to BaseMCPCall: %s", e_conv)
            return

    task_buffer = _GLOBAL_MCP_CALL_BUFFERS[task_run_id]
    task_buffer.append(record)

    # Previously the buffer was flushed here with the result thrown away, which
    # silently dropped every record collected so far: flush_buffer() only pops
    # and returns the list, nothing is exported. Keep the records so they are
    # still available when the buffer is flushed, and just note the growth.
    if len(task_buffer) % MAX_BUFFER_SIZE == 0:
        logger.debug(
            "BUFFER_MCP_CALL: Buffer for %s now holds %d records",
            task_run_id,
            len(task_buffer),
        )
68
+
69
+
70
def flush_buffer(export: bool = False) -> list[BaseMCPCall]:
    """
    Remove and return the MCP calls buffered for the current trace.

    Args:
        export: Accepted by the signature but not acted on here; the buffer
            is only removed and handed back to the caller.

    Returns:
        The list of buffered MCP calls for the current task_run_id. An empty
        list is returned when no trace is active or nothing was buffered.
    """
    active_id = get_current_task_run_id()
    if active_id:
        # pop() both fetches the list and drops the entry for this task.
        return _GLOBAL_MCP_CALL_BUFFERS.pop(active_id, [])

    logger.warning("FLUSH_BUFFER: No current task_run_id. Cannot flush.")
    return []
90
+
91
+
92
def create_request_record(
    method: str, status: StatusType = StatusType.STARTED, **kwargs: Any
) -> MCPRequestCall:
    """
    Create an ``MCPRequestCall`` for the current trace and buffer it.

    Args:
        method: Method name recorded on the call.
        status: Status recorded on the call.
        **kwargs: Extra fields forwarded to ``MCPRequestCall``. A truthy
            "start_time" entry is used as the start time; otherwise the
            current timestamp is used.

    Returns:
        The record that was created and buffered.

    Raises:
        ValueError: If there is no active task_run_id.
    """
    active_id = get_current_task_run_id()
    if not active_id:
        logger.warning("No active task_run_id, request record will not be created")
        raise ValueError("No active task_run_id")

    # Take start_time out of kwargs first so it is not passed twice below.
    started_at = kwargs.pop("start_time", None) or datetime.now().timestamp()

    request_record = MCPRequestCall(
        task_run_id=active_id,
        method=method,
        status=status,
        start_time=started_at,
        **kwargs,
    )
    buffer_mcp_call(request_record)
    return request_record
110
+
111
+
112
def create_response_record(
    method: str, related_request_id: str | int | None = None, is_error: bool = False, **kwargs: Any
) -> MCPResponseCall:
    """
    Create an ``MCPResponseCall`` for the current trace and buffer it.

    The record's status is always ``StatusType.COMPLETED``.

    Args:
        method: Method name recorded on the call.
        related_request_id: Identifier of the request this response belongs to, if any.
        is_error: Whether the response represents an error.
        **kwargs: Extra fields forwarded to ``MCPResponseCall``.

    Returns:
        The record that was created and buffered.

    Raises:
        ValueError: If there is no active task_run_id.
    """
    active_id = get_current_task_run_id()
    if not active_id:
        logger.warning("No active task_run_id, response record will not be created")
        raise ValueError("No active task_run_id")

    response_record = MCPResponseCall(
        task_run_id=active_id,
        method=method,
        status=StatusType.COMPLETED,
        related_request_id=related_request_id,
        is_error=is_error,
        **kwargs,
    )
    buffer_mcp_call(response_record)
    return response_record
132
+
133
+
134
def create_notification_record(
    method: str, status: StatusType = StatusType.STARTED, **kwargs: Any
) -> MCPNotificationCall:
    """
    Create an ``MCPNotificationCall`` for the current trace and buffer it.

    Args:
        method: Method name recorded on the call.
        status: Status recorded on the call.
        **kwargs: Extra fields forwarded to ``MCPNotificationCall``. A truthy
            "start_time" entry is used as the start time; otherwise the
            current timestamp is used.

    Returns:
        The record that was created and buffered.

    Raises:
        ValueError: If there is no active task_run_id.
    """
    active_id = get_current_task_run_id()
    if not active_id:
        logger.warning("No active task_run_id, notification record will not be created")
        raise ValueError("No active task_run_id")

    # Take start_time out of kwargs first so it is not passed twice below.
    started_at = kwargs.pop("start_time", None) or datetime.now().timestamp()

    notification_record = MCPNotificationCall(
        task_run_id=active_id,
        method=method,
        status=status,
        start_time=started_at,
        **kwargs,
    )
    buffer_mcp_call(notification_record)
    return notification_record
152
+
153
+
154
def create_manual_test_record(**custom_data: Any) -> MCPManualTestCall | None:
    """
    Create an ``MCPManualTestCall`` for the current trace and buffer it.

    Args:
        **custom_data: Fields forwarded to ``MCPManualTestCall.create``.

    Returns:
        The buffered record, or None (after logging a warning) when there is
        no active task_run_id.
    """
    active_id = get_current_task_run_id()
    if active_id:
        manual_record = MCPManualTestCall.create(task_run_id=active_id, **custom_data)
        buffer_mcp_call(manual_record)
        return manual_record

    logger.warning("No active task_run_id, manual test record will not be created")
    return None
164
+
165
+
166
def reset_context() -> None:
    """Clear the current task_run_id and mark the context as non-root (handy between tests)."""
    # Same values the context variables are declared with as defaults.
    set_current_task_run_id(None)
    is_root_trace.set(False)