hud-python 0.5.1__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. hud/__init__.py +1 -1
  2. hud/agents/__init__.py +65 -6
  3. hud/agents/base.py +33 -15
  4. hud/agents/claude.py +60 -31
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +15 -26
  7. hud/agents/gemini_cua.py +6 -17
  8. hud/agents/misc/response_agent.py +7 -0
  9. hud/agents/openai.py +16 -29
  10. hud/agents/openai_chat.py +3 -19
  11. hud/agents/operator.py +5 -17
  12. hud/agents/resolver.py +70 -0
  13. hud/agents/tests/test_claude.py +2 -4
  14. hud/agents/tests/test_openai.py +2 -1
  15. hud/agents/tests/test_resolver.py +192 -0
  16. hud/agents/types.py +148 -0
  17. hud/cli/__init__.py +34 -3
  18. hud/cli/build.py +37 -5
  19. hud/cli/dev.py +11 -2
  20. hud/cli/eval.py +51 -39
  21. hud/cli/flows/init.py +1 -1
  22. hud/cli/pull.py +1 -1
  23. hud/cli/push.py +9 -2
  24. hud/cli/tests/test_build.py +2 -2
  25. hud/cli/tests/test_push.py +1 -1
  26. hud/cli/utils/metadata.py +1 -1
  27. hud/cli/utils/tests/test_metadata.py +1 -1
  28. hud/clients/mcp_use.py +6 -1
  29. hud/datasets/loader.py +17 -18
  30. hud/datasets/runner.py +16 -10
  31. hud/datasets/tests/test_loader.py +15 -15
  32. hud/environment/__init__.py +5 -3
  33. hud/environment/connection.py +58 -6
  34. hud/environment/connectors/mcp_config.py +29 -1
  35. hud/environment/environment.py +218 -77
  36. hud/environment/router.py +175 -24
  37. hud/environment/scenarios.py +313 -186
  38. hud/environment/tests/test_connectors.py +10 -23
  39. hud/environment/tests/test_environment.py +432 -0
  40. hud/environment/tests/test_local_connectors.py +81 -40
  41. hud/environment/tests/test_scenarios.py +820 -14
  42. hud/eval/context.py +63 -10
  43. hud/eval/instrument.py +4 -2
  44. hud/eval/manager.py +79 -12
  45. hud/eval/task.py +36 -4
  46. hud/eval/tests/test_eval.py +1 -1
  47. hud/eval/tests/test_task.py +147 -1
  48. hud/eval/types.py +2 -0
  49. hud/eval/utils.py +14 -3
  50. hud/patches/mcp_patches.py +178 -21
  51. hud/telemetry/instrument.py +8 -1
  52. hud/telemetry/tests/test_eval_telemetry.py +8 -8
  53. hud/tools/__init__.py +2 -0
  54. hud/tools/agent.py +223 -0
  55. hud/tools/computer/__init__.py +34 -5
  56. hud/tools/shell.py +3 -3
  57. hud/tools/tests/test_agent_tool.py +355 -0
  58. hud/types.py +62 -34
  59. hud/utils/hud_console.py +30 -17
  60. hud/utils/strict_schema.py +1 -1
  61. hud/utils/tests/test_version.py +1 -1
  62. hud/version.py +1 -1
  63. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/METADATA +2 -2
  64. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/RECORD +67 -61
  65. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/WHEEL +0 -0
  66. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  67. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
@@ -18,8 +18,10 @@ def patch_streamable_http_error_handling() -> None:
18
18
  Patch StreamableHTTPTransport.post_writer to handle request errors properly.
19
19
 
20
20
  The original implementation doesn't catch errors in handle_request_async,
21
- which can cause silent failures. This patch wraps the handler to send
22
- errors to the read stream so clients know the request failed.
21
+ which can cause the client to hang indefinitely. This patch wraps the handler
22
+ to send a proper JSONRPCError response when transport errors occur (e.g.,
23
+ ReadTimeout), allowing the waiting caller to receive the error and fail
24
+ gracefully instead of hanging.
23
25
  """
24
26
  try:
25
27
  from mcp.client.streamable_http import StreamableHTTPTransport
@@ -33,10 +35,67 @@ def patch_streamable_http_error_handling() -> None:
33
35
  start_get_stream: Any,
34
36
  tg: Any,
35
37
  ) -> None:
36
- """Patched post_writer with error handling for handle_request_async."""
38
+ import asyncio
39
+ import ssl
40
+ import time
41
+
42
+ import httpx
37
43
  from mcp.client.streamable_http import RequestContext
38
- from mcp.shared.message import ClientMessageMetadata
39
- from mcp.types import JSONRPCRequest
44
+ from mcp.shared.message import ClientMessageMetadata, SessionMessage
45
+ from mcp.types import ErrorData, JSONRPCError, JSONRPCMessage, JSONRPCRequest
46
+
47
+ from hud.settings import settings
48
+
49
+ async def handle_request_async(ctx: RequestContext, is_resumption: bool) -> None:
50
+ msg = ctx.session_message.message
51
+ # Use configured timeout, minimum 15s to prevent instant failures
52
+ timeout = max(settings.client_timeout, 15.0)
53
+ deadline = time.monotonic() + timeout
54
+ retryable = (
55
+ httpx.ConnectError,
56
+ httpx.ReadError,
57
+ httpx.TimeoutException,
58
+ ssl.SSLError,
59
+ )
60
+
61
+ async def send_error_response(exc: Exception) -> None:
62
+ """Send an error response to the client."""
63
+ if isinstance(msg.root, JSONRPCRequest):
64
+ error_response = JSONRPCError(
65
+ jsonrpc="2.0",
66
+ id=msg.root.id,
67
+ error=ErrorData(
68
+ code=-32000,
69
+ message=f"Transport error: {type(exc).__name__}",
70
+ data={"error_type": type(exc).__name__, "detail": str(exc)},
71
+ ),
72
+ )
73
+ await ctx.read_stream_writer.send(
74
+ SessionMessage(JSONRPCMessage(error_response))
75
+ )
76
+ else:
77
+ await ctx.read_stream_writer.send(exc)
78
+
79
+ while True:
80
+ try:
81
+ if is_resumption:
82
+ await self._handle_resumption_request(ctx)
83
+ else:
84
+ await self._handle_post_request(ctx)
85
+ return
86
+ except retryable as e:
87
+ if time.monotonic() >= deadline:
88
+ logger.error("MCP request failed after timeout: %s", e)
89
+ await send_error_response(e)
90
+ return
91
+ logger.warning("Retrying MCP request after error: %s", e)
92
+ await asyncio.sleep(2.0)
93
+ except asyncio.CancelledError:
94
+ raise
95
+ except Exception as e:
96
+ logger.exception("Request handler error: %s", e)
97
+ await send_error_response(e)
98
+ return
40
99
 
41
100
  try:
42
101
  async with write_stream_reader:
@@ -47,7 +106,6 @@ def patch_streamable_http_error_handling() -> None:
47
106
  if isinstance(session_message.metadata, ClientMessageMetadata)
48
107
  else None
49
108
  )
50
-
51
109
  is_resumption = bool(metadata and metadata.resumption_token)
52
110
 
53
111
  logger.debug("Sending client message: %s", message)
@@ -65,21 +123,6 @@ def patch_streamable_http_error_handling() -> None:
65
123
  sse_read_timeout=self.sse_read_timeout,
66
124
  )
67
125
 
68
- # Patched: Accept ctx and is_resumption as params, add error handling
69
- async def handle_request_async(
70
- ctx: RequestContext = ctx,
71
- is_resumption: bool = is_resumption,
72
- ) -> None:
73
- try:
74
- if is_resumption:
75
- await self._handle_resumption_request(ctx)
76
- else:
77
- await self._handle_post_request(ctx)
78
- except Exception as e:
79
- # Send error to read stream so client knows request failed
80
- logger.error("Request handler error: %s", e)
81
- await ctx.read_stream_writer.send(e)
82
-
83
126
  if isinstance(message.root, JSONRPCRequest):
84
127
  tg.start_soon(handle_request_async, ctx, is_resumption)
85
128
  else:
@@ -122,6 +165,119 @@ def patch_client_session_validation() -> None:
122
165
  logger.warning("Failed to patch client session: %s", e)
123
166
 
124
167
 
168
+ def patch_server_output_validation() -> None:
169
+ """
170
+ Patch MCP server to skip structured output validation and auto-generate
171
+ structuredContent for FastMCP tools with x-fastmcp-wrap-result.
172
+ """
173
+ try:
174
+ import json
175
+
176
+ import mcp.types as types
177
+ from mcp.server.lowlevel.server import Server
178
+
179
+ def patched_call_tool(
180
+ self: Any, validate_input: bool = True, validate_output: bool = False
181
+ ) -> Any:
182
+ """Patched call_tool that skips output validation."""
183
+
184
+ def decorator(func: Any) -> Any:
185
+ async def handler(req: types.CallToolRequest) -> Any:
186
+ try:
187
+ tool_name = req.params.name
188
+ arguments = req.params.arguments or {}
189
+ tool = await self._get_cached_tool_definition(tool_name)
190
+
191
+ if validate_input and tool:
192
+ try:
193
+ import jsonschema
194
+
195
+ jsonschema.validate(instance=arguments, schema=tool.inputSchema)
196
+ except jsonschema.ValidationError as e:
197
+ return self._make_error_result(
198
+ f"Input validation error: {e.message}"
199
+ )
200
+
201
+ results = await func(tool_name, arguments)
202
+
203
+ # output normalization
204
+ unstructured_content: list[Any]
205
+ maybe_structured_content: dict[str, Any] | None
206
+ if isinstance(results, types.CallToolResult):
207
+ return types.ServerResult(results)
208
+ elif isinstance(results, tuple) and len(results) == 2:
209
+ unstructured_content, maybe_structured_content = results
210
+ elif isinstance(results, dict):
211
+ maybe_structured_content = results
212
+ text = json.dumps(results, indent=2)
213
+ unstructured_content = [types.TextContent(type="text", text=text)]
214
+ elif results is None:
215
+ # None means success with no content
216
+ unstructured_content = []
217
+ maybe_structured_content = None
218
+ elif isinstance(results, (str, bytes, bytearray, memoryview)):
219
+ # Handle string/bytes explicitly before iterable check
220
+ # (these are iterable but should not be split into chars/ints)
221
+ if isinstance(results, str):
222
+ text = results
223
+ elif isinstance(results, memoryview):
224
+ text = bytes(results).decode("utf-8", errors="replace")
225
+ else:
226
+ text = bytes(results).decode("utf-8", errors="replace")
227
+ unstructured_content = [types.TextContent(type="text", text=text)]
228
+ maybe_structured_content = None
229
+ elif isinstance(results, (int, float, bool)):
230
+ # Primitives -> string representation
231
+ unstructured_content = [
232
+ types.TextContent(type="text", text=str(results))
233
+ ]
234
+ maybe_structured_content = None
235
+ elif hasattr(results, "__iter__"):
236
+ unstructured_content = list(results)
237
+ maybe_structured_content = None
238
+ else:
239
+ return self._make_error_result(
240
+ f"Unexpected return type: {type(results).__name__}"
241
+ )
242
+
243
+ # Auto-generate structuredContent for FastMCP tools
244
+ # FastMCP generates outputSchema but doesn't populate structuredContent
245
+ if maybe_structured_content is None and tool:
246
+ output_schema = getattr(tool, "outputSchema", None)
247
+ if output_schema and output_schema.get("x-fastmcp-wrap-result"):
248
+ for item in unstructured_content:
249
+ if isinstance(item, types.TextContent):
250
+ try:
251
+ parsed = json.loads(item.text)
252
+ maybe_structured_content = {"result": parsed}
253
+ except json.JSONDecodeError:
254
+ maybe_structured_content = {"result": item.text}
255
+ break
256
+
257
+ return types.ServerResult(
258
+ types.CallToolResult(
259
+ content=list(unstructured_content),
260
+ structuredContent=maybe_structured_content,
261
+ isError=False,
262
+ )
263
+ )
264
+ except Exception as e:
265
+ return self._make_error_result(str(e))
266
+
267
+ self.request_handlers[types.CallToolRequest] = handler
268
+ return func
269
+
270
+ return decorator
271
+
272
+ Server.call_tool = patched_call_tool
273
+ logger.debug("Patched Server.call_tool to skip output validation")
274
+
275
+ except ImportError:
276
+ logger.debug("mcp.server.lowlevel.server not available, skipping patch")
277
+ except Exception as e:
278
+ logger.warning("Failed to patch server output validation: %s", e)
279
+
280
+
125
281
  def suppress_fastmcp_logging(level: int = logging.WARNING) -> None:
126
282
  """
127
283
  Suppress verbose fastmcp logging.
@@ -147,5 +303,6 @@ def apply_all_patches() -> None:
147
303
  """Apply all MCP patches."""
148
304
  patch_streamable_http_error_handling()
149
305
  patch_client_session_validation()
306
+ patch_server_output_validation()
150
307
  suppress_fastmcp_logging()
151
308
  logger.debug("All MCP patches applied")
@@ -83,6 +83,7 @@ def instrument(
83
83
  name: str | None = None,
84
84
  category: str = "function",
85
85
  span_type: str | None = None,
86
+ internal_type: str | None = None,
86
87
  record_args: bool = True,
87
88
  record_result: bool = True,
88
89
  ) -> Callable[[Callable[..., Any]], Callable[..., Any]]: ...
@@ -95,6 +96,7 @@ def instrument(
95
96
  name: str | None = None,
96
97
  category: str = "function",
97
98
  span_type: str | None = None,
99
+ internal_type: str | None = None,
98
100
  record_args: bool = True,
99
101
  record_result: bool = True,
100
102
  ) -> Callable[P, R]: ...
@@ -107,6 +109,7 @@ def instrument(
107
109
  name: str | None = None,
108
110
  category: str = "function",
109
111
  span_type: str | None = None,
112
+ internal_type: str | None = None,
110
113
  record_args: bool = True,
111
114
  record_result: bool = True,
112
115
  ) -> Callable[P, Awaitable[R]]: ...
@@ -118,6 +121,7 @@ def instrument(
118
121
  name: str | None = None,
119
122
  category: str = "function",
120
123
  span_type: str | None = None,
124
+ internal_type: str | None = None,
121
125
  record_args: bool = True,
122
126
  record_result: bool = True,
123
127
  ) -> Callable[..., Any]:
@@ -130,6 +134,7 @@ def instrument(
130
134
  name: Custom span name (defaults to module.function)
131
135
  category: Span category (e.g., "agent", "tool", "function", "mcp")
132
136
  span_type: Alias for category (deprecated, use category instead)
137
+ internal_type: Internal span type (e.g., "user-message")
133
138
  record_args: Whether to record function arguments
134
139
  record_result: Whether to record function result
135
140
 
@@ -204,7 +209,7 @@ def instrument(
204
209
 
205
210
  # Build span
206
211
  span_id = uuid.uuid4().hex[:16]
207
- span = {
212
+ span: dict[str, Any] = {
208
213
  "name": span_name,
209
214
  "trace_id": _normalize_trace_id(task_run_id),
210
215
  "span_id": span_id,
@@ -216,6 +221,8 @@ def instrument(
216
221
  "attributes": attributes.model_dump(mode="json", exclude_none=True),
217
222
  "exceptions": [{"message": error}] if error else None,
218
223
  }
224
+ if internal_type:
225
+ span["internal_type"] = internal_type
219
226
  return span
220
227
 
221
228
  @functools.wraps(func)
@@ -49,8 +49,8 @@ class TestEvalContextTelemetry:
49
49
  """Say hello."""
50
50
  return f"Hello, {name}!"
51
51
 
52
- # Create task from environment
53
- task = Task(env=env)
52
+ # Create task from environment (args={} = runnable, args=None = template)
53
+ task = Task(env=env, args={})
54
54
 
55
55
  with (
56
56
  patch("hud.settings.settings") as mock_settings,
@@ -110,7 +110,7 @@ class TestEvalContextTelemetry:
110
110
  """Always fails."""
111
111
  raise ValueError("Tool error")
112
112
 
113
- task = Task(env=env)
113
+ task = Task(env=env, args={})
114
114
 
115
115
  with (
116
116
  patch("hud.settings.settings") as mock_settings,
@@ -162,7 +162,7 @@ class TestEvalContextTelemetry:
162
162
  """Multiply two numbers."""
163
163
  return a * b
164
164
 
165
- task = Task(env=env)
165
+ task = Task(env=env, args={})
166
166
 
167
167
  with (
168
168
  patch("hud.settings.settings") as mock_settings,
@@ -195,7 +195,7 @@ class TestEvalContextTelemetry:
195
195
  async def simple_tool() -> str:
196
196
  return "done"
197
197
 
198
- task = Task(env=env)
198
+ task = Task(env=env, args={})
199
199
 
200
200
  with (
201
201
  patch("hud.eval.context.flush") as mock_flush,
@@ -229,7 +229,7 @@ class TestEvalContextTelemetry:
229
229
  async def test_tool() -> str:
230
230
  return "ok"
231
231
 
232
- task = Task(env=env)
232
+ task = Task(env=env, args={})
233
233
 
234
234
  with (
235
235
  patch("hud.settings.settings") as mock_settings,
@@ -272,7 +272,7 @@ class TestSpanFormat:
272
272
  async def echo(message: str) -> str:
273
273
  return message
274
274
 
275
- task = Task(env=env)
275
+ task = Task(env=env, args={})
276
276
 
277
277
  with (
278
278
  patch("hud.settings.settings") as mock_settings,
@@ -329,7 +329,7 @@ class TestSpanFormat:
329
329
  async def noop() -> None:
330
330
  pass
331
331
 
332
- task = Task(env=env)
332
+ task = Task(env=env, args={})
333
333
 
334
334
  with (
335
335
  patch("hud.settings.settings") as mock_settings,
hud/tools/__init__.py CHANGED
@@ -4,6 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  from typing import TYPE_CHECKING, Any
6
6
 
7
+ from .agent import AgentTool
7
8
  from .base import BaseHub, BaseTool
8
9
  from .bash import BashTool
9
10
  from .edit import EditTool
@@ -21,6 +22,7 @@ if TYPE_CHECKING:
21
22
  )
22
23
 
23
24
  __all__ = [
25
+ "AgentTool",
24
26
  "AnthropicComputerTool",
25
27
  "BaseHub",
26
28
  "BaseTool",
hud/tools/agent.py ADDED
@@ -0,0 +1,223 @@
1
+ """AgentTool - run a Task with an agent as a tool."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import inspect
6
+ from typing import TYPE_CHECKING, Any, Union, get_args, get_origin
7
+
8
+ from fastmcp.tools.tool import FunctionTool, ToolResult
9
+ from mcp.types import TextContent
10
+
11
+ from hud.tools.base import BaseTool
12
+
13
+ if TYPE_CHECKING:
14
+ from hud.agents.base import MCPAgent
15
+ from hud.eval.task import Task
16
+
17
+ __all__ = ["AgentTool"]
18
+
19
+
20
+ def _is_eval_only(param: inspect.Parameter) -> bool:
21
+ """Check if param is eval-only: has None default AND None in type union.
22
+
23
+ Handles both runtime types and string annotations (PEP 563).
24
+ """
25
+ # Must have default of None
26
+ if param.default is not None:
27
+ return False
28
+ if param.annotation is inspect.Parameter.empty:
29
+ return False
30
+
31
+ annotation = param.annotation
32
+
33
+ # Handle string annotations (from __future__ annotations or quoted)
34
+ if isinstance(annotation, str):
35
+ # Check if it looks like "X | None", "None | X", "Union[X, None]", or "Optional[X]"
36
+ return (
37
+ "| None" in annotation
38
+ or "None |" in annotation
39
+ or "Optional[" in annotation
40
+ or ("Union[" in annotation and "None" in annotation)
41
+ )
42
+
43
+ # Handle runtime type annotations
44
+ origin = get_origin(annotation)
45
+
46
+ # Union types (X | None or Union[X, None])
47
+ if origin is Union:
48
+ return type(None) in get_args(annotation)
49
+
50
+ # For Python 3.10+ union syntax at runtime (types.UnionType)
51
+ try:
52
+ import types
53
+
54
+ if isinstance(annotation, types.UnionType):
55
+ return type(None) in get_args(annotation)
56
+ except (ImportError, AttributeError):
57
+ pass
58
+
59
+ return False
60
+
61
+
62
+ class AgentTool(BaseTool):
63
+ """Tool that runs a Task template with an agent.
64
+
65
+ Parameters with `| None = None` are eval-only and hidden from the tool schema.
66
+
67
+ Example:
68
+ ```python
69
+ @env.scenario()
70
+ async def investigate(
71
+ issue_id: str, # Required - orchestrator sees
72
+ expected_cause: str | None = None, # Eval only - hidden
73
+ ):
74
+ yield {"task": f"Investigate {issue_id}"}
75
+
76
+
77
+ seer = AgentTool(env("investigate"), model="ft:seer-v2")
78
+ ```
79
+ """
80
+
81
+ def __init__(
82
+ self,
83
+ task: Task,
84
+ *,
85
+ model: str | None = None,
86
+ agent: type[MCPAgent] | None = None,
87
+ agent_params: dict[str, Any] | None = None,
88
+ name: str | None = None,
89
+ description: str | None = None,
90
+ trace: bool = False,
91
+ ) -> None:
92
+ if not model and agent is None:
93
+ raise ValueError("Must provide either 'model' or 'agent'")
94
+ if model and agent is not None:
95
+ raise ValueError("Cannot provide both 'model' and 'agent'")
96
+
97
+ self._task = task
98
+ self._model = model
99
+ self._agent_cls = agent
100
+ self._agent_params = agent_params or {}
101
+ self._trace = trace
102
+
103
+ # Get visible params from scenario function
104
+ self._visible_params: set[str] = set()
105
+ self._param_schema: dict[str, Any] = {
106
+ "type": "object",
107
+ "properties": {},
108
+ "required": [],
109
+ }
110
+
111
+ if task.env and task.scenario:
112
+ scenario_fn = task.env._scenarios.get(task.scenario)
113
+ if scenario_fn:
114
+ sig = inspect.signature(scenario_fn)
115
+ visible = {name: p for name, p in sig.parameters.items() if not _is_eval_only(p)}
116
+ self._visible_params = set(visible.keys())
117
+ self._param_schema = self._build_schema(visible)
118
+
119
+ tool_name = name or task.scenario or "agent_tool"
120
+ tool_desc = description or f"Run scenario: {task.scenario}"
121
+
122
+ super().__init__(name=tool_name, description=tool_desc)
123
+
124
+ def _build_schema(self, params: dict[str, inspect.Parameter]) -> dict[str, Any]:
125
+ """Build JSON schema using Pydantic TypeAdapter."""
126
+ from pydantic import TypeAdapter
127
+
128
+ properties: dict[str, Any] = {}
129
+ required: list[str] = []
130
+
131
+ for name, param in params.items():
132
+ if param.annotation is not inspect.Parameter.empty:
133
+ try:
134
+ # Handle string annotations
135
+ annotation = param.annotation
136
+ if isinstance(annotation, str):
137
+ # Try to evaluate the annotation
138
+ try:
139
+ annotation = eval(annotation) # noqa: S307
140
+ except Exception:
141
+ # Fall back to string type but don't skip required handling
142
+ annotation = None
143
+
144
+ if annotation is not None:
145
+ adapter = TypeAdapter(annotation)
146
+ properties[name] = adapter.json_schema()
147
+ else:
148
+ properties[name] = {"type": "string"}
149
+ except Exception:
150
+ properties[name] = {"type": "string"}
151
+ else:
152
+ properties[name] = {"type": "string"}
153
+
154
+ if param.default is inspect.Parameter.empty:
155
+ required.append(name)
156
+ elif param.default is not None:
157
+ properties[name]["default"] = param.default
158
+
159
+ return {"type": "object", "properties": properties, "required": required}
160
+
161
+ @property
162
+ def mcp(self) -> FunctionTool:
163
+ """Get as FastMCP FunctionTool with filtered schema."""
164
+ if not hasattr(self, "_mcp_tool"):
165
+ # Directly instantiate FunctionTool with our callable and schema
166
+ # This bypasses from_function's signature parsing
167
+ self._mcp_tool = FunctionTool(
168
+ name=self.name,
169
+ description=self.description or "",
170
+ parameters=self._param_schema,
171
+ fn=self._execute_with_args,
172
+ )
173
+ return self._mcp_tool
174
+
175
+ async def _execute_with_args(self, **kwargs: Any) -> ToolResult:
176
+ """Internal executor that FastMCP calls with parsed arguments."""
177
+ return await self(**kwargs)
178
+
179
+ async def __call__(self, **kwargs: Any) -> ToolResult:
180
+ """Execute the task with a fresh agent."""
181
+ from hud.eval.context import get_current_trace_id
182
+ from hud.eval.manager import run_eval
183
+ from hud.telemetry.instrument import instrument
184
+
185
+ # Filter to visible params only
186
+ filtered = {k: v for k, v in kwargs.items() if k in self._visible_params}
187
+
188
+ # Merge with template args
189
+ base_args = self._task.args or {}
190
+ task = self._task.model_copy(update={"args": {**base_args, **filtered}})
191
+
192
+ # Use parent trace if available (for hierarchical agents)
193
+ parent_trace_id = get_current_trace_id()
194
+
195
+ # If nested (has parent), skip subagent's enter/exit registration
196
+ # Tool calls are still recorded via the shared trace_id's context
197
+ is_nested = parent_trace_id is not None
198
+
199
+ # Trace if explicitly requested AND not nested (nested uses parent trace)
200
+ should_trace = self._trace and not is_nested
201
+
202
+ # Wrap execution with instrumentation to mark as subagent
203
+ # Platform uses category="subagent" to detect and render subagent tool calls
204
+ @instrument(category="subagent", name=self.name)
205
+ async def _run_subagent() -> ToolResult:
206
+ async with run_eval(
207
+ task,
208
+ trace=should_trace,
209
+ trace_id=parent_trace_id,
210
+ quiet=True,
211
+ ) as ctx:
212
+ if self._model:
213
+ from hud.agents import create_agent
214
+
215
+ agent = create_agent(self._model, **self._agent_params)
216
+ else:
217
+ agent = self._agent_cls.create(**self._agent_params) # type: ignore
218
+
219
+ result = await agent.run(ctx)
220
+ content = result.content if hasattr(result, "content") and result.content else ""
221
+ return ToolResult(content=[TextContent(type="text", text=content)])
222
+
223
+ return await _run_subagent()
@@ -2,13 +2,17 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from .anthropic import AnthropicComputerTool
6
- from .gemini import GeminiComputerTool
7
- from .hud import HudComputerTool
8
- from .openai import OpenAIComputerTool
9
- from .qwen import QwenComputerTool
5
+ from typing import TYPE_CHECKING
6
+
10
7
  from .settings import computer_settings
11
8
 
9
+ if TYPE_CHECKING:
10
+ from .anthropic import AnthropicComputerTool
11
+ from .gemini import GeminiComputerTool
12
+ from .hud import HudComputerTool
13
+ from .openai import OpenAIComputerTool
14
+ from .qwen import QwenComputerTool
15
+
12
16
  __all__ = [
13
17
  "AnthropicComputerTool",
14
18
  "GeminiComputerTool",
@@ -17,3 +21,28 @@ __all__ = [
17
21
  "QwenComputerTool",
18
22
  "computer_settings",
19
23
  ]
24
+
25
+
26
+ def __getattr__(name: str) -> type:
27
+ """Lazy import computer tools."""
28
+ if name == "AnthropicComputerTool":
29
+ from .anthropic import AnthropicComputerTool
30
+
31
+ return AnthropicComputerTool
32
+ elif name == "GeminiComputerTool":
33
+ from .gemini import GeminiComputerTool
34
+
35
+ return GeminiComputerTool
36
+ elif name == "HudComputerTool":
37
+ from .hud import HudComputerTool
38
+
39
+ return HudComputerTool
40
+ elif name == "OpenAIComputerTool":
41
+ from .openai import OpenAIComputerTool
42
+
43
+ return OpenAIComputerTool
44
+ elif name == "QwenComputerTool":
45
+ from .qwen import QwenComputerTool
46
+
47
+ return QwenComputerTool
48
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
hud/tools/shell.py CHANGED
@@ -82,10 +82,10 @@ class _BashSession:
82
82
  await asyncio.sleep(0)
83
83
  return
84
84
 
85
- # preexec_fn and user demotion only available on Unix
85
+ # preexec_fn and user demotion only available on Unix when running as root
86
86
  preexec_fn = None
87
- if sys.platform != "win32":
88
-
87
+ if sys.platform != "win32" and os.getuid() == 0:
88
+ # Only demote when running as root (e.g., inside Docker containers)
89
89
  def demote() -> None:
90
90
  # This only runs in the child process (Unix only)
91
91
  os.setsid() # type: ignore[attr-defined]