hud-python 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (86) hide show
  1. hud/__init__.py +20 -8
  2. hud/adapters/common/adapter.py +14 -3
  3. hud/adapters/common/tests/test_adapter.py +16 -4
  4. hud/datasets.py +188 -0
  5. hud/env/docker_client.py +15 -3
  6. hud/env/environment.py +10 -7
  7. hud/env/local_docker_client.py +29 -7
  8. hud/env/remote_client.py +1 -1
  9. hud/env/remote_docker_client.py +2 -2
  10. hud/exceptions.py +2 -1
  11. hud/gym.py +0 -9
  12. hud/mcp/__init__.py +17 -0
  13. hud/mcp/base.py +631 -0
  14. hud/mcp/claude.py +321 -0
  15. hud/mcp/client.py +312 -0
  16. hud/mcp/langchain.py +250 -0
  17. hud/mcp/openai.py +334 -0
  18. hud/mcp/tests/__init__.py +1 -0
  19. hud/mcp/tests/test_base.py +512 -0
  20. hud/mcp/tests/test_claude.py +294 -0
  21. hud/mcp/tests/test_client.py +324 -0
  22. hud/mcp/tests/test_openai.py +238 -0
  23. hud/settings.py +20 -2
  24. hud/task.py +5 -88
  25. hud/taskset.py +2 -23
  26. hud/telemetry/__init__.py +16 -7
  27. hud/telemetry/_trace.py +246 -72
  28. hud/telemetry/context.py +88 -27
  29. hud/telemetry/exporter.py +171 -11
  30. hud/telemetry/instrumentation/mcp.py +174 -410
  31. hud/telemetry/job.py +141 -0
  32. hud/telemetry/mcp_models.py +13 -74
  33. hud/telemetry/tests/test_context.py +9 -6
  34. hud/telemetry/tests/test_trace.py +120 -78
  35. hud/tools/__init__.py +34 -0
  36. hud/tools/base.py +65 -0
  37. hud/tools/bash.py +137 -0
  38. hud/tools/computer/__init__.py +13 -0
  39. hud/tools/computer/anthropic.py +411 -0
  40. hud/tools/computer/hud.py +315 -0
  41. hud/tools/computer/openai.py +283 -0
  42. hud/tools/edit.py +290 -0
  43. hud/tools/executors/__init__.py +30 -0
  44. hud/tools/executors/base.py +331 -0
  45. hud/tools/executors/pyautogui.py +619 -0
  46. hud/tools/executors/tests/__init__.py +1 -0
  47. hud/tools/executors/tests/test_base_executor.py +338 -0
  48. hud/tools/executors/tests/test_pyautogui_executor.py +165 -0
  49. hud/tools/executors/xdo.py +503 -0
  50. hud/tools/helper/README.md +56 -0
  51. hud/tools/helper/__init__.py +9 -0
  52. hud/tools/helper/mcp_server.py +78 -0
  53. hud/tools/helper/server_initialization.py +115 -0
  54. hud/tools/helper/utils.py +58 -0
  55. hud/tools/playwright_tool.py +379 -0
  56. hud/tools/tests/__init__.py +3 -0
  57. hud/tools/tests/test_bash.py +152 -0
  58. hud/tools/tests/test_computer.py +52 -0
  59. hud/tools/tests/test_computer_actions.py +34 -0
  60. hud/tools/tests/test_edit.py +240 -0
  61. hud/tools/tests/test_init.py +27 -0
  62. hud/tools/tests/test_playwright_tool.py +183 -0
  63. hud/tools/tests/test_tools.py +157 -0
  64. hud/tools/tests/test_utils.py +156 -0
  65. hud/tools/utils.py +50 -0
  66. hud/trajectory.py +5 -1
  67. hud/types.py +10 -1
  68. hud/utils/tests/test_init.py +21 -0
  69. hud/utils/tests/test_version.py +1 -1
  70. hud/version.py +1 -1
  71. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/METADATA +27 -18
  72. hud_python-0.3.1.dist-info/RECORD +119 -0
  73. hud/evaluators/__init__.py +0 -9
  74. hud/evaluators/base.py +0 -32
  75. hud/evaluators/inspect.py +0 -24
  76. hud/evaluators/judge.py +0 -189
  77. hud/evaluators/match.py +0 -156
  78. hud/evaluators/remote.py +0 -65
  79. hud/evaluators/tests/__init__.py +0 -0
  80. hud/evaluators/tests/test_inspect.py +0 -12
  81. hud/evaluators/tests/test_judge.py +0 -231
  82. hud/evaluators/tests/test_match.py +0 -115
  83. hud/evaluators/tests/test_remote.py +0 -98
  84. hud_python-0.2.10.dist-info/RECORD +0 -85
  85. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/WHEEL +0 -0
  86. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/licenses/LICENSE +0 -0
hud/mcp/claude.py ADDED
@@ -0,0 +1,321 @@
1
+ """Claude MCP Agent implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import copy
6
+ import logging
7
+ from typing import TYPE_CHECKING, Any, cast
8
+
9
+ from anthropic import AsyncAnthropic, BadRequestError
10
+
11
+ if TYPE_CHECKING:
12
+ from anthropic.types.beta import (
13
+ BetaCacheControlEphemeralParam,
14
+ BetaImageBlockParam,
15
+ BetaMessageParam,
16
+ BetaTextBlockParam,
17
+ BetaToolResultBlockParam,
18
+ )
19
+
20
+ from hud.datasets import TaskConfig
21
+
22
+ import mcp.types as types
23
+ from mcp.types import CallToolRequestParams as MCPToolCall
24
+ from mcp.types import CallToolResult as MCPToolResult
25
+
26
+ from hud.settings import settings
27
+
28
+ from .base import BaseMCPAgent, ModelResponse
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
def base64_to_content_block(
    base64: str, media_type: str = "image/png"
) -> BetaImageBlockParam:
    """Convert a base64-encoded image to a Claude image content block.

    Args:
        base64: Base64-encoded image data.
        media_type: MIME type declared for the image. Defaults to
            "image/png", the value that used to be hard-coded, so existing
            single-argument callers are unaffected.

    Returns:
        A dict with ``"type": "image"`` and a base64 ``"source"`` entry.
    """
    return {
        "type": "image",
        "source": {"type": "base64", "media_type": media_type, "data": base64},
    }
39
+
40
+
41
def text_to_content_block(text: str) -> BetaTextBlockParam:
    """Wrap a plain string in a Claude text content block."""
    text_block: BetaTextBlockParam = dict(type="text", text=text)
    return text_block
44
+
45
+
46
def tool_use_content_block(
    tool_use_id: str, content: list[BetaTextBlockParam | BetaImageBlockParam]
) -> BetaToolResultBlockParam:
    """Build the tool_result block that answers the tool call `tool_use_id`.

    The given `content` list is stored as-is (not copied).
    """
    result_block: BetaToolResultBlockParam = dict(
        type="tool_result",
        tool_use_id=tool_use_id,
        content=content,
    )
    return result_block
51
+
52
+
53
class ClaudeMCPAgent(BaseMCPAgent):
    """
    Claude agent that uses MCP servers for tool execution.

    This agent uses Claude's native tool calling capabilities but executes
    tools through MCP servers instead of direct implementation.
    """

    def __init__(
        self,
        model_client: AsyncAnthropic | None = None,
        model: str = "claude-3-7-sonnet-20250219",
        max_tokens: int = 4096,
        display_width_px: int = 1400,
        display_height_px: int = 850,
        use_computer_beta: bool = True,
        **kwargs: Any,
    ) -> None:
        """
        Initialize Claude MCP agent.

        Args:
            model_client: AsyncAnthropic client (created if not provided)
            model: Claude model to use
            max_tokens: Maximum tokens for response
            display_width_px: Display width for computer use tools
            display_height_px: Display height for computer use tools
            use_computer_beta: Whether to use computer-use beta features
            **kwargs: Additional arguments passed to BaseMCPAgent (including mcp_client)

        Raises:
            ValueError: If no client is given and no Anthropic API key is configured.
        """
        super().__init__(**kwargs)

        # Initialize client if not provided
        if model_client is None:
            api_key = settings.anthropic_api_key
            if not api_key:
                raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
            model_client = AsyncAnthropic(api_key=api_key)

        self.anthropic_client = model_client
        self.model = model
        self.max_tokens = max_tokens
        self.display_width_px = display_width_px
        self.display_height_px = display_height_px
        self.use_computer_beta = use_computer_beta

        self.model_name = self.model

        # Track mapping from Claude tool names to MCP tool names
        self._claude_to_mcp_tool_map: dict[str, str] = {}

    async def initialize(self, task: str | TaskConfig | None = None) -> None:
        """Initialize the agent and build tool mappings."""
        await super().initialize(task)
        # Build tool mappings after tools are discovered
        self._convert_tools_for_claude()

    async def create_initial_messages(
        self, prompt: str, screenshot: str | None = None
    ) -> list[BetaMessageParam]:
        """Create the initial user message for Claude.

        Args:
            prompt: Task text; always the first content block.
            screenshot: Optional base64 image appended after the prompt.

        Returns:
            A one-element list holding the user message.
        """
        user_content: list[BetaImageBlockParam | BetaTextBlockParam] = []

        # Add prompt text
        user_content.append(text_to_content_block(prompt))

        # Add screenshot if available
        if screenshot:
            user_content.append(base64_to_content_block(screenshot))

        # Return initial user message
        return [
            cast(
                "BetaMessageParam",
                {
                    "role": "user",
                    "content": user_content,
                },
            )
        ]

    async def get_model_response(self, messages: list[BetaMessageParam]) -> ModelResponse:
        """Get response from Claude including any tool calls.

        The assistant reply is appended to `messages` in place. If the API
        rejects the request because the prompt is too long, the call is retried
        once with the first message plus the last 20 messages; the truncation
        is applied to a local copy only, `messages` itself is not shortened.

        Args:
            messages: Conversation so far, in Claude message format.

        Returns:
            ModelResponse with the combined thinking/text content, the tool
            calls requested by Claude, and ``done=False`` when any tool call
            is present.

        Raises:
            BadRequestError: For any other bad request, or when the prompt is
                too long and the history cannot be truncated further.
        """
        # Get Claude tools
        claude_tools = self._convert_tools_for_claude()

        # Make API call with retry for prompt length
        current_messages = messages.copy()

        while True:
            messages_cached = self._add_prompt_caching(current_messages)

            # Build create kwargs
            create_kwargs = {
                "model": self.model,
                "max_tokens": self.max_tokens,
                "system": self.get_system_prompt(),
                "messages": messages_cached,
                "tools": claude_tools,
                "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
            }

            # Add beta features if using computer tools
            if self.use_computer_beta and any(
                t.get("type") == "computer_20250124" for t in claude_tools
            ):
                create_kwargs["betas"] = ["computer-use-2025-01-24"]

            try:
                response = await self.anthropic_client.beta.messages.create(**create_kwargs)
                break
            except BadRequestError as e:
                # The SDK may prefix the server text (e.g. with the HTTP status
                # code), so look for the marker anywhere in the message rather
                # than only at its start.
                error_text = str(getattr(e, "message", "") or e).lower()
                if "prompt is too long" not in error_text:
                    raise
                logger.warning("Prompt too long, truncating message history")
                # Keep first message and last 20 messages; if the history is
                # already that short there is nothing left to drop.
                if len(current_messages) <= 21:
                    raise
                current_messages = [current_messages[0]] + current_messages[-20:]

        messages.append(
            cast(
                "BetaMessageParam",
                {
                    "role": "assistant",
                    "content": response.content,
                },
            )
        )

        # Process response
        result = ModelResponse(content="", tool_calls=[], done=True)

        # Extract text content and reasoning
        text_parts: list[str] = []
        thinking_parts: list[str] = []

        for block in response.content:
            if block.type == "tool_use":
                # Map Claude tool name back to MCP tool name
                mcp_tool_name = self._claude_to_mcp_tool_map.get(block.name, block.name)

                # Create MCPToolCall object with Claude metadata as extra fields
                # Pyright will complain but the tool class accepts extra fields
                tool_call = MCPToolCall(
                    name=mcp_tool_name,
                    arguments=block.input,
                    tool_use_id=block.id,  # type: ignore
                    claude_name=block.name,  # type: ignore
                )
                result.tool_calls.append(tool_call)
                result.done = False
            elif block.type == "text":
                text_parts.append(block.text)
            elif block.type == "thinking":
                thinking_parts.append(f"Thinking: {block.thinking}\n")

        # Combine text and thinking for final content (thinking first)
        result.content = "".join(thinking_parts) + "".join(text_parts)

        return result

    async def format_tool_results(
        self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
    ) -> list[BetaMessageParam]:
        """Format tool results into Claude messages.

        Args:
            tool_calls: Calls produced by `get_model_response`; each is expected
                to carry the `tool_use_id` extra field.
            tool_results: MCP results, in the same order as `tool_calls`.

        Returns:
            A one-element list with a user message holding one tool_result
            block per call. Calls without a `tool_use_id` are skipped with a
            warning.

        Raises:
            ValueError: If the two lists differ in length (``zip(strict=True)``).
        """
        # Process each tool result
        user_content = []

        for tool_call, result in zip(tool_calls, tool_results, strict=True):
            # Extract Claude-specific metadata from extra fields
            tool_use_id = getattr(tool_call, "tool_use_id", None)
            if not tool_use_id:
                logger.warning("No tool_use_id found for %s", tool_call.name)
                continue

            # Convert MCP tool results to Claude format
            claude_blocks = []

            if result.isError:
                # Use the first text item as the error message, if there is one
                error_msg = "Tool execution failed"
                for content in result.content:
                    if isinstance(content, types.TextContent):
                        error_msg = content.text
                        break
                claude_blocks.append(text_to_content_block(f"Error: {error_msg}"))
            else:
                # Process success content; only text and image items are forwarded
                for content in result.content:
                    if isinstance(content, types.TextContent):
                        claude_blocks.append(text_to_content_block(content.text))
                    elif isinstance(content, types.ImageContent):
                        claude_blocks.append(base64_to_content_block(content.data))

            # Add tool result
            user_content.append(tool_use_content_block(tool_use_id, claude_blocks))

        # Return as a user message containing all tool results
        return [
            cast(
                "BetaMessageParam",
                {
                    "role": "user",
                    "content": user_content,
                },
            )
        ]

    async def create_user_message(self, text: str) -> BetaMessageParam:
        """Create a user message in Claude's format."""
        return cast("BetaMessageParam", {"role": "user", "content": text})

    def _convert_tools_for_claude(self) -> list[dict]:
        """Convert MCP tools to Claude tool format.

        Rebuilds `_claude_to_mcp_tool_map` on every call. MCP tools named
        "computer", "computer_anthropic" or "anthropic_computer" are exposed
        to Claude as a single native "computer" tool; lifecycle tools are
        omitted; everything else is passed through under its own name.

        Returns:
            Tool definitions to send in the ``tools`` field of the request.
        """
        claude_tools = []
        self._claude_to_mcp_tool_map = {}  # Reset mapping
        computer_tool_added = False

        for tool in self._available_tools:
            # Special handling for computer use tools
            if tool.name in ("computer", "computer_anthropic", "anthropic_computer"):
                # Map Claude's "computer" back to the actual MCP tool name.
                # When several MCP tools match, the last one listed wins.
                self._claude_to_mcp_tool_map["computer"] = tool.name

                # Every match would be sent under the same name "computer";
                # emit the definition only once so the request does not
                # contain duplicate tool names.
                if computer_tool_added:
                    continue
                computer_tool_added = True

                # Use Claude's native computer use format with configurable dimensions
                claude_tool = {
                    "type": "computer_20250124",
                    "name": "computer",
                    "display_width_px": self.display_width_px,
                    "display_height_px": self.display_height_px,
                }
            elif tool.name not in self.lifecycle_tools:
                # Convert regular tools
                claude_tool = {
                    "name": tool.name,
                    "description": tool.description or f"Execute {tool.name}",
                    "input_schema": tool.inputSchema
                    or {
                        "type": "object",
                        "properties": {},
                    },
                }
                # Direct mapping for non-computer tools
                self._claude_to_mcp_tool_map[tool.name] = tool.name
            else:
                continue

            claude_tools.append(claude_tool)

        return claude_tools

    def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
        """Return a deep copy of `messages` with cache control on the last user message.

        Only applies when the final message has role "user" and list content;
        every block in it except thinking/redacted_thinking blocks gets an
        ephemeral ``cache_control`` entry. The input list is not modified.
        """
        messages_cached = copy.deepcopy(messages)

        # Mark last user message with cache control
        if messages_cached and messages_cached[-1].get("role") == "user":
            last_content = messages_cached[-1]["content"]
            if isinstance(last_content, list):
                for block in last_content:
                    if block.get("type") not in ["thinking", "redacted_thinking"]:
                        cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
                        block["cache_control"] = cache_control  # type: ignore[reportGeneralTypeIssues]

        return messages_cached
hud/mcp/client.py ADDED
@@ -0,0 +1,312 @@
1
+ """MCP Client wrapper with automatic initialization and debugging capabilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ from mcp_use.client import MCPClient as MCPUseClient
10
+ from pydantic import AnyUrl
11
+
12
+ if TYPE_CHECKING:
13
+ from typing import Self
14
+
15
+ from mcp import types
16
+ from mcp_use.session import MCPSession as MCPUseSession
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class MCPClient:
    """
    High-level MCP client wrapper that handles initialization, tool discovery,
    and provides debugging capabilities.
    """

    def __init__(
        self,
        mcp_config: dict[str, dict[str, Any]],
        verbose: bool = False,
    ) -> None:
        """
        Initialize the MCP client.

        No sessions are opened here; call ``initialize()`` to connect.

        Args:
            mcp_config: MCP server configuration dict (required); it is wrapped
                under the "mcpServers" key before being passed to mcp_use
            verbose: Enable verbose logging of server communications
        """
        self.verbose = verbose

        # Initialize mcp_use client with proper config
        # Use from_dict to properly initialize with config
        config = {"mcpServers": mcp_config}
        self._mcp_client = MCPUseClient.from_dict(config)

        self._sessions: dict[str, MCPUseSession] = {}
        self._available_tools: list[types.Tool] = []
        self._tool_map: dict[str, tuple[str, types.Tool]] = {}
        self._telemetry_data: dict[str, Any] = {}

        # Set up verbose logging if requested
        if self.verbose:
            self._setup_verbose_logging()

    def _setup_verbose_logging(self) -> None:
        """Configure verbose logging for debugging."""
        # Set MCP-related loggers to DEBUG
        logging.getLogger("mcp").setLevel(logging.DEBUG)
        logging.getLogger("mcp_use").setLevel(logging.DEBUG)
        logging.getLogger("mcp.client.stdio").setLevel(logging.DEBUG)

        # Add handler for server communications (only once per module logger)
        if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers):
            handler = logging.StreamHandler()
            handler.setFormatter(
                logging.Formatter("[%(levelname)s] %(asctime)s - %(name)s - %(message)s")
            )
            logger.addHandler(handler)
            logger.setLevel(logging.DEBUG)

    async def initialize(self) -> None:
        """Perform async initialization: sessions, then tools, then telemetry."""
        await self.create_sessions()
        await self.discover_tools()
        await self.fetch_telemetry()

    async def create_sessions(self) -> dict[str, MCPUseSession]:
        """Create sessions for all configured servers.

        Returns:
            Mapping of server name to session.

        Raises:
            Exception: Whatever the underlying client raised; it is logged and
                re-raised unchanged.
        """
        # Create all sessions at once
        try:
            self._sessions = await self._mcp_client.create_all_sessions()
        except Exception as e:
            logger.error("Failed to create sessions: %s", e)
            if self.verbose:
                # TODO: no Docker status check is wired up yet; only this
                # message is emitted.
                logger.info("Attempting to check Docker container status...")
            raise

        # Log session details in verbose mode
        if self.verbose and self._sessions:
            for name, session in self._sessions.items():
                logger.debug("  - %s: %s", name, type(session).__name__)

        return self._sessions

    async def discover_tools(self) -> list[types.Tool]:
        """Discover all available tools from connected servers.

        Resets the tool list and tool map, then queries each session. A server
        that fails is logged and skipped. If two servers expose a tool with the
        same name, the later one replaces the earlier in the tool map and a
        warning is logged.

        Returns:
            All discovered tools, across servers.
        """
        logger.info("Discovering available tools...")

        self._available_tools = []
        self._tool_map = {}

        for server_name, session in self._sessions.items():
            try:
                # Ensure session is initialized
                if not hasattr(session, "connector") or not hasattr(
                    session.connector, "client_session"
                ):
                    await session.initialize()

                if session.connector.client_session is None:
                    logger.warning("Client session not initialized for %s", server_name)
                    continue

                # List tools
                tools_result = await session.connector.client_session.list_tools()

                logger.info(
                    "Discovered %d tools from '%s': %s",
                    len(tools_result.tools),
                    server_name,
                    [tool.name for tool in tools_result.tools],
                )

                # Add to collections
                for tool in tools_result.tools:
                    previous = self._tool_map.get(tool.name)
                    if previous is not None and previous[0] != server_name:
                        # call_tool routes by name only, so make the override visible
                        logger.warning(
                            "Tool '%s' from server '%s' replaces the tool of the "
                            "same name from server '%s'",
                            tool.name,
                            server_name,
                            previous[0],
                        )
                    self._available_tools.append(tool)
                    self._tool_map[tool.name] = (server_name, tool)

                # Log detailed tool info in verbose mode
                if self.verbose:
                    for tool in tools_result.tools:
                        description = tool.description or ""
                        logger.debug(
                            "  Tool '%s': %s",
                            tool.name,
                            (description[:100] + "...") if len(description) > 100 else description,
                        )

            except Exception as e:
                logger.error("Error discovering tools from '%s': %s", server_name, e)
                if self.verbose:
                    logger.exception("Full error details:")

        logger.info("Total tools discovered: %d", len(self._available_tools))
        return self._available_tools

    async def fetch_telemetry(self) -> dict[str, Any]:
        """Fetch telemetry resource from all servers that provide it.

        Reads ``telemetry://live`` from each session and parses the first
        content item as JSON. Servers without the resource are skipped. Results
        are added to the existing telemetry dict, keyed by server name.

        Returns:
            The accumulated telemetry data.
        """
        logger.info("Fetching telemetry resources...")

        for server_name, session in self._sessions.items():
            try:
                if not hasattr(session, "connector") or not hasattr(
                    session.connector, "client_session"
                ):
                    continue

                if session.connector.client_session is None:
                    continue

                # Try to read telemetry resource
                try:
                    result = await session.connector.client_session.read_resource(
                        AnyUrl("telemetry://live")
                    )
                    if result and result.contents and len(result.contents) > 0:
                        telemetry_data = json.loads(result.contents[0].text)  # type: ignore
                        self._telemetry_data[server_name] = telemetry_data

                        logger.info("📡 Telemetry data from server '%s':", server_name)
                        if "live_url" in telemetry_data:
                            logger.info("   🖥️  Live URL: %s", telemetry_data["live_url"])
                        if "status" in telemetry_data:
                            logger.info("   📊 Status: %s", telemetry_data["status"])
                        if "services" in telemetry_data:
                            logger.info("   📋 Services:")
                            for service, status in telemetry_data["services"].items():
                                status_icon = "✅" if status == "running" else "❌"
                                logger.info("      %s %s: %s", status_icon, service, status)

                        if self.verbose:
                            logger.debug(
                                "Full telemetry data:\n%s", json.dumps(telemetry_data, indent=2)
                            )

                except Exception as e:
                    # Resource might not exist, which is fine
                    if self.verbose:
                        logger.debug("No telemetry resource from '%s': %s", server_name, e)

            except Exception as e:
                logger.error("Error fetching telemetry from '%s': %s", server_name, e)

        return self._telemetry_data

    async def call_tool(
        self, tool_name: str, arguments: dict[str, Any] | None = None
    ) -> types.CallToolResult:
        """
        Call a tool by name with the given arguments.

        Args:
            tool_name: Name of the tool to call
            arguments: Tool arguments (an empty dict is sent when omitted)

        Returns:
            Tool execution result

        Raises:
            ValueError: If tool not found, or if the owning server's client
                session is not initialized
        """
        if tool_name not in self._tool_map:
            raise ValueError(f"Tool '{tool_name}' not found")

        server_name, _ = self._tool_map[tool_name]
        session = self._sessions[server_name]

        if self.verbose:
            logger.debug(
                "Calling tool '%s' on server '%s' with arguments: %s",
                tool_name,
                server_name,
                # default=str: a debug log line must not abort the call when an
                # argument is not JSON-serializable.
                json.dumps(arguments, indent=2, default=str) if arguments else "None",
            )

        if session.connector.client_session is None:
            raise ValueError(f"Client session not initialized for {server_name}")

        result = await session.connector.client_session.call_tool(
            name=tool_name, arguments=arguments or {}
        )

        if self.verbose:
            logger.debug("Tool '%s' result: %s", tool_name, result)

        return result

    async def read_resource(self, uri: AnyUrl) -> types.ReadResourceResult | None:
        """
        Read a resource by URI from any server that provides it.

        Servers are tried in session order; the first successful read wins and
        per-server failures are swallowed (logged in verbose mode).

        Args:
            uri: Resource URI (e.g., "telemetry://live")

        Returns:
            Resource contents or None if not found
        """
        for server_name, session in self._sessions.items():
            try:
                if not hasattr(session, "connector") or not hasattr(
                    session.connector, "client_session"
                ):
                    continue

                if session.connector.client_session is None:
                    continue

                result = await session.connector.client_session.read_resource(uri)

                if self.verbose:
                    logger.debug(
                        "Successfully read resource '%s' from server '%s'", uri, server_name
                    )

                return result

            except Exception as e:
                if self.verbose:
                    logger.debug(
                        "Could not read resource '%s' from server '%s': %s", uri, server_name, e
                    )
                continue

        return None

    def get_available_tools(self) -> list[types.Tool]:
        """Get list of all available tools."""
        return self._available_tools

    def get_tool_map(self) -> dict[str, tuple[str, types.Tool]]:
        """Get mapping of tool names to (server_name, tool) tuples."""
        return self._tool_map

    def get_sessions(self) -> dict[str, MCPUseSession]:
        """Get active MCP sessions."""
        return self._sessions

    def get_telemetry_data(self) -> dict[str, Any]:
        """Get collected telemetry data from all servers."""
        return self._telemetry_data

    def get_all_active_sessions(self) -> dict[str, MCPUseSession]:
        """Get all active sessions (compatibility method)."""
        return self._sessions

    async def close(self) -> None:
        """Close all active sessions.

        The cached sessions, tools and tool map are cleared even if closing
        raises, so stale sessions are never handed out afterwards. Collected
        telemetry data is kept.
        """
        try:
            await self._mcp_client.close_all_sessions()
        finally:
            self._sessions = {}
            self._available_tools = []
            self._tool_map = {}

    async def __aenter__(self) -> Self:
        """Async context manager entry; returns the client without initializing it."""
        return self

    async def __aexit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
        """Async context manager exit; closes all sessions."""
        await self.close()