hud-python 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +7 -4
- hud/adapters/common/adapter.py +14 -3
- hud/adapters/common/tests/test_adapter.py +16 -4
- hud/datasets.py +188 -0
- hud/env/docker_client.py +14 -2
- hud/env/local_docker_client.py +28 -6
- hud/gym.py +0 -9
- hud/{mcp_agent → mcp}/__init__.py +2 -0
- hud/mcp/base.py +631 -0
- hud/{mcp_agent → mcp}/claude.py +52 -47
- hud/mcp/client.py +312 -0
- hud/{mcp_agent → mcp}/langchain.py +52 -33
- hud/{mcp_agent → mcp}/openai.py +56 -40
- hud/{mcp_agent → mcp}/tests/test_base.py +129 -54
- hud/mcp/tests/test_claude.py +294 -0
- hud/mcp/tests/test_client.py +324 -0
- hud/mcp/tests/test_openai.py +238 -0
- hud/settings.py +6 -0
- hud/task.py +1 -88
- hud/taskset.py +2 -23
- hud/telemetry/__init__.py +5 -0
- hud/telemetry/_trace.py +180 -17
- hud/telemetry/context.py +79 -0
- hud/telemetry/exporter.py +165 -6
- hud/telemetry/job.py +141 -0
- hud/telemetry/tests/test_trace.py +36 -25
- hud/tools/__init__.py +14 -1
- hud/tools/executors/__init__.py +19 -2
- hud/tools/executors/pyautogui.py +84 -50
- hud/tools/executors/tests/test_pyautogui_executor.py +4 -1
- hud/tools/playwright_tool.py +73 -67
- hud/tools/tests/test_edit.py +8 -1
- hud/tools/tests/test_tools.py +3 -0
- hud/trajectory.py +5 -1
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/METADATA +20 -14
- {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/RECORD +41 -46
- hud/evaluators/__init__.py +0 -9
- hud/evaluators/base.py +0 -32
- hud/evaluators/inspect.py +0 -24
- hud/evaluators/judge.py +0 -189
- hud/evaluators/match.py +0 -156
- hud/evaluators/remote.py +0 -65
- hud/evaluators/tests/__init__.py +0 -0
- hud/evaluators/tests/test_inspect.py +0 -12
- hud/evaluators/tests/test_judge.py +0 -231
- hud/evaluators/tests/test_match.py +0 -115
- hud/evaluators/tests/test_remote.py +0 -98
- hud/mcp_agent/base.py +0 -723
- /hud/{mcp_agent → mcp}/tests/__init__.py +0 -0
- {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/WHEEL +0 -0
- {hud_python-0.3.0.dist-info → hud_python-0.3.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
"""Tests for Claude MCP Agent implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, cast
|
|
6
|
+
from unittest.mock import AsyncMock, MagicMock, patch
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
from anthropic import BadRequestError
|
|
10
|
+
from mcp import types
|
|
11
|
+
from mcp.types import CallToolRequestParams as MCPToolCall
|
|
12
|
+
|
|
13
|
+
from hud.mcp.claude import (
|
|
14
|
+
ClaudeMCPAgent,
|
|
15
|
+
base64_to_content_block,
|
|
16
|
+
text_to_content_block,
|
|
17
|
+
tool_use_content_block,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from anthropic.types.beta import BetaImageBlockParam, BetaMessageParam, BetaTextBlockParam
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TestClaudeHelperFunctions:
    """Checks for the module-level helpers that build Claude content blocks."""

    def test_base64_to_content_block(self):
        """A base64 payload is wrapped as a PNG image block with the data untouched."""
        encoded_png = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="  # noqa: E501

        block = base64_to_content_block(encoded_png)
        source = block["source"]

        assert block["type"] == "image"
        assert source["type"] == "base64"
        assert source["media_type"] == "image/png"
        assert source["data"] == encoded_png

    def test_text_to_content_block(self):
        """Plain text is wrapped as a ``text`` block carrying the same string."""
        message = "Hello, world!"

        block = text_to_content_block(message)

        assert block["type"] == "text"
        assert block["text"] == message

    def test_tool_use_content_block(self):
        """The helper yields a ``tool_result`` block tied to the given tool-use id."""
        call_id = "tool_123"
        payload: list[BetaTextBlockParam | BetaImageBlockParam] = [
            text_to_content_block("Result text"),
        ]

        block = tool_use_content_block(call_id, payload)

        assert block["type"] == "tool_result"
        assert block["tool_use_id"] == call_id
        assert block["content"] == payload  # type: ignore
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class TestClaudeMCPAgent:
    """Exercise ClaudeMCPAgent against mocked MCP and Anthropic clients."""

    # ------------------------------------------------------------------
    # Private builders for the mock objects shared by several tests.
    # Names do not start with ``test`` so pytest does not collect them.
    # ------------------------------------------------------------------

    @staticmethod
    def _text_block(text):
        """Return a mock content block with ``type == "text"`` and the given text."""
        block = MagicMock()
        block.type = "text"
        block.text = text
        return block

    @staticmethod
    def _tool_use_block(block_id, tool_name, tool_input):
        """Return a mock content block with ``type == "tool_use"``."""
        block = MagicMock()
        block.type = "tool_use"
        block.id = block_id
        # Assigned after construction on purpose: ``MagicMock(name=...)`` would
        # name the mock itself instead of creating a ``name`` attribute.
        block.name = tool_name
        block.input = tool_input
        return block

    @staticmethod
    def _api_response(blocks, input_tokens, output_tokens):
        """Return a mock API response exposing ``content`` and ``usage``."""
        response = MagicMock()
        response.content = blocks
        response.usage = MagicMock(input_tokens=input_tokens, output_tokens=output_tokens)
        return response

    @staticmethod
    def _user_text_message(text):
        """Return a single user message holding one text block."""
        return cast("BetaMessageParam", {"role": "user", "content": [{"type": "text", "text": text}]})

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def mock_mcp_client(self):
        """Provide a mock MCP client that reports no sessions and no tools."""
        stub = MagicMock()
        stub.get_all_active_sessions = MagicMock(return_value={})
        stub.get_tool_map = MagicMock(return_value={})
        return stub

    @pytest.fixture
    def mock_anthropic(self):
        """Patch ``hud.mcp.claude.AsyncAnthropic`` and yield the client it returns."""
        with patch("hud.mcp.claude.AsyncAnthropic") as anthropic_cls:
            fake_client = AsyncMock()
            # Expose the ``beta.messages`` attribute chain explicitly.
            fake_client.beta = AsyncMock()
            fake_client.beta.messages = AsyncMock()
            anthropic_cls.return_value = fake_client
            yield fake_client

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_init(self, mock_mcp_client, mock_anthropic):
        """Constructor arguments are stored on the agent when a model client is supplied."""
        supplied_client = MagicMock()

        agent = ClaudeMCPAgent(
            mcp_client=mock_mcp_client,
            model_client=supplied_client,
            model="claude-3-opus-20240229",
            max_tokens=1000,
        )

        assert agent.model_name == "claude-3-opus-20240229"
        assert agent.max_tokens == 1000
        assert agent.anthropic_client == supplied_client

    @pytest.mark.asyncio
    async def test_init_without_model_client(self, mock_mcp_client):
        """With only an API key configured, the agent still ends up with a client."""
        with patch("hud.mcp.claude.settings.anthropic_api_key", "test_key"):
            agent = ClaudeMCPAgent(mcp_client=mock_mcp_client, model="claude-3-opus-20240229")

            assert agent.model_name == "claude-3-opus-20240229"
            assert agent.anthropic_client is not None

    @pytest.mark.asyncio
    async def test_create_initial_messages(self, mock_mcp_client):
        """Initial messages hold the prompt text and, when given, a trailing image."""
        agent = ClaudeMCPAgent(mcp_client=mock_mcp_client, model_client=MagicMock())

        # Prompt only.
        text_only = await agent.create_initial_messages("Hello, Claude!")
        assert len(text_only) == 1
        assert text_only[0]["role"] == "user"
        blocks = list(text_only[0]["content"])
        assert blocks[0]["type"] == "text"  # type: ignore
        assert blocks[0]["text"] == "Hello, Claude!"  # type: ignore

        # Prompt plus screenshot.
        with_image = await agent.create_initial_messages("Look at this", screenshot="base64data")
        assert len(with_image) == 1
        assert with_image[0]["role"] == "user"
        blocks = list(with_image[0]["content"])
        assert len(blocks) == 2
        # Text is expected first, followed by the image.
        assert blocks[0]["type"] == "text"  # type: ignore
        assert blocks[0]["text"] == "Look at this"  # type: ignore
        assert blocks[1]["type"] == "image"  # type: ignore

    @pytest.mark.asyncio
    async def test_format_tool_results_method(self, mock_mcp_client):
        """One tool call/result pair becomes one user message wrapping a tool_result."""
        agent = ClaudeMCPAgent(mcp_client=mock_mcp_client, model_client=MagicMock())

        calls = [
            MCPToolCall(name="test_tool", arguments={}, tool_use_id="id1"),  # type: ignore
        ]
        outcomes = [
            types.CallToolResult(
                content=[types.TextContent(type="text", text="Success")], isError=False
            ),
        ]

        formatted = await agent.format_tool_results(calls, outcomes)

        # A single user message is expected back.
        assert len(formatted) == 1
        assert formatted[0]["role"] == "user"

        # Its only content entry is the tool_result wrapper...
        outer = list(formatted[0]["content"])
        assert len(outer) == 1
        wrapper = outer[0]
        assert wrapper["type"] == "tool_result"  # type: ignore
        assert wrapper["tool_use_id"] == "id1"  # type: ignore

        # ...and the tool's own output is nested inside that wrapper.
        nested = list(wrapper["content"])  # type: ignore
        assert nested[0]["type"] == "text"  # type: ignore
        assert nested[0]["text"] == "Success"  # type: ignore

    @pytest.mark.asyncio
    async def test_get_model_response(self, mock_mcp_client, mock_anthropic):
        """A reply with text and a tool_use block yields content plus one tool call."""
        agent = ClaudeMCPAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)

        api_reply = self._api_response(
            [
                self._text_block("Hello!"),
                self._tool_use_block("tool_123", "test_tool", {"param": "value"}),
            ],
            input_tokens=10,
            output_tokens=20,
        )
        mock_anthropic.beta.messages.create = AsyncMock(return_value=api_reply)

        response = await agent.get_model_response([self._user_text_message("Hi")])

        # Only the normalized response fields are asserted here; Claude-specific
        # attributes of the raw reply are deliberately not checked.
        assert response.content == "Hello!"
        assert len(response.tool_calls) == 1
        first_call = response.tool_calls[0]
        assert first_call.name == "test_tool"
        assert first_call.arguments == {"param": "value"}

        # The API must have been hit exactly once.
        mock_anthropic.beta.messages.create.assert_called_once()

    @pytest.mark.asyncio
    async def test_get_model_response_text_only(self, mock_mcp_client, mock_anthropic):
        """A reply containing only text produces no tool calls."""
        agent = ClaudeMCPAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)

        api_reply = self._api_response(
            [self._text_block("Just text")], input_tokens=5, output_tokens=10
        )
        mock_anthropic.beta.messages.create = AsyncMock(return_value=api_reply)

        response = await agent.get_model_response([self._user_text_message("Hi")])

        assert response.content == "Just text"
        assert response.tool_calls == []

    @pytest.mark.asyncio
    async def test_get_model_response_error(self, mock_mcp_client, mock_anthropic):
        """A BadRequestError raised by the API call propagates to the caller."""
        agent = ClaudeMCPAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)

        api_failure = BadRequestError(
            message="Invalid request",
            response=MagicMock(status_code=400),
            body={"error": {"message": "Invalid request"}},
        )
        mock_anthropic.beta.messages.create = AsyncMock(side_effect=api_failure)

        conversation = [{"role": "user", "content": [{"type": "text", "text": "Hi"}]}]

        with pytest.raises(BadRequestError):
            await agent.get_model_response(conversation)  # type: ignore

    @pytest.mark.asyncio
    async def test_run_with_tools(self, mock_mcp_client, mock_anthropic):
        """A tool-use turn followed by a text turn ends with the final text and done=True."""
        agent = ClaudeMCPAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)

        def calculator_tool():
            # Fresh instance per call so the list and the map hold separate objects.
            return types.Tool(
                name="calculator", description="Calculator", inputSchema={"type": "object"}
            )

        # Pretend tool discovery already happened.
        agent._available_tools = [calculator_tool()]
        agent._tool_map = {"calculator": ("server1", calculator_tool())}

        # First model turn: request the calculator tool.
        tool_turn = self._api_response(
            [self._tool_use_block("calc_123", "calculator", {"operation": "add", "a": 2, "b": 3})],
            input_tokens=10,
            output_tokens=15,
        )
        # Second model turn: plain-text answer.
        answer_turn = self._api_response(
            [self._text_block("2 + 3 = 5")], input_tokens=20, output_tokens=10
        )
        mock_anthropic.beta.messages.create = AsyncMock(side_effect=[tool_turn, answer_turn])

        # Tool execution on the MCP side returns the text "5".
        agent.mcp_client.call_tool = AsyncMock(
            return_value=types.CallToolResult(
                content=[types.TextContent(type="text", text="5")], isError=False
            )
        )

        # A bare string prompt is used rather than a task object.
        outcome = await agent.run("What is 2 + 3?")

        assert outcome.content == "2 + 3 = 5"
        assert outcome.done is True
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""Tests for MCP Client implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from unittest.mock import AsyncMock, MagicMock, patch
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
from mcp import types
|
|
9
|
+
from pydantic import AnyUrl
|
|
10
|
+
|
|
11
|
+
from hud.mcp.client import MCPClient
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TestMCPClient:
    """Tests for MCPClient, run against a mocked internal ``MCPUseClient``."""

    # ------------------------------------------------------------------
    # Private builders (not collected by pytest: names do not start with
    # ``test``). They replace the session/tool boilerplate that was
    # previously copy-pasted into every test.
    # ------------------------------------------------------------------

    @staticmethod
    def _make_tool(name, description):
        """Return an MCP ``Tool`` with a minimal object input schema."""
        return types.Tool(name=name, description=description, inputSchema={"type": "object"})

    @staticmethod
    def _make_session(tools=()):
        """Return a mock session whose ``list_tools`` coroutine reports *tools*.

        Args:
            tools: Iterable of ``types.Tool`` objects the session should advertise.

        Returns:
            A ``MagicMock`` exposing ``connector.client_session.list_tools``.
        """
        advertised = list(tools)

        session = MagicMock()
        session.connector = MagicMock()
        session.connector.client_session = MagicMock()

        # A plain coroutine function (not an AsyncMock) so it takes no arguments.
        async def list_tools():
            return types.ListToolsResult(tools=list(advertised))

        session.connector.client_session.list_tools = list_tools
        return session

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def mock_mcp_use_client(self):
        """Patch ``hud.mcp.client.MCPUseClient`` and yield what ``from_dict`` returns."""
        inner = MagicMock()
        inner.create_session = AsyncMock()
        inner.create_all_sessions = AsyncMock(return_value={})
        inner.close_all_sessions = AsyncMock()
        inner.get_all_active_sessions = MagicMock(return_value={})

        with patch("hud.mcp.client.MCPUseClient") as use_client_cls:
            use_client_cls.from_dict = MagicMock(return_value=inner)
            yield inner

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_init_with_config(self):
        """The server config is wrapped under ``mcpServers`` and passed to ``from_dict``."""
        mcp_config = {
            "test_server": {
                "command": "python",
                "args": ["-m", "test_server"],
                "env": {"TEST": "true"},
            }
        }

        with patch("hud.mcp.client.MCPUseClient") as mock_use_client:
            client = MCPClient(mcp_config=mcp_config, verbose=True)

            assert client.verbose is True
            mock_use_client.from_dict.assert_called_once_with({"mcpServers": mcp_config})

    @pytest.mark.asyncio
    async def test_connect_single_server(self, mock_mcp_use_client):
        """Initializing against one server discovers both of its tools."""
        config = {"test_server": {"command": "python", "args": ["-m", "test_server"]}}

        # MCPUseClient is already patched by the fixture.
        client = MCPClient(mcp_config=config, verbose=True)
        assert client._mcp_client == mock_mcp_use_client

        session = self._make_session(
            [self._make_tool("tool1", "Tool 1"), self._make_tool("tool2", "Tool 2")]
        )
        mock_mcp_use_client.create_all_sessions = AsyncMock(
            return_value={"test_server": session}
        )

        await client.initialize()

        mock_mcp_use_client.create_all_sessions.assert_called_once()
        assert len(client._available_tools) == 2
        assert len(client._tool_map) == 2
        assert "tool1" in client._tool_map
        assert "tool2" in client._tool_map

    @pytest.mark.asyncio
    async def test_connect_multiple_servers(self, mock_mcp_use_client):
        """Tools advertised by two different servers all land in the tool map."""
        config = {
            "server1": {"command": "python", "args": ["-m", "server1"]},
            "server2": {"command": "node", "args": ["server2.js"]},
        }
        client = MCPClient(mcp_config=config)

        first = self._make_session([self._make_tool("tool1", "Tool 1")])
        second = self._make_session([self._make_tool("tool2", "Tool 2")])
        mock_mcp_use_client.create_all_sessions = AsyncMock(
            return_value={"server1": first, "server2": second}
        )

        await client.initialize()

        mock_mcp_use_client.create_all_sessions.assert_called_once()
        assert len(client._tool_map) == 2
        assert "tool1" in client._tool_map
        assert "tool2" in client._tool_map

    @pytest.mark.asyncio
    async def test_call_tool(self, mock_mcp_use_client):
        """``call_tool`` forwards name/arguments to the session and returns its result."""
        client = MCPClient(mcp_config={"test": {"command": "test"}})

        session = self._make_session([self._make_tool("calculator", "Calculator")])
        expected = types.CallToolResult(
            content=[types.TextContent(type="text", text="Result: 42")], isError=False
        )
        session.connector.client_session.call_tool = AsyncMock(return_value=expected)
        mock_mcp_use_client.create_all_sessions = AsyncMock(return_value={"test": session})

        await client.initialize()

        arguments = {"operation": "add", "a": 20, "b": 22}
        result = await client.call_tool("calculator", arguments)

        assert result == expected
        session.connector.client_session.call_tool.assert_called_once_with(
            name="calculator", arguments={"operation": "add", "a": 20, "b": 22}
        )

    @pytest.mark.asyncio
    async def test_call_tool_not_found(self, mock_mcp_use_client):
        """Calling a tool that no server advertises raises ``ValueError``."""
        client = MCPClient(mcp_config={"test": {"command": "test"}})

        session = self._make_session()
        mock_mcp_use_client.create_all_sessions = AsyncMock(return_value={"test": session})

        await client.initialize()

        with pytest.raises(ValueError, match="Tool 'nonexistent' not found"):
            await client.call_tool("nonexistent", {})

    @pytest.mark.asyncio
    async def test_get_telemetry_data(self, mock_mcp_use_client):
        """Telemetry JSON served by the session is exposed per server name."""
        client = MCPClient(mcp_config={"test": {"command": "test"}})

        session = self._make_session()
        telemetry = types.ReadResourceResult(
            contents=[
                types.TextResourceContents(
                    uri=AnyUrl("telemetry://live"),
                    mimeType="application/json",
                    text='{"events": [{"type": "test", "data": "value"}]}',
                )
            ]
        )
        session.connector.client_session.read_resource = AsyncMock(return_value=telemetry)
        mock_mcp_use_client.create_all_sessions = AsyncMock(return_value={"test": session})

        await client.initialize()

        telemetry_data = client.get_telemetry_data()

        assert "test" in telemetry_data
        assert telemetry_data["test"]["events"][0]["type"] == "test"

    @pytest.mark.asyncio
    async def test_close(self, mock_mcp_use_client):
        """``close`` shuts down all sessions on the internal client exactly once."""
        client = MCPClient(mcp_config={"test": {"command": "test"}})

        session = self._make_session()
        # Hand the session over via ``create_all_sessions`` like every other test
        # that calls ``initialize()``. The earlier version mocked ``create_session``,
        # so the client was closed without ever having seen this session.
        mock_mcp_use_client.create_all_sessions = AsyncMock(return_value={"test": session})
        mock_mcp_use_client.close_all_sessions = AsyncMock()

        await client.initialize()
        await client.close()

        mock_mcp_use_client.close_all_sessions.assert_called_once()

    @pytest.mark.asyncio
    async def test_context_manager(self, mock_mcp_use_client):
        """Leaving the ``async with`` block closes all sessions."""
        session = self._make_session()
        mock_mcp_use_client.create_all_sessions = AsyncMock(return_value={"test": session})
        mock_mcp_use_client.close_all_sessions = AsyncMock()

        config = {"test": {"command": "test"}}
        # MCPUseClient is already patched by the fixture.
        async with MCPClient(mcp_config=config) as client:
            assert client._mcp_client is not None
            assert client._mcp_client == mock_mcp_use_client

        mock_mcp_use_client.close_all_sessions.assert_called_once()

    def test_get_available_tools(self, mock_mcp_use_client):
        """``get_available_tools`` returns the stored tools in order."""
        client = MCPClient(mcp_config={"test": {"command": "test"}})

        # Seed the tool list directly instead of going through discovery.
        client._available_tools = [
            self._make_tool("tool1", "Tool 1"),
            self._make_tool("tool2", "Tool 2"),
        ]

        tools = client.get_available_tools()

        assert len(tools) == 2
        assert tools[0].name == "tool1"
        assert tools[1].name == "tool2"

    def test_get_tool_map(self, mock_mcp_use_client):
        """``get_tool_map`` returns the stored name -> (server, tool) mapping."""
        client = MCPClient(mcp_config={"test": {"command": "test"}})

        # Seed the map directly instead of going through discovery.
        client._tool_map = {
            "tool1": ("server1", self._make_tool("tool1", "Tool 1")),
            "tool2": ("server2", self._make_tool("tool2", "Tool 2")),
        }

        tool_map = client.get_tool_map()

        assert len(tool_map) == 2
        assert tool_map["tool1"][0] == "server1"
        assert tool_map["tool2"][0] == "server2"
|