hud-python 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -89
- hud/agents/__init__.py +17 -0
- hud/agents/art.py +101 -0
- hud/agents/base.py +599 -0
- hud/{mcp → agents}/claude.py +373 -321
- hud/{mcp → agents}/langchain.py +250 -250
- hud/agents/misc/__init__.py +7 -0
- hud/{agent → agents}/misc/response_agent.py +80 -80
- hud/{mcp → agents}/openai.py +352 -334
- hud/agents/openai_chat_generic.py +154 -0
- hud/{mcp → agents}/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -0
- hud/agents/tests/test_claude.py +324 -0
- hud/{mcp → agents}/tests/test_client.py +363 -324
- hud/{mcp → agents}/tests/test_openai.py +237 -238
- hud/cli/__init__.py +617 -0
- hud/cli/__main__.py +8 -0
- hud/cli/analyze.py +371 -0
- hud/cli/analyze_metadata.py +230 -0
- hud/cli/build.py +427 -0
- hud/cli/clone.py +185 -0
- hud/cli/cursor.py +92 -0
- hud/cli/debug.py +392 -0
- hud/cli/docker_utils.py +83 -0
- hud/cli/init.py +281 -0
- hud/cli/interactive.py +353 -0
- hud/cli/mcp_server.py +756 -0
- hud/cli/pull.py +336 -0
- hud/cli/push.py +379 -0
- hud/cli/remote_runner.py +311 -0
- hud/cli/runner.py +160 -0
- hud/cli/tests/__init__.py +3 -0
- hud/cli/tests/test_analyze.py +284 -0
- hud/cli/tests/test_cli_init.py +265 -0
- hud/cli/tests/test_cli_main.py +27 -0
- hud/cli/tests/test_clone.py +142 -0
- hud/cli/tests/test_cursor.py +253 -0
- hud/cli/tests/test_debug.py +453 -0
- hud/cli/tests/test_mcp_server.py +139 -0
- hud/cli/tests/test_utils.py +388 -0
- hud/cli/utils.py +263 -0
- hud/clients/README.md +143 -0
- hud/clients/__init__.py +16 -0
- hud/clients/base.py +354 -0
- hud/clients/fastmcp.py +202 -0
- hud/clients/mcp_use.py +278 -0
- hud/clients/tests/__init__.py +1 -0
- hud/clients/tests/test_client_integration.py +111 -0
- hud/clients/tests/test_fastmcp.py +342 -0
- hud/clients/tests/test_protocol.py +188 -0
- hud/clients/utils/__init__.py +1 -0
- hud/clients/utils/retry_transport.py +160 -0
- hud/datasets.py +322 -192
- hud/misc/__init__.py +1 -0
- hud/{agent → misc}/claude_plays_pokemon.py +292 -283
- hud/otel/__init__.py +35 -0
- hud/otel/collector.py +142 -0
- hud/otel/config.py +164 -0
- hud/otel/context.py +536 -0
- hud/otel/exporters.py +366 -0
- hud/otel/instrumentation.py +97 -0
- hud/otel/processors.py +118 -0
- hud/otel/tests/__init__.py +1 -0
- hud/otel/tests/test_processors.py +197 -0
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -0
- hud/server/helper/__init__.py +5 -0
- hud/server/low_level.py +132 -0
- hud/server/server.py +166 -0
- hud/server/tests/__init__.py +3 -0
- hud/settings.py +73 -79
- hud/shared/__init__.py +5 -0
- hud/{exceptions.py → shared/exceptions.py} +180 -180
- hud/{server → shared}/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -0
- hud/{server → shared}/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -30
- hud/telemetry/instrument.py +379 -0
- hud/telemetry/job.py +309 -141
- hud/telemetry/replay.py +74 -0
- hud/telemetry/trace.py +83 -0
- hud/tools/__init__.py +33 -34
- hud/tools/base.py +365 -65
- hud/tools/bash.py +161 -137
- hud/tools/computer/__init__.py +15 -13
- hud/tools/computer/anthropic.py +437 -414
- hud/tools/computer/hud.py +376 -328
- hud/tools/computer/openai.py +295 -286
- hud/tools/computer/settings.py +82 -0
- hud/tools/edit.py +314 -290
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -532
- hud/tools/executors/pyautogui.py +621 -619
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -503
- hud/tools/{playwright_tool.py → playwright.py} +412 -379
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -0
- hud/tools/tests/test_bash.py +158 -152
- hud/tools/tests/test_bash_extended.py +197 -0
- hud/tools/tests/test_computer.py +425 -52
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -240
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -157
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -0
- hud/tools/utils.py +50 -50
- hud/types.py +136 -89
- hud/utils/__init__.py +10 -16
- hud/utils/async_utils.py +65 -0
- hud/utils/design.py +168 -0
- hud/utils/mcp.py +55 -0
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -0
- hud/utils/tests/test_init.py +17 -21
- hud/utils/tests/test_progress.py +261 -225
- hud/utils/tests/test_telemetry.py +82 -37
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- hud_python-0.4.0.dist-info/METADATA +474 -0
- hud_python-0.4.0.dist-info/RECORD +132 -0
- hud_python-0.4.0.dist-info/entry_points.txt +3 -0
- {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
- hud/adapters/__init__.py +0 -8
- hud/adapters/claude/__init__.py +0 -5
- hud/adapters/claude/adapter.py +0 -180
- hud/adapters/claude/tests/__init__.py +0 -1
- hud/adapters/claude/tests/test_adapter.py +0 -519
- hud/adapters/common/__init__.py +0 -6
- hud/adapters/common/adapter.py +0 -178
- hud/adapters/common/tests/test_adapter.py +0 -289
- hud/adapters/common/types.py +0 -446
- hud/adapters/operator/__init__.py +0 -5
- hud/adapters/operator/adapter.py +0 -108
- hud/adapters/operator/tests/__init__.py +0 -1
- hud/adapters/operator/tests/test_adapter.py +0 -370
- hud/agent/__init__.py +0 -19
- hud/agent/base.py +0 -126
- hud/agent/claude.py +0 -271
- hud/agent/langchain.py +0 -215
- hud/agent/misc/__init__.py +0 -3
- hud/agent/operator.py +0 -268
- hud/agent/tests/__init__.py +0 -1
- hud/agent/tests/test_base.py +0 -202
- hud/env/__init__.py +0 -11
- hud/env/client.py +0 -35
- hud/env/docker_client.py +0 -349
- hud/env/environment.py +0 -446
- hud/env/local_docker_client.py +0 -358
- hud/env/remote_client.py +0 -212
- hud/env/remote_docker_client.py +0 -292
- hud/gym.py +0 -130
- hud/job.py +0 -773
- hud/mcp/__init__.py +0 -17
- hud/mcp/base.py +0 -631
- hud/mcp/client.py +0 -312
- hud/mcp/tests/test_base.py +0 -512
- hud/mcp/tests/test_claude.py +0 -294
- hud/task.py +0 -149
- hud/taskset.py +0 -237
- hud/telemetry/_trace.py +0 -347
- hud/telemetry/context.py +0 -230
- hud/telemetry/exporter.py +0 -575
- hud/telemetry/instrumentation/__init__.py +0 -3
- hud/telemetry/instrumentation/mcp.py +0 -259
- hud/telemetry/instrumentation/registry.py +0 -59
- hud/telemetry/mcp_models.py +0 -270
- hud/telemetry/tests/__init__.py +0 -1
- hud/telemetry/tests/test_context.py +0 -210
- hud/telemetry/tests/test_trace.py +0 -312
- hud/tools/helper/README.md +0 -56
- hud/tools/helper/__init__.py +0 -9
- hud/tools/helper/mcp_server.py +0 -78
- hud/tools/helper/server_initialization.py +0 -115
- hud/tools/helper/utils.py +0 -58
- hud/trajectory.py +0 -94
- hud/utils/agent.py +0 -37
- hud/utils/common.py +0 -256
- hud/utils/config.py +0 -120
- hud/utils/deprecation.py +0 -115
- hud/utils/misc.py +0 -53
- hud/utils/tests/test_common.py +0 -277
- hud/utils/tests/test_config.py +0 -129
- hud_python-0.3.4.dist-info/METADATA +0 -284
- hud_python-0.3.4.dist-info/RECORD +0 -120
- /hud/{adapters/common → shared}/tests/__init__.py +0 -0
- {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
hud/mcp/tests/test_base.py
DELETED
|
@@ -1,512 +0,0 @@
|
|
|
1
|
-
"""Tests for BaseMCPAgent using simulated actions."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from typing import TYPE_CHECKING, Any
|
|
6
|
-
from unittest.mock import MagicMock
|
|
7
|
-
|
|
8
|
-
# Import AsyncMock from unittest.mock if available (Python 3.8+)
|
|
9
|
-
try:
|
|
10
|
-
from unittest.mock import AsyncMock
|
|
11
|
-
except ImportError:
|
|
12
|
-
# Fallback for older Python versions
|
|
13
|
-
from unittest.mock import MagicMock as AsyncMock
|
|
14
|
-
|
|
15
|
-
import pytest
|
|
16
|
-
from mcp import types
|
|
17
|
-
from mcp.types import CallToolRequestParams as MCPToolCall
|
|
18
|
-
|
|
19
|
-
from hud.mcp.base import BaseMCPAgent
|
|
20
|
-
from hud.tools.executors.base import BaseExecutor
|
|
21
|
-
|
|
22
|
-
if TYPE_CHECKING:
|
|
23
|
-
from hud.task import Task
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class MockMCPAgent(BaseMCPAgent):
|
|
27
|
-
"""Concrete implementation of BaseMCPAgent for testing."""
|
|
28
|
-
|
|
29
|
-
def __init__(self, mcp_client: Any = None, **kwargs: Any) -> None:
|
|
30
|
-
if mcp_client is None:
|
|
31
|
-
# Create a mock client if none provided
|
|
32
|
-
mcp_client = MagicMock()
|
|
33
|
-
mcp_client.get_all_active_sessions = MagicMock(return_value={})
|
|
34
|
-
mcp_client.get_available_tools = MagicMock(return_value=[])
|
|
35
|
-
super().__init__(mcp_client=mcp_client, **kwargs)
|
|
36
|
-
self.executor = BaseExecutor() # Use simulated executor
|
|
37
|
-
self._messages = []
|
|
38
|
-
|
|
39
|
-
async def run(self, task: Task) -> list[dict[str, Any]]:
|
|
40
|
-
"""Mock run method."""
|
|
41
|
-
return self._messages
|
|
42
|
-
|
|
43
|
-
def create_initial_messages(
|
|
44
|
-
self, prompt: str, screenshot: str | None = None
|
|
45
|
-
) -> list[dict[str, Any]]:
|
|
46
|
-
"""Mock create initial messages."""
|
|
47
|
-
messages = [{"role": "user", "content": prompt}]
|
|
48
|
-
if screenshot:
|
|
49
|
-
messages.append({"role": "assistant", "content": f"Screenshot: {screenshot}"})
|
|
50
|
-
return messages
|
|
51
|
-
|
|
52
|
-
def get_model_response(self, messages: list[dict[str, Any]]) -> dict[str, Any]:
|
|
53
|
-
"""Mock get model response."""
|
|
54
|
-
return {"role": "assistant", "content": "Mock response"}
|
|
55
|
-
|
|
56
|
-
def format_tool_results(
|
|
57
|
-
self,
|
|
58
|
-
results: list[tuple[str, Any]],
|
|
59
|
-
screenshot: str | None = None,
|
|
60
|
-
assistant_msg: dict[str, Any] | None = None,
|
|
61
|
-
) -> list[dict[str, Any]]:
|
|
62
|
-
"""Mock format tool results."""
|
|
63
|
-
formatted = []
|
|
64
|
-
for tool_name, result in results:
|
|
65
|
-
formatted.append({"role": "tool", "name": tool_name, "content": str(result)})
|
|
66
|
-
if screenshot:
|
|
67
|
-
formatted.append({"role": "screenshot", "content": screenshot})
|
|
68
|
-
return formatted
|
|
69
|
-
|
|
70
|
-
async def create_user_message(self, text: str) -> Any:
|
|
71
|
-
"""Mock create user message."""
|
|
72
|
-
return {"role": "user", "content": text}
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
class TestBaseMCPAgent:
|
|
76
|
-
"""Tests for BaseMCPAgent with simulated actions."""
|
|
77
|
-
|
|
78
|
-
def test_init_defaults(self):
|
|
79
|
-
"""Test initialization with default values."""
|
|
80
|
-
agent = MockMCPAgent()
|
|
81
|
-
|
|
82
|
-
assert agent.mcp_client is not None
|
|
83
|
-
assert agent.allowed_tools is None
|
|
84
|
-
assert agent.disallowed_tools == []
|
|
85
|
-
assert agent.initial_screenshot is False
|
|
86
|
-
assert agent.max_screenshot_history == 3
|
|
87
|
-
assert agent.append_tool_system_prompt is True
|
|
88
|
-
assert agent.custom_system_prompt is None
|
|
89
|
-
assert agent.lifecycle_tools == []
|
|
90
|
-
|
|
91
|
-
def test_init_with_params(self):
|
|
92
|
-
"""Test initialization with custom parameters."""
|
|
93
|
-
client = MagicMock()
|
|
94
|
-
agent = MockMCPAgent(
|
|
95
|
-
mcp_client=client,
|
|
96
|
-
allowed_tools=["tool1", "tool2"],
|
|
97
|
-
disallowed_tools=["bad_tool"],
|
|
98
|
-
initial_screenshot=True,
|
|
99
|
-
max_screenshot_history=5,
|
|
100
|
-
append_tool_system_prompt=False,
|
|
101
|
-
custom_system_prompt="Custom prompt",
|
|
102
|
-
lifecycle_tools=["custom_setup", "custom_eval"],
|
|
103
|
-
)
|
|
104
|
-
|
|
105
|
-
assert agent.mcp_client == client
|
|
106
|
-
assert agent.allowed_tools == ["tool1", "tool2"]
|
|
107
|
-
assert agent.disallowed_tools == ["bad_tool"]
|
|
108
|
-
assert agent.initial_screenshot is True
|
|
109
|
-
assert agent.max_screenshot_history == 5
|
|
110
|
-
assert agent.append_tool_system_prompt is False
|
|
111
|
-
assert agent.custom_system_prompt == "Custom prompt"
|
|
112
|
-
assert agent.lifecycle_tools == ["custom_setup", "custom_eval"]
|
|
113
|
-
|
|
114
|
-
def test_init_no_client(self):
|
|
115
|
-
"""Test init fails without client."""
|
|
116
|
-
|
|
117
|
-
# Create a minimal concrete implementation to test the ValueError
|
|
118
|
-
class TestAgent(BaseMCPAgent):
|
|
119
|
-
def create_initial_messages(
|
|
120
|
-
self, prompt: str, screenshot: str | None = None
|
|
121
|
-
) -> list[dict[str, Any]]:
|
|
122
|
-
return []
|
|
123
|
-
|
|
124
|
-
def format_tool_results(
|
|
125
|
-
self, results: list[tuple[str, Any]], screenshot: str | None = None
|
|
126
|
-
) -> list[dict[str, Any]]:
|
|
127
|
-
return []
|
|
128
|
-
|
|
129
|
-
async def get_model_response(self, messages: list[dict[str, Any]]) -> dict[str, Any]:
|
|
130
|
-
return {"content": "test"}
|
|
131
|
-
|
|
132
|
-
with pytest.raises(ValueError, match="MCPClient is required"):
|
|
133
|
-
TestAgent(mcp_client=None)
|
|
134
|
-
|
|
135
|
-
@pytest.mark.asyncio
|
|
136
|
-
async def test_initialize_with_sessions(self):
|
|
137
|
-
"""Test initialize with existing sessions."""
|
|
138
|
-
agent = MockMCPAgent()
|
|
139
|
-
|
|
140
|
-
# Create proper async mock for session
|
|
141
|
-
mock_session = MagicMock()
|
|
142
|
-
|
|
143
|
-
# Set up the connector and client_session structure
|
|
144
|
-
mock_session.connector = MagicMock()
|
|
145
|
-
mock_session.connector.client_session = MagicMock()
|
|
146
|
-
|
|
147
|
-
# Mock list_tools on the client_session
|
|
148
|
-
async def mock_list_tools():
|
|
149
|
-
return types.ListToolsResult(
|
|
150
|
-
tools=[
|
|
151
|
-
types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
|
|
152
|
-
types.Tool(name="tool2", description="Tool 2", inputSchema={"type": "object"}),
|
|
153
|
-
types.Tool(
|
|
154
|
-
name="setup", description="Setup tool", inputSchema={"type": "object"}
|
|
155
|
-
),
|
|
156
|
-
]
|
|
157
|
-
)
|
|
158
|
-
|
|
159
|
-
mock_session.connector.client_session.list_tools = mock_list_tools
|
|
160
|
-
|
|
161
|
-
assert agent.mcp_client is not None
|
|
162
|
-
agent.mcp_client.get_all_active_sessions = MagicMock(return_value={"server1": mock_session})
|
|
163
|
-
|
|
164
|
-
# Mock get_tool_map to return tools discovered from sessions
|
|
165
|
-
tool_map = {
|
|
166
|
-
"tool1": (
|
|
167
|
-
"server1",
|
|
168
|
-
types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
|
|
169
|
-
),
|
|
170
|
-
"tool2": (
|
|
171
|
-
"server1",
|
|
172
|
-
types.Tool(name="tool2", description="Tool 2", inputSchema={"type": "object"}),
|
|
173
|
-
),
|
|
174
|
-
"setup": (
|
|
175
|
-
"server1",
|
|
176
|
-
types.Tool(name="setup", description="Setup tool", inputSchema={"type": "object"}),
|
|
177
|
-
),
|
|
178
|
-
}
|
|
179
|
-
agent.mcp_client.get_tool_map = MagicMock(return_value=tool_map)
|
|
180
|
-
|
|
181
|
-
await agent.initialize()
|
|
182
|
-
|
|
183
|
-
# Check available tools were populated (excludes lifecycle tools)
|
|
184
|
-
tools = agent.get_available_tools()
|
|
185
|
-
assert len(tools) == 3 # All tools (setup is not in default lifecycle tools)
|
|
186
|
-
|
|
187
|
-
# Check tool map was populated (includes all tools)
|
|
188
|
-
tool_map = agent.get_tool_map()
|
|
189
|
-
assert len(tool_map) == 3
|
|
190
|
-
assert "tool1" in tool_map
|
|
191
|
-
assert "tool2" in tool_map
|
|
192
|
-
assert "setup" in tool_map
|
|
193
|
-
|
|
194
|
-
@pytest.mark.asyncio
|
|
195
|
-
async def test_initialize_with_filtering(self):
|
|
196
|
-
"""Test initialize with tool filtering."""
|
|
197
|
-
agent = MockMCPAgent(allowed_tools=["tool1"], disallowed_tools=["tool3"])
|
|
198
|
-
|
|
199
|
-
# Create proper async mock for session
|
|
200
|
-
mock_session = MagicMock()
|
|
201
|
-
|
|
202
|
-
# Set up the connector and client_session structure
|
|
203
|
-
mock_session.connector = MagicMock()
|
|
204
|
-
mock_session.connector.client_session = MagicMock()
|
|
205
|
-
|
|
206
|
-
async def mock_list_tools():
|
|
207
|
-
return types.ListToolsResult(
|
|
208
|
-
tools=[
|
|
209
|
-
types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
|
|
210
|
-
types.Tool(name="tool2", description="Tool 2", inputSchema={"type": "object"}),
|
|
211
|
-
types.Tool(name="tool3", description="Tool 3", inputSchema={"type": "object"}),
|
|
212
|
-
types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
|
|
213
|
-
]
|
|
214
|
-
)
|
|
215
|
-
|
|
216
|
-
mock_session.connector.client_session.list_tools = mock_list_tools
|
|
217
|
-
|
|
218
|
-
assert agent.mcp_client is not None
|
|
219
|
-
agent.mcp_client.get_all_active_sessions = MagicMock(return_value={"server1": mock_session})
|
|
220
|
-
|
|
221
|
-
# Mock get_tool_map to return tools discovered from sessions
|
|
222
|
-
tool_map = {
|
|
223
|
-
"tool1": (
|
|
224
|
-
"server1",
|
|
225
|
-
types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
|
|
226
|
-
),
|
|
227
|
-
"tool2": (
|
|
228
|
-
"server1",
|
|
229
|
-
types.Tool(name="tool2", description="Tool 2", inputSchema={"type": "object"}),
|
|
230
|
-
),
|
|
231
|
-
"tool3": (
|
|
232
|
-
"server1",
|
|
233
|
-
types.Tool(name="tool3", description="Tool 3", inputSchema={"type": "object"}),
|
|
234
|
-
),
|
|
235
|
-
"setup": (
|
|
236
|
-
"server1",
|
|
237
|
-
types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
|
|
238
|
-
),
|
|
239
|
-
}
|
|
240
|
-
agent.mcp_client.get_tool_map = MagicMock(return_value=tool_map)
|
|
241
|
-
|
|
242
|
-
await agent.initialize()
|
|
243
|
-
|
|
244
|
-
# Check filtering worked - get_available_tools excludes lifecycle tools
|
|
245
|
-
tools = agent.get_available_tools()
|
|
246
|
-
tool_names = [t.name for t in tools]
|
|
247
|
-
assert len(tools) == 1 # Only tool1 (tool2 and tool3 are filtered out)
|
|
248
|
-
assert "tool1" in tool_names
|
|
249
|
-
assert "setup" not in tool_names # Lifecycle tool excluded from available tools
|
|
250
|
-
assert "tool2" not in tool_names # Not in allowed list
|
|
251
|
-
assert "tool3" not in tool_names # In disallowed list
|
|
252
|
-
|
|
253
|
-
@pytest.mark.asyncio
|
|
254
|
-
async def test_call_tool_success(self):
|
|
255
|
-
"""Test successful tool call."""
|
|
256
|
-
agent = MockMCPAgent()
|
|
257
|
-
|
|
258
|
-
# Initialize with a tool
|
|
259
|
-
mock_session = MagicMock()
|
|
260
|
-
mock_session.connector = MagicMock()
|
|
261
|
-
mock_session.connector.client_session = MagicMock()
|
|
262
|
-
|
|
263
|
-
async def mock_list_tools():
|
|
264
|
-
return types.ListToolsResult(
|
|
265
|
-
tools=[
|
|
266
|
-
types.Tool(name="test_tool", description="Test", inputSchema={"type": "object"})
|
|
267
|
-
]
|
|
268
|
-
)
|
|
269
|
-
|
|
270
|
-
mock_session.connector.client_session.list_tools = mock_list_tools
|
|
271
|
-
|
|
272
|
-
# Mock the call_tool method on the client session
|
|
273
|
-
mock_result = types.CallToolResult(
|
|
274
|
-
content=[types.TextContent(type="text", text="Tool result")], isError=False
|
|
275
|
-
)
|
|
276
|
-
|
|
277
|
-
async def mock_call_tool(name, args):
|
|
278
|
-
return mock_result
|
|
279
|
-
|
|
280
|
-
mock_session.connector.client_session.call_tool = mock_call_tool
|
|
281
|
-
|
|
282
|
-
assert agent.mcp_client is not None
|
|
283
|
-
agent.mcp_client.get_all_active_sessions = MagicMock(return_value={"server1": mock_session})
|
|
284
|
-
|
|
285
|
-
# Mock get_tool_map to return tools discovered from sessions
|
|
286
|
-
tool_map = {
|
|
287
|
-
"test_tool": (
|
|
288
|
-
"server1",
|
|
289
|
-
types.Tool(name="test_tool", description="Test", inputSchema={"type": "object"}),
|
|
290
|
-
)
|
|
291
|
-
}
|
|
292
|
-
agent.mcp_client.get_tool_map = MagicMock(return_value=tool_map)
|
|
293
|
-
|
|
294
|
-
# Mock the client's call_tool method directly
|
|
295
|
-
agent.mcp_client.call_tool = AsyncMock(return_value=mock_result)
|
|
296
|
-
|
|
297
|
-
await agent.initialize()
|
|
298
|
-
|
|
299
|
-
# Call the tool
|
|
300
|
-
tool_call = MCPToolCall(name="test_tool", arguments={"param": "value"})
|
|
301
|
-
result = await agent.call_tool(tool_call)
|
|
302
|
-
|
|
303
|
-
assert result == mock_result
|
|
304
|
-
assert not result.isError
|
|
305
|
-
|
|
306
|
-
@pytest.mark.asyncio
|
|
307
|
-
async def test_call_tool_not_found(self):
|
|
308
|
-
"""Test calling non-existent tool."""
|
|
309
|
-
agent = MockMCPAgent()
|
|
310
|
-
|
|
311
|
-
# Initialize without tools
|
|
312
|
-
mock_session = MagicMock()
|
|
313
|
-
|
|
314
|
-
async def mock_list_tools():
|
|
315
|
-
return types.ListToolsResult(tools=[])
|
|
316
|
-
|
|
317
|
-
mock_session.list_tools = mock_list_tools
|
|
318
|
-
assert agent.mcp_client is not None
|
|
319
|
-
agent.mcp_client.get_all_active_sessions = MagicMock(return_value={"server1": mock_session})
|
|
320
|
-
|
|
321
|
-
await agent.initialize()
|
|
322
|
-
|
|
323
|
-
# Try to call unknown tool
|
|
324
|
-
with pytest.raises(ValueError, match="Tool 'unknown_tool' not found"):
|
|
325
|
-
tool_call = MCPToolCall(name="unknown_tool", arguments={})
|
|
326
|
-
await agent.call_tool(tool_call)
|
|
327
|
-
|
|
328
|
-
@pytest.mark.asyncio
|
|
329
|
-
async def test_call_tool_no_name(self):
|
|
330
|
-
"""Test calling tool without name."""
|
|
331
|
-
# MCPToolCall accepts empty names, but the agent should validate
|
|
332
|
-
agent = MockMCPAgent()
|
|
333
|
-
tool_call = MCPToolCall(name="", arguments={})
|
|
334
|
-
|
|
335
|
-
with pytest.raises(ValueError, match="Tool call must have a 'name' field"):
|
|
336
|
-
await agent.call_tool(tool_call)
|
|
337
|
-
|
|
338
|
-
def test_get_system_prompt_default(self):
|
|
339
|
-
"""Test get_system_prompt with default settings."""
|
|
340
|
-
agent = MockMCPAgent()
|
|
341
|
-
|
|
342
|
-
# Add some tools
|
|
343
|
-
agent._available_tools = [
|
|
344
|
-
types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
|
|
345
|
-
types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
|
|
346
|
-
]
|
|
347
|
-
|
|
348
|
-
prompt = agent.get_system_prompt()
|
|
349
|
-
|
|
350
|
-
# Should include ALL tool descriptions (including lifecycle tools)
|
|
351
|
-
assert "tool1" in prompt
|
|
352
|
-
assert "Tool 1" in prompt
|
|
353
|
-
assert "setup" in prompt
|
|
354
|
-
assert "Setup" in prompt
|
|
355
|
-
|
|
356
|
-
def test_get_system_prompt_custom(self):
|
|
357
|
-
"""Test get_system_prompt with custom prompt."""
|
|
358
|
-
agent = MockMCPAgent(
|
|
359
|
-
custom_system_prompt="My custom prompt", append_tool_system_prompt=False
|
|
360
|
-
)
|
|
361
|
-
|
|
362
|
-
prompt = agent.get_system_prompt()
|
|
363
|
-
assert prompt == "My custom prompt"
|
|
364
|
-
|
|
365
|
-
def test_has_computer_tools(self):
|
|
366
|
-
"""Test checking for computer tools."""
|
|
367
|
-
agent = MockMCPAgent()
|
|
368
|
-
|
|
369
|
-
# No tools
|
|
370
|
-
assert not agent.has_computer_tools()
|
|
371
|
-
|
|
372
|
-
# With computer tool
|
|
373
|
-
agent._available_tools = [
|
|
374
|
-
types.Tool(name="computer", description="Computer", inputSchema={"type": "object"})
|
|
375
|
-
]
|
|
376
|
-
assert agent.has_computer_tools()
|
|
377
|
-
|
|
378
|
-
# With screenshot tool
|
|
379
|
-
agent._available_tools = [
|
|
380
|
-
types.Tool(name="screenshot", description="Screenshot", inputSchema={"type": "object"})
|
|
381
|
-
]
|
|
382
|
-
assert agent.has_computer_tools()
|
|
383
|
-
|
|
384
|
-
def test_get_tool_schemas(self):
|
|
385
|
-
"""Test getting tool schemas."""
|
|
386
|
-
agent = MockMCPAgent()
|
|
387
|
-
|
|
388
|
-
# Add setup to lifecycle tools to test filtering
|
|
389
|
-
agent.lifecycle_tools = ["setup"]
|
|
390
|
-
|
|
391
|
-
agent._available_tools = [
|
|
392
|
-
types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
|
|
393
|
-
types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
|
|
394
|
-
]
|
|
395
|
-
|
|
396
|
-
schemas = agent.get_tool_schemas()
|
|
397
|
-
|
|
398
|
-
# Should include non-lifecycle tools
|
|
399
|
-
assert len(schemas) == 1
|
|
400
|
-
assert schemas[0]["name"] == "tool1"
|
|
401
|
-
|
|
402
|
-
@pytest.mark.asyncio
|
|
403
|
-
async def test_capture_screenshot_no_tool(self):
|
|
404
|
-
"""Test screenshot capture without screenshot tool."""
|
|
405
|
-
agent = MockMCPAgent()
|
|
406
|
-
|
|
407
|
-
screenshot = await agent.capture_screenshot()
|
|
408
|
-
assert screenshot is None
|
|
409
|
-
|
|
410
|
-
@pytest.mark.asyncio
|
|
411
|
-
async def test_capture_screenshot_with_tool(self):
|
|
412
|
-
"""Test screenshot capture with screenshot tool."""
|
|
413
|
-
agent = MockMCPAgent()
|
|
414
|
-
|
|
415
|
-
# Set up screenshot tool
|
|
416
|
-
mock_session = MagicMock()
|
|
417
|
-
mock_session.connector = MagicMock()
|
|
418
|
-
mock_session.connector.client_session = MagicMock()
|
|
419
|
-
|
|
420
|
-
async def mock_list_tools():
|
|
421
|
-
return types.ListToolsResult(
|
|
422
|
-
tools=[
|
|
423
|
-
types.Tool(
|
|
424
|
-
name="screenshot", description="Screenshot", inputSchema={"type": "object"}
|
|
425
|
-
)
|
|
426
|
-
]
|
|
427
|
-
)
|
|
428
|
-
|
|
429
|
-
mock_session.connector.client_session.list_tools = mock_list_tools
|
|
430
|
-
|
|
431
|
-
# Mock screenshot result
|
|
432
|
-
mock_result = types.CallToolResult(
|
|
433
|
-
content=[
|
|
434
|
-
types.ImageContent(type="image", data="base64imagedata", mimeType="image/png")
|
|
435
|
-
],
|
|
436
|
-
isError=False,
|
|
437
|
-
)
|
|
438
|
-
|
|
439
|
-
async def mock_call_tool(name, args):
|
|
440
|
-
return mock_result
|
|
441
|
-
|
|
442
|
-
mock_session.connector.client_session.call_tool = mock_call_tool
|
|
443
|
-
|
|
444
|
-
assert agent.mcp_client is not None
|
|
445
|
-
agent.mcp_client.get_all_active_sessions = MagicMock(return_value={"server1": mock_session})
|
|
446
|
-
|
|
447
|
-
# Mock get_tool_map to return tools discovered from sessions
|
|
448
|
-
tool_map = {
|
|
449
|
-
"screenshot": (
|
|
450
|
-
"server1",
|
|
451
|
-
types.Tool(
|
|
452
|
-
name="screenshot", description="Screenshot", inputSchema={"type": "object"}
|
|
453
|
-
),
|
|
454
|
-
)
|
|
455
|
-
}
|
|
456
|
-
agent.mcp_client.get_tool_map = MagicMock(return_value=tool_map)
|
|
457
|
-
|
|
458
|
-
# Mock the client's call_tool method directly
|
|
459
|
-
agent.mcp_client.call_tool = AsyncMock(return_value=mock_result)
|
|
460
|
-
|
|
461
|
-
await agent.initialize()
|
|
462
|
-
|
|
463
|
-
screenshot = await agent.capture_screenshot()
|
|
464
|
-
assert screenshot == "base64imagedata"
|
|
465
|
-
|
|
466
|
-
# process_tool_results method was removed from base class
|
|
467
|
-
# This functionality is now handled internally
|
|
468
|
-
|
|
469
|
-
def test_get_tools_by_server(self):
|
|
470
|
-
"""Test getting tools grouped by server."""
|
|
471
|
-
agent = MockMCPAgent()
|
|
472
|
-
|
|
473
|
-
# Set up tools from different servers
|
|
474
|
-
tool1 = types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"})
|
|
475
|
-
tool2 = types.Tool(name="tool2", description="Tool 2", inputSchema={"type": "object"})
|
|
476
|
-
|
|
477
|
-
agent._available_tools = [tool1, tool2]
|
|
478
|
-
agent._tool_map = {
|
|
479
|
-
"tool1": ("server1", tool1),
|
|
480
|
-
"tool2": ("server2", tool2),
|
|
481
|
-
}
|
|
482
|
-
|
|
483
|
-
tools_by_server = agent.get_tools_by_server()
|
|
484
|
-
|
|
485
|
-
assert len(tools_by_server) == 2
|
|
486
|
-
assert "server1" in tools_by_server
|
|
487
|
-
assert "server2" in tools_by_server
|
|
488
|
-
assert tools_by_server["server1"] == [tool1]
|
|
489
|
-
assert tools_by_server["server2"] == [tool2]
|
|
490
|
-
|
|
491
|
-
@pytest.mark.asyncio
|
|
492
|
-
async def test_executor_integration(self):
|
|
493
|
-
"""Test integration with BaseExecutor for simulated actions."""
|
|
494
|
-
agent = MockMCPAgent()
|
|
495
|
-
|
|
496
|
-
# Test various executor actions
|
|
497
|
-
click_result = await agent.executor.click(100, 200, take_screenshot=False)
|
|
498
|
-
assert click_result.output is not None
|
|
499
|
-
assert "[SIMULATED] Click at (100, 200)" in click_result.output
|
|
500
|
-
|
|
501
|
-
type_result = await agent.executor.type("Test input", take_screenshot=False)
|
|
502
|
-
assert type_result.output is not None
|
|
503
|
-
assert "[SIMULATED] Type 'Test input'" in type_result.output
|
|
504
|
-
|
|
505
|
-
scroll_result = await agent.executor.scroll(x=50, y=50, scroll_y=5, take_screenshot=False)
|
|
506
|
-
assert scroll_result.output is not None
|
|
507
|
-
assert "[SIMULATED] Scroll" in scroll_result.output
|
|
508
|
-
|
|
509
|
-
# Test screenshot
|
|
510
|
-
screenshot = await agent.executor.screenshot()
|
|
511
|
-
assert isinstance(screenshot, str)
|
|
512
|
-
assert screenshot.startswith("iVBORw0KGgo") # PNG header
|