hud-python 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (192) hide show
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +17 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +379 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +354 -0
  45. hud/clients/fastmcp.py +202 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -420
  87. hud/tools/computer/hud.py +376 -334
  88. hud/tools/computer/openai.py +295 -292
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.0.dist-info/METADATA +474 -0
  126. hud_python-0.4.0.dist-info/RECORD +132 -0
  127. hud_python-0.4.0.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.5.dist-info/METADATA +0 -284
  190. hud_python-0.3.5.dist-info/RECORD +0 -120
  191. /hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
@@ -1,512 +0,0 @@
1
- """Tests for BaseMCPAgent using simulated actions."""
2
-
3
- from __future__ import annotations
4
-
5
- from typing import TYPE_CHECKING, Any
6
- from unittest.mock import MagicMock
7
-
8
- # Import AsyncMock from unittest.mock if available (Python 3.8+)
9
- try:
10
- from unittest.mock import AsyncMock
11
- except ImportError:
12
- # Fallback for older Python versions
13
- from unittest.mock import MagicMock as AsyncMock
14
-
15
- import pytest
16
- from mcp import types
17
- from mcp.types import CallToolRequestParams as MCPToolCall
18
-
19
- from hud.mcp.base import BaseMCPAgent
20
- from hud.tools.executors.base import BaseExecutor
21
-
22
- if TYPE_CHECKING:
23
- from hud.task import Task
24
-
25
-
26
- class MockMCPAgent(BaseMCPAgent):
27
- """Concrete implementation of BaseMCPAgent for testing."""
28
-
29
- def __init__(self, mcp_client: Any = None, **kwargs: Any) -> None:
30
- if mcp_client is None:
31
- # Create a mock client if none provided
32
- mcp_client = MagicMock()
33
- mcp_client.get_all_active_sessions = MagicMock(return_value={})
34
- mcp_client.get_available_tools = MagicMock(return_value=[])
35
- super().__init__(mcp_client=mcp_client, **kwargs)
36
- self.executor = BaseExecutor() # Use simulated executor
37
- self._messages = []
38
-
39
- async def run(self, task: Task) -> list[dict[str, Any]]:
40
- """Mock run method."""
41
- return self._messages
42
-
43
- def create_initial_messages(
44
- self, prompt: str, screenshot: str | None = None
45
- ) -> list[dict[str, Any]]:
46
- """Mock create initial messages."""
47
- messages = [{"role": "user", "content": prompt}]
48
- if screenshot:
49
- messages.append({"role": "assistant", "content": f"Screenshot: {screenshot}"})
50
- return messages
51
-
52
- def get_model_response(self, messages: list[dict[str, Any]]) -> dict[str, Any]:
53
- """Mock get model response."""
54
- return {"role": "assistant", "content": "Mock response"}
55
-
56
- def format_tool_results(
57
- self,
58
- results: list[tuple[str, Any]],
59
- screenshot: str | None = None,
60
- assistant_msg: dict[str, Any] | None = None,
61
- ) -> list[dict[str, Any]]:
62
- """Mock format tool results."""
63
- formatted = []
64
- for tool_name, result in results:
65
- formatted.append({"role": "tool", "name": tool_name, "content": str(result)})
66
- if screenshot:
67
- formatted.append({"role": "screenshot", "content": screenshot})
68
- return formatted
69
-
70
- async def create_user_message(self, text: str) -> Any:
71
- """Mock create user message."""
72
- return {"role": "user", "content": text}
73
-
74
-
75
- class TestBaseMCPAgent:
76
- """Tests for BaseMCPAgent with simulated actions."""
77
-
78
- def test_init_defaults(self):
79
- """Test initialization with default values."""
80
- agent = MockMCPAgent()
81
-
82
- assert agent.mcp_client is not None
83
- assert agent.allowed_tools is None
84
- assert agent.disallowed_tools == []
85
- assert agent.initial_screenshot is False
86
- assert agent.max_screenshot_history == 3
87
- assert agent.append_tool_system_prompt is True
88
- assert agent.custom_system_prompt is None
89
- assert agent.lifecycle_tools == []
90
-
91
- def test_init_with_params(self):
92
- """Test initialization with custom parameters."""
93
- client = MagicMock()
94
- agent = MockMCPAgent(
95
- mcp_client=client,
96
- allowed_tools=["tool1", "tool2"],
97
- disallowed_tools=["bad_tool"],
98
- initial_screenshot=True,
99
- max_screenshot_history=5,
100
- append_tool_system_prompt=False,
101
- custom_system_prompt="Custom prompt",
102
- lifecycle_tools=["custom_setup", "custom_eval"],
103
- )
104
-
105
- assert agent.mcp_client == client
106
- assert agent.allowed_tools == ["tool1", "tool2"]
107
- assert agent.disallowed_tools == ["bad_tool"]
108
- assert agent.initial_screenshot is True
109
- assert agent.max_screenshot_history == 5
110
- assert agent.append_tool_system_prompt is False
111
- assert agent.custom_system_prompt == "Custom prompt"
112
- assert agent.lifecycle_tools == ["custom_setup", "custom_eval"]
113
-
114
- def test_init_no_client(self):
115
- """Test init fails without client."""
116
-
117
- # Create a minimal concrete implementation to test the ValueError
118
- class TestAgent(BaseMCPAgent):
119
- def create_initial_messages(
120
- self, prompt: str, screenshot: str | None = None
121
- ) -> list[dict[str, Any]]:
122
- return []
123
-
124
- def format_tool_results(
125
- self, results: list[tuple[str, Any]], screenshot: str | None = None
126
- ) -> list[dict[str, Any]]:
127
- return []
128
-
129
- async def get_model_response(self, messages: list[dict[str, Any]]) -> dict[str, Any]:
130
- return {"content": "test"}
131
-
132
- with pytest.raises(ValueError, match="MCPClient is required"):
133
- TestAgent(mcp_client=None)
134
-
135
- @pytest.mark.asyncio
136
- async def test_initialize_with_sessions(self):
137
- """Test initialize with existing sessions."""
138
- agent = MockMCPAgent()
139
-
140
- # Create proper async mock for session
141
- mock_session = MagicMock()
142
-
143
- # Set up the connector and client_session structure
144
- mock_session.connector = MagicMock()
145
- mock_session.connector.client_session = MagicMock()
146
-
147
- # Mock list_tools on the client_session
148
- async def mock_list_tools():
149
- return types.ListToolsResult(
150
- tools=[
151
- types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
152
- types.Tool(name="tool2", description="Tool 2", inputSchema={"type": "object"}),
153
- types.Tool(
154
- name="setup", description="Setup tool", inputSchema={"type": "object"}
155
- ),
156
- ]
157
- )
158
-
159
- mock_session.connector.client_session.list_tools = mock_list_tools
160
-
161
- assert agent.mcp_client is not None
162
- agent.mcp_client.get_all_active_sessions = MagicMock(return_value={"server1": mock_session})
163
-
164
- # Mock get_tool_map to return tools discovered from sessions
165
- tool_map = {
166
- "tool1": (
167
- "server1",
168
- types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
169
- ),
170
- "tool2": (
171
- "server1",
172
- types.Tool(name="tool2", description="Tool 2", inputSchema={"type": "object"}),
173
- ),
174
- "setup": (
175
- "server1",
176
- types.Tool(name="setup", description="Setup tool", inputSchema={"type": "object"}),
177
- ),
178
- }
179
- agent.mcp_client.get_tool_map = MagicMock(return_value=tool_map)
180
-
181
- await agent.initialize()
182
-
183
- # Check available tools were populated (excludes lifecycle tools)
184
- tools = agent.get_available_tools()
185
- assert len(tools) == 3 # All tools (setup is not in default lifecycle tools)
186
-
187
- # Check tool map was populated (includes all tools)
188
- tool_map = agent.get_tool_map()
189
- assert len(tool_map) == 3
190
- assert "tool1" in tool_map
191
- assert "tool2" in tool_map
192
- assert "setup" in tool_map
193
-
194
- @pytest.mark.asyncio
195
- async def test_initialize_with_filtering(self):
196
- """Test initialize with tool filtering."""
197
- agent = MockMCPAgent(allowed_tools=["tool1"], disallowed_tools=["tool3"])
198
-
199
- # Create proper async mock for session
200
- mock_session = MagicMock()
201
-
202
- # Set up the connector and client_session structure
203
- mock_session.connector = MagicMock()
204
- mock_session.connector.client_session = MagicMock()
205
-
206
- async def mock_list_tools():
207
- return types.ListToolsResult(
208
- tools=[
209
- types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
210
- types.Tool(name="tool2", description="Tool 2", inputSchema={"type": "object"}),
211
- types.Tool(name="tool3", description="Tool 3", inputSchema={"type": "object"}),
212
- types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
213
- ]
214
- )
215
-
216
- mock_session.connector.client_session.list_tools = mock_list_tools
217
-
218
- assert agent.mcp_client is not None
219
- agent.mcp_client.get_all_active_sessions = MagicMock(return_value={"server1": mock_session})
220
-
221
- # Mock get_tool_map to return tools discovered from sessions
222
- tool_map = {
223
- "tool1": (
224
- "server1",
225
- types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
226
- ),
227
- "tool2": (
228
- "server1",
229
- types.Tool(name="tool2", description="Tool 2", inputSchema={"type": "object"}),
230
- ),
231
- "tool3": (
232
- "server1",
233
- types.Tool(name="tool3", description="Tool 3", inputSchema={"type": "object"}),
234
- ),
235
- "setup": (
236
- "server1",
237
- types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
238
- ),
239
- }
240
- agent.mcp_client.get_tool_map = MagicMock(return_value=tool_map)
241
-
242
- await agent.initialize()
243
-
244
- # Check filtering worked - get_available_tools excludes lifecycle tools
245
- tools = agent.get_available_tools()
246
- tool_names = [t.name for t in tools]
247
- assert len(tools) == 1 # Only tool1 (tool2 and tool3 are filtered out)
248
- assert "tool1" in tool_names
249
- assert "setup" not in tool_names # Lifecycle tool excluded from available tools
250
- assert "tool2" not in tool_names # Not in allowed list
251
- assert "tool3" not in tool_names # In disallowed list
252
-
253
- @pytest.mark.asyncio
254
- async def test_call_tool_success(self):
255
- """Test successful tool call."""
256
- agent = MockMCPAgent()
257
-
258
- # Initialize with a tool
259
- mock_session = MagicMock()
260
- mock_session.connector = MagicMock()
261
- mock_session.connector.client_session = MagicMock()
262
-
263
- async def mock_list_tools():
264
- return types.ListToolsResult(
265
- tools=[
266
- types.Tool(name="test_tool", description="Test", inputSchema={"type": "object"})
267
- ]
268
- )
269
-
270
- mock_session.connector.client_session.list_tools = mock_list_tools
271
-
272
- # Mock the call_tool method on the client session
273
- mock_result = types.CallToolResult(
274
- content=[types.TextContent(type="text", text="Tool result")], isError=False
275
- )
276
-
277
- async def mock_call_tool(name, args):
278
- return mock_result
279
-
280
- mock_session.connector.client_session.call_tool = mock_call_tool
281
-
282
- assert agent.mcp_client is not None
283
- agent.mcp_client.get_all_active_sessions = MagicMock(return_value={"server1": mock_session})
284
-
285
- # Mock get_tool_map to return tools discovered from sessions
286
- tool_map = {
287
- "test_tool": (
288
- "server1",
289
- types.Tool(name="test_tool", description="Test", inputSchema={"type": "object"}),
290
- )
291
- }
292
- agent.mcp_client.get_tool_map = MagicMock(return_value=tool_map)
293
-
294
- # Mock the client's call_tool method directly
295
- agent.mcp_client.call_tool = AsyncMock(return_value=mock_result)
296
-
297
- await agent.initialize()
298
-
299
- # Call the tool
300
- tool_call = MCPToolCall(name="test_tool", arguments={"param": "value"})
301
- result = await agent.call_tool(tool_call)
302
-
303
- assert result == mock_result
304
- assert not result.isError
305
-
306
- @pytest.mark.asyncio
307
- async def test_call_tool_not_found(self):
308
- """Test calling non-existent tool."""
309
- agent = MockMCPAgent()
310
-
311
- # Initialize without tools
312
- mock_session = MagicMock()
313
-
314
- async def mock_list_tools():
315
- return types.ListToolsResult(tools=[])
316
-
317
- mock_session.list_tools = mock_list_tools
318
- assert agent.mcp_client is not None
319
- agent.mcp_client.get_all_active_sessions = MagicMock(return_value={"server1": mock_session})
320
-
321
- await agent.initialize()
322
-
323
- # Try to call unknown tool
324
- with pytest.raises(ValueError, match="Tool 'unknown_tool' not found"):
325
- tool_call = MCPToolCall(name="unknown_tool", arguments={})
326
- await agent.call_tool(tool_call)
327
-
328
- @pytest.mark.asyncio
329
- async def test_call_tool_no_name(self):
330
- """Test calling tool without name."""
331
- # MCPToolCall accepts empty names, but the agent should validate
332
- agent = MockMCPAgent()
333
- tool_call = MCPToolCall(name="", arguments={})
334
-
335
- with pytest.raises(ValueError, match="Tool call must have a 'name' field"):
336
- await agent.call_tool(tool_call)
337
-
338
- def test_get_system_prompt_default(self):
339
- """Test get_system_prompt with default settings."""
340
- agent = MockMCPAgent()
341
-
342
- # Add some tools
343
- agent._available_tools = [
344
- types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
345
- types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
346
- ]
347
-
348
- prompt = agent.get_system_prompt()
349
-
350
- # Should include ALL tool descriptions (including lifecycle tools)
351
- assert "tool1" in prompt
352
- assert "Tool 1" in prompt
353
- assert "setup" in prompt
354
- assert "Setup" in prompt
355
-
356
- def test_get_system_prompt_custom(self):
357
- """Test get_system_prompt with custom prompt."""
358
- agent = MockMCPAgent(
359
- custom_system_prompt="My custom prompt", append_tool_system_prompt=False
360
- )
361
-
362
- prompt = agent.get_system_prompt()
363
- assert prompt == "My custom prompt"
364
-
365
- def test_has_computer_tools(self):
366
- """Test checking for computer tools."""
367
- agent = MockMCPAgent()
368
-
369
- # No tools
370
- assert not agent.has_computer_tools()
371
-
372
- # With computer tool
373
- agent._available_tools = [
374
- types.Tool(name="computer", description="Computer", inputSchema={"type": "object"})
375
- ]
376
- assert agent.has_computer_tools()
377
-
378
- # With screenshot tool
379
- agent._available_tools = [
380
- types.Tool(name="screenshot", description="Screenshot", inputSchema={"type": "object"})
381
- ]
382
- assert agent.has_computer_tools()
383
-
384
- def test_get_tool_schemas(self):
385
- """Test getting tool schemas."""
386
- agent = MockMCPAgent()
387
-
388
- # Add setup to lifecycle tools to test filtering
389
- agent.lifecycle_tools = ["setup"]
390
-
391
- agent._available_tools = [
392
- types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
393
- types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
394
- ]
395
-
396
- schemas = agent.get_tool_schemas()
397
-
398
- # Should include non-lifecycle tools
399
- assert len(schemas) == 1
400
- assert schemas[0]["name"] == "tool1"
401
-
402
- @pytest.mark.asyncio
403
- async def test_capture_screenshot_no_tool(self):
404
- """Test screenshot capture without screenshot tool."""
405
- agent = MockMCPAgent()
406
-
407
- screenshot = await agent.capture_screenshot()
408
- assert screenshot is None
409
-
410
- @pytest.mark.asyncio
411
- async def test_capture_screenshot_with_tool(self):
412
- """Test screenshot capture with screenshot tool."""
413
- agent = MockMCPAgent()
414
-
415
- # Set up screenshot tool
416
- mock_session = MagicMock()
417
- mock_session.connector = MagicMock()
418
- mock_session.connector.client_session = MagicMock()
419
-
420
- async def mock_list_tools():
421
- return types.ListToolsResult(
422
- tools=[
423
- types.Tool(
424
- name="screenshot", description="Screenshot", inputSchema={"type": "object"}
425
- )
426
- ]
427
- )
428
-
429
- mock_session.connector.client_session.list_tools = mock_list_tools
430
-
431
- # Mock screenshot result
432
- mock_result = types.CallToolResult(
433
- content=[
434
- types.ImageContent(type="image", data="base64imagedata", mimeType="image/png")
435
- ],
436
- isError=False,
437
- )
438
-
439
- async def mock_call_tool(name, args):
440
- return mock_result
441
-
442
- mock_session.connector.client_session.call_tool = mock_call_tool
443
-
444
- assert agent.mcp_client is not None
445
- agent.mcp_client.get_all_active_sessions = MagicMock(return_value={"server1": mock_session})
446
-
447
- # Mock get_tool_map to return tools discovered from sessions
448
- tool_map = {
449
- "screenshot": (
450
- "server1",
451
- types.Tool(
452
- name="screenshot", description="Screenshot", inputSchema={"type": "object"}
453
- ),
454
- )
455
- }
456
- agent.mcp_client.get_tool_map = MagicMock(return_value=tool_map)
457
-
458
- # Mock the client's call_tool method directly
459
- agent.mcp_client.call_tool = AsyncMock(return_value=mock_result)
460
-
461
- await agent.initialize()
462
-
463
- screenshot = await agent.capture_screenshot()
464
- assert screenshot == "base64imagedata"
465
-
466
- # process_tool_results method was removed from base class
467
- # This functionality is now handled internally
468
-
469
- def test_get_tools_by_server(self):
470
- """Test getting tools grouped by server."""
471
- agent = MockMCPAgent()
472
-
473
- # Set up tools from different servers
474
- tool1 = types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"})
475
- tool2 = types.Tool(name="tool2", description="Tool 2", inputSchema={"type": "object"})
476
-
477
- agent._available_tools = [tool1, tool2]
478
- agent._tool_map = {
479
- "tool1": ("server1", tool1),
480
- "tool2": ("server2", tool2),
481
- }
482
-
483
- tools_by_server = agent.get_tools_by_server()
484
-
485
- assert len(tools_by_server) == 2
486
- assert "server1" in tools_by_server
487
- assert "server2" in tools_by_server
488
- assert tools_by_server["server1"] == [tool1]
489
- assert tools_by_server["server2"] == [tool2]
490
-
491
- @pytest.mark.asyncio
492
- async def test_executor_integration(self):
493
- """Test integration with BaseExecutor for simulated actions."""
494
- agent = MockMCPAgent()
495
-
496
- # Test various executor actions
497
- click_result = await agent.executor.click(100, 200, take_screenshot=False)
498
- assert click_result.output is not None
499
- assert "[SIMULATED] Click at (100, 200)" in click_result.output
500
-
501
- type_result = await agent.executor.type("Test input", take_screenshot=False)
502
- assert type_result.output is not None
503
- assert "[SIMULATED] Type 'Test input'" in type_result.output
504
-
505
- scroll_result = await agent.executor.scroll(x=50, y=50, scroll_y=5, take_screenshot=False)
506
- assert scroll_result.output is not None
507
- assert "[SIMULATED] Scroll" in scroll_result.output
508
-
509
- # Test screenshot
510
- screenshot = await agent.executor.screenshot()
511
- assert isinstance(screenshot, str)
512
- assert screenshot.startswith("iVBORw0KGgo") # PNG header