hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -22
- hud/agents/__init__.py +13 -15
- hud/agents/base.py +599 -599
- hud/agents/claude.py +373 -373
- hud/agents/langchain.py +261 -250
- hud/agents/misc/__init__.py +7 -7
- hud/agents/misc/response_agent.py +82 -80
- hud/agents/openai.py +352 -352
- hud/agents/openai_chat_generic.py +154 -154
- hud/agents/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -742
- hud/agents/tests/test_claude.py +324 -324
- hud/agents/tests/test_client.py +363 -363
- hud/agents/tests/test_openai.py +237 -237
- hud/cli/__init__.py +617 -617
- hud/cli/__main__.py +8 -8
- hud/cli/analyze.py +371 -371
- hud/cli/analyze_metadata.py +230 -230
- hud/cli/build.py +498 -427
- hud/cli/clone.py +185 -185
- hud/cli/cursor.py +92 -92
- hud/cli/debug.py +392 -392
- hud/cli/docker_utils.py +83 -83
- hud/cli/init.py +280 -281
- hud/cli/interactive.py +353 -353
- hud/cli/mcp_server.py +764 -756
- hud/cli/pull.py +330 -336
- hud/cli/push.py +404 -370
- hud/cli/remote_runner.py +311 -311
- hud/cli/runner.py +160 -160
- hud/cli/tests/__init__.py +3 -3
- hud/cli/tests/test_analyze.py +284 -284
- hud/cli/tests/test_cli_init.py +265 -265
- hud/cli/tests/test_cli_main.py +27 -27
- hud/cli/tests/test_clone.py +142 -142
- hud/cli/tests/test_cursor.py +253 -253
- hud/cli/tests/test_debug.py +453 -453
- hud/cli/tests/test_mcp_server.py +139 -139
- hud/cli/tests/test_utils.py +388 -388
- hud/cli/utils.py +263 -263
- hud/clients/README.md +143 -143
- hud/clients/__init__.py +16 -16
- hud/clients/base.py +378 -379
- hud/clients/fastmcp.py +222 -222
- hud/clients/mcp_use.py +298 -278
- hud/clients/tests/__init__.py +1 -1
- hud/clients/tests/test_client_integration.py +111 -111
- hud/clients/tests/test_fastmcp.py +342 -342
- hud/clients/tests/test_protocol.py +188 -188
- hud/clients/utils/__init__.py +1 -1
- hud/clients/utils/retry_transport.py +160 -160
- hud/datasets.py +327 -322
- hud/misc/__init__.py +1 -1
- hud/misc/claude_plays_pokemon.py +292 -292
- hud/otel/__init__.py +35 -35
- hud/otel/collector.py +142 -142
- hud/otel/config.py +164 -164
- hud/otel/context.py +536 -536
- hud/otel/exporters.py +366 -366
- hud/otel/instrumentation.py +97 -97
- hud/otel/processors.py +118 -118
- hud/otel/tests/__init__.py +1 -1
- hud/otel/tests/test_processors.py +197 -197
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -114
- hud/server/helper/__init__.py +5 -5
- hud/server/low_level.py +132 -132
- hud/server/server.py +170 -166
- hud/server/tests/__init__.py +3 -3
- hud/settings.py +73 -73
- hud/shared/__init__.py +5 -5
- hud/shared/exceptions.py +180 -180
- hud/shared/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -157
- hud/shared/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -25
- hud/telemetry/instrument.py +379 -379
- hud/telemetry/job.py +309 -309
- hud/telemetry/replay.py +74 -74
- hud/telemetry/trace.py +83 -83
- hud/tools/__init__.py +33 -33
- hud/tools/base.py +365 -365
- hud/tools/bash.py +161 -161
- hud/tools/computer/__init__.py +15 -15
- hud/tools/computer/anthropic.py +437 -437
- hud/tools/computer/hud.py +376 -376
- hud/tools/computer/openai.py +295 -295
- hud/tools/computer/settings.py +82 -82
- hud/tools/edit.py +314 -314
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -539
- hud/tools/executors/pyautogui.py +621 -621
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -511
- hud/tools/playwright.py +412 -412
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -282
- hud/tools/tests/test_bash.py +158 -158
- hud/tools/tests/test_bash_extended.py +197 -197
- hud/tools/tests/test_computer.py +425 -425
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -259
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -145
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -72
- hud/tools/utils.py +50 -50
- hud/types.py +136 -136
- hud/utils/__init__.py +10 -10
- hud/utils/async_utils.py +65 -65
- hud/utils/design.py +236 -168
- hud/utils/mcp.py +55 -55
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -173
- hud/utils/tests/test_init.py +17 -17
- hud/utils/tests/test_progress.py +261 -261
- hud/utils/tests/test_telemetry.py +82 -82
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
- hud_python-0.4.3.dist-info/RECORD +131 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
- hud/agents/art.py +0 -101
- hud_python-0.4.1.dist-info/RECORD +0 -132
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/agents/tests/test_claude.py
CHANGED
|
@@ -1,324 +1,324 @@
|
|
|
1
|
-
"""Tests for Claude MCP Agent implementation."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from typing import TYPE_CHECKING, cast
|
|
6
|
-
from unittest.mock import AsyncMock, MagicMock, patch
|
|
7
|
-
|
|
8
|
-
import pytest
|
|
9
|
-
from anthropic import BadRequestError
|
|
10
|
-
from mcp import types
|
|
11
|
-
|
|
12
|
-
from hud.agents.claude import (
|
|
13
|
-
ClaudeAgent,
|
|
14
|
-
base64_to_content_block,
|
|
15
|
-
text_to_content_block,
|
|
16
|
-
tool_use_content_block,
|
|
17
|
-
)
|
|
18
|
-
from hud.types import MCPToolCall, MCPToolResult
|
|
19
|
-
|
|
20
|
-
if TYPE_CHECKING:
|
|
21
|
-
from anthropic.types.beta import BetaImageBlockParam, BetaMessageParam, BetaTextBlockParam
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class TestClaudeHelperFunctions:
|
|
25
|
-
"""Test helper functions for Claude message formatting."""
|
|
26
|
-
|
|
27
|
-
def test_base64_to_content_block(self):
|
|
28
|
-
"""Test base64 image conversion."""
|
|
29
|
-
base64_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==" # noqa: E501
|
|
30
|
-
result = base64_to_content_block(base64_data)
|
|
31
|
-
|
|
32
|
-
assert result["type"] == "image"
|
|
33
|
-
assert result["source"]["type"] == "base64"
|
|
34
|
-
assert result["source"]["media_type"] == "image/png"
|
|
35
|
-
assert result["source"]["data"] == base64_data
|
|
36
|
-
|
|
37
|
-
def test_text_to_content_block(self):
|
|
38
|
-
"""Test text conversion."""
|
|
39
|
-
text = "Hello, world!"
|
|
40
|
-
result = text_to_content_block(text)
|
|
41
|
-
|
|
42
|
-
assert result["type"] == "text"
|
|
43
|
-
assert result["text"] == text
|
|
44
|
-
|
|
45
|
-
def test_tool_use_content_block(self):
|
|
46
|
-
"""Test tool result content block creation."""
|
|
47
|
-
tool_use_id = "tool_123"
|
|
48
|
-
content: list[BetaTextBlockParam | BetaImageBlockParam] = [
|
|
49
|
-
text_to_content_block("Result text")
|
|
50
|
-
]
|
|
51
|
-
|
|
52
|
-
result = tool_use_content_block(tool_use_id, content)
|
|
53
|
-
|
|
54
|
-
assert result["type"] == "tool_result"
|
|
55
|
-
assert result["tool_use_id"] == tool_use_id
|
|
56
|
-
assert result["content"] == content # type: ignore
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
class TestClaudeAgent:
|
|
60
|
-
"""Test ClaudeAgent class."""
|
|
61
|
-
|
|
62
|
-
@pytest.fixture
|
|
63
|
-
def mock_mcp_client(self):
|
|
64
|
-
"""Create a mock MCP client."""
|
|
65
|
-
mcp_client = MagicMock()
|
|
66
|
-
return mcp_client
|
|
67
|
-
|
|
68
|
-
@pytest.fixture
|
|
69
|
-
def mock_anthropic(self):
|
|
70
|
-
"""Create a mock Anthropic client."""
|
|
71
|
-
with patch("hud.agents.claude.AsyncAnthropic") as mock:
|
|
72
|
-
client = AsyncMock()
|
|
73
|
-
# Add beta attribute with messages
|
|
74
|
-
client.beta = AsyncMock()
|
|
75
|
-
client.beta.messages = AsyncMock()
|
|
76
|
-
mock.return_value = client
|
|
77
|
-
yield client
|
|
78
|
-
|
|
79
|
-
@pytest.mark.asyncio
|
|
80
|
-
async def test_init(self, mock_mcp_client, mock_anthropic):
|
|
81
|
-
"""Test agent initialization."""
|
|
82
|
-
# Test with provided model_client
|
|
83
|
-
mock_model_client = MagicMock()
|
|
84
|
-
agent = ClaudeAgent(
|
|
85
|
-
mcp_client=mock_mcp_client,
|
|
86
|
-
model_client=mock_model_client,
|
|
87
|
-
model="claude-3-opus-20240229",
|
|
88
|
-
max_tokens=1000,
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
assert agent.model_name == "claude-3-opus-20240229"
|
|
92
|
-
assert agent.max_tokens == 1000
|
|
93
|
-
assert agent.anthropic_client == mock_model_client
|
|
94
|
-
|
|
95
|
-
@pytest.mark.asyncio
|
|
96
|
-
async def test_init_without_model_client(self, mock_mcp_client):
|
|
97
|
-
"""Test agent initialization without model client."""
|
|
98
|
-
with patch("hud.settings.settings.anthropic_api_key", "test_key"):
|
|
99
|
-
agent = ClaudeAgent(mcp_client=mock_mcp_client, model="claude-3-opus-20240229")
|
|
100
|
-
|
|
101
|
-
assert agent.model_name == "claude-3-opus-20240229"
|
|
102
|
-
assert agent.anthropic_client is not None
|
|
103
|
-
|
|
104
|
-
@pytest.mark.asyncio
|
|
105
|
-
async def test_format_blocks(self, mock_mcp_client):
|
|
106
|
-
"""Test formatting content blocks into Claude messages."""
|
|
107
|
-
mock_model_client = MagicMock()
|
|
108
|
-
agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_model_client)
|
|
109
|
-
|
|
110
|
-
# Test with text only
|
|
111
|
-
text_blocks: list[types.ContentBlock] = [
|
|
112
|
-
types.TextContent(type="text", text="Hello, Claude!")
|
|
113
|
-
]
|
|
114
|
-
messages = await agent.format_blocks(text_blocks)
|
|
115
|
-
assert len(messages) == 1
|
|
116
|
-
assert messages[0]["role"] == "user"
|
|
117
|
-
content = messages[0]["content"]
|
|
118
|
-
assert isinstance(content, list)
|
|
119
|
-
assert len(content) == 1
|
|
120
|
-
assert content[0]["type"] == "text"
|
|
121
|
-
assert content[0]["text"] == "Hello, Claude!"
|
|
122
|
-
|
|
123
|
-
# Test with screenshot
|
|
124
|
-
image_blocks: list[types.ContentBlock] = [
|
|
125
|
-
types.TextContent(type="text", text="Look at this"),
|
|
126
|
-
types.ImageContent(type="image", data="base64data", mimeType="image/png"),
|
|
127
|
-
]
|
|
128
|
-
messages = await agent.format_blocks(image_blocks)
|
|
129
|
-
assert len(messages) == 1
|
|
130
|
-
assert messages[0]["role"] == "user"
|
|
131
|
-
content = messages[0]["content"]
|
|
132
|
-
assert isinstance(content, list)
|
|
133
|
-
assert len(content) == 2
|
|
134
|
-
# Content blocks are in order
|
|
135
|
-
assert content[0]["type"] == "text"
|
|
136
|
-
assert content[0]["text"] == "Look at this"
|
|
137
|
-
assert content[1]["type"] == "image"
|
|
138
|
-
assert content[1]["source"]["data"] == "base64data"
|
|
139
|
-
|
|
140
|
-
@pytest.mark.asyncio
|
|
141
|
-
async def test_format_tool_results_method(self, mock_mcp_client):
|
|
142
|
-
"""Test the agent's format_tool_results method."""
|
|
143
|
-
mock_model_client = MagicMock()
|
|
144
|
-
agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_model_client)
|
|
145
|
-
|
|
146
|
-
tool_calls = [
|
|
147
|
-
MCPToolCall(name="test_tool", arguments={}, id="id1"),
|
|
148
|
-
]
|
|
149
|
-
|
|
150
|
-
tool_results = [
|
|
151
|
-
MCPToolResult(content=[types.TextContent(type="text", text="Success")], isError=False),
|
|
152
|
-
]
|
|
153
|
-
|
|
154
|
-
messages = await agent.format_tool_results(tool_calls, tool_results)
|
|
155
|
-
|
|
156
|
-
# format_tool_results returns a single user message with tool result content
|
|
157
|
-
assert len(messages) == 1
|
|
158
|
-
assert messages[0]["role"] == "user"
|
|
159
|
-
# The content is wrapped in a tool result block
|
|
160
|
-
content = list(messages[0]["content"])
|
|
161
|
-
assert len(content) == 1
|
|
162
|
-
assert content[0]["type"] == "tool_result" # type: ignore
|
|
163
|
-
assert content[0]["tool_use_id"] == "id1" # type: ignore
|
|
164
|
-
# The actual content is nested inside
|
|
165
|
-
inner_content = list(content[0]["content"]) # type: ignore
|
|
166
|
-
assert inner_content[0]["type"] == "text" # type: ignore
|
|
167
|
-
assert inner_content[0]["text"] == "Success" # type: ignore
|
|
168
|
-
|
|
169
|
-
@pytest.mark.asyncio
|
|
170
|
-
async def test_get_response(self, mock_mcp_client, mock_anthropic):
|
|
171
|
-
"""Test getting model response from Claude API."""
|
|
172
|
-
# Disable telemetry for this test to avoid backend configuration issues
|
|
173
|
-
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
174
|
-
agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
|
|
175
|
-
|
|
176
|
-
# Mock the API response
|
|
177
|
-
mock_response = MagicMock()
|
|
178
|
-
|
|
179
|
-
# Create text block
|
|
180
|
-
text_block = MagicMock()
|
|
181
|
-
text_block.type = "text"
|
|
182
|
-
text_block.text = "Hello!"
|
|
183
|
-
|
|
184
|
-
# Create tool use block
|
|
185
|
-
tool_block = MagicMock()
|
|
186
|
-
tool_block.type = "tool_use"
|
|
187
|
-
tool_block.id = "tool_123"
|
|
188
|
-
tool_block.name = "test_tool"
|
|
189
|
-
tool_block.input = {"param": "value"}
|
|
190
|
-
|
|
191
|
-
mock_response.content = [text_block, tool_block]
|
|
192
|
-
mock_response.usage = MagicMock(input_tokens=10, output_tokens=20)
|
|
193
|
-
mock_anthropic.beta.messages.create = AsyncMock(return_value=mock_response)
|
|
194
|
-
|
|
195
|
-
messages = [
|
|
196
|
-
cast(
|
|
197
|
-
"BetaMessageParam",
|
|
198
|
-
{"role": "user", "content": [{"type": "text", "text": "Hi"}]},
|
|
199
|
-
)
|
|
200
|
-
]
|
|
201
|
-
response = await agent.get_response(messages)
|
|
202
|
-
|
|
203
|
-
assert response.content == "Hello!"
|
|
204
|
-
assert len(response.tool_calls) == 1
|
|
205
|
-
assert response.tool_calls[0].name == "test_tool"
|
|
206
|
-
assert response.tool_calls[0].arguments == {"param": "value"}
|
|
207
|
-
# The test was checking for Claude-specific attributes that aren't part of ModelResponse
|
|
208
|
-
# These would need to be accessed from the original Claude response if needed
|
|
209
|
-
|
|
210
|
-
# Verify API was called correctly
|
|
211
|
-
mock_anthropic.beta.messages.create.assert_called_once()
|
|
212
|
-
|
|
213
|
-
@pytest.mark.asyncio
|
|
214
|
-
async def test_get_model_response_text_only(self, mock_mcp_client, mock_anthropic):
|
|
215
|
-
"""Test getting text-only response."""
|
|
216
|
-
# Disable telemetry for this test to avoid backend configuration issues
|
|
217
|
-
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
218
|
-
agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
|
|
219
|
-
|
|
220
|
-
mock_response = MagicMock()
|
|
221
|
-
# Create text block
|
|
222
|
-
text_block = MagicMock()
|
|
223
|
-
text_block.type = "text"
|
|
224
|
-
text_block.text = "Just text"
|
|
225
|
-
mock_response.content = [text_block]
|
|
226
|
-
mock_response.usage = MagicMock(input_tokens=5, output_tokens=10)
|
|
227
|
-
mock_anthropic.beta.messages.create = AsyncMock(return_value=mock_response)
|
|
228
|
-
|
|
229
|
-
messages = [
|
|
230
|
-
cast(
|
|
231
|
-
"BetaMessageParam",
|
|
232
|
-
{"role": "user", "content": [{"type": "text", "text": "Hi"}]},
|
|
233
|
-
)
|
|
234
|
-
]
|
|
235
|
-
response = await agent.get_response(messages)
|
|
236
|
-
|
|
237
|
-
assert response.content == "Just text"
|
|
238
|
-
assert response.tool_calls == []
|
|
239
|
-
|
|
240
|
-
@pytest.mark.asyncio
|
|
241
|
-
async def test_get_model_response_error(self, mock_mcp_client, mock_anthropic):
|
|
242
|
-
"""Test handling API errors."""
|
|
243
|
-
# Disable telemetry for this test to avoid backend configuration issues
|
|
244
|
-
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
245
|
-
agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
|
|
246
|
-
|
|
247
|
-
# Mock API error
|
|
248
|
-
mock_anthropic.beta.messages.create = AsyncMock(
|
|
249
|
-
side_effect=BadRequestError(
|
|
250
|
-
message="Invalid request",
|
|
251
|
-
response=MagicMock(status_code=400),
|
|
252
|
-
body={"error": {"message": "Invalid request"}},
|
|
253
|
-
)
|
|
254
|
-
)
|
|
255
|
-
|
|
256
|
-
messages = [{"role": "user", "content": [{"type": "text", "text": "Hi"}]}]
|
|
257
|
-
|
|
258
|
-
with pytest.raises(BadRequestError):
|
|
259
|
-
await agent.get_response(messages) # type: ignore
|
|
260
|
-
|
|
261
|
-
# This test is commented out as it's testing complex integration scenarios
|
|
262
|
-
# that may have changed in the implementation
|
|
263
|
-
# @pytest.mark.asyncio
|
|
264
|
-
# async def test_run_with_tools(self, mock_mcp_client, mock_anthropic):
|
|
265
|
-
# """Test running agent with tool usage."""
|
|
266
|
-
# # Disable telemetry for this test to avoid backend configuration issues
|
|
267
|
-
# with patch("hud.settings.settings.telemetry_enabled", False):
|
|
268
|
-
# agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
|
|
269
|
-
|
|
270
|
-
# # Mock tool availability
|
|
271
|
-
# agent._available_tools = [
|
|
272
|
-
# types.Tool(
|
|
273
|
-
# name="calculator", description="Calculator", inputSchema={"type": "object"}
|
|
274
|
-
# )
|
|
275
|
-
# ]
|
|
276
|
-
# agent._tool_map = {
|
|
277
|
-
# "calculator": types.Tool(
|
|
278
|
-
# name="calculator", description="Calculator", inputSchema={"type": "object"}
|
|
279
|
-
# )
|
|
280
|
-
# }
|
|
281
|
-
|
|
282
|
-
# # Mock initial response with tool use
|
|
283
|
-
# initial_response = MagicMock()
|
|
284
|
-
# # Create tool use block
|
|
285
|
-
# tool_block = MagicMock()
|
|
286
|
-
# tool_block.type = "tool_use"
|
|
287
|
-
# tool_block.id = "calc_123"
|
|
288
|
-
# tool_block.name = "calculator"
|
|
289
|
-
# tool_block.input = {"operation": "add", "a": 2, "b": 3}
|
|
290
|
-
# initial_response.content = [tool_block]
|
|
291
|
-
# initial_response.usage = MagicMock(input_tokens=10, output_tokens=15)
|
|
292
|
-
|
|
293
|
-
# # Mock follow-up response
|
|
294
|
-
# final_response = MagicMock()
|
|
295
|
-
# text_block = MagicMock()
|
|
296
|
-
# text_block.type = "text"
|
|
297
|
-
# text_block.text = "2 + 3 = 5"
|
|
298
|
-
# final_response.content = [text_block]
|
|
299
|
-
# final_response.usage = MagicMock(input_tokens=20, output_tokens=10)
|
|
300
|
-
|
|
301
|
-
# mock_anthropic.beta.messages.create = AsyncMock(
|
|
302
|
-
# side_effect=[initial_response, final_response]
|
|
303
|
-
# )
|
|
304
|
-
|
|
305
|
-
# # Mock tool execution
|
|
306
|
-
# mock_mcp_client.call_tool = AsyncMock(
|
|
307
|
-
# return_value=MCPToolResult(
|
|
308
|
-
# content=[types.TextContent(type="text", text="5")], isError=False
|
|
309
|
-
# )
|
|
310
|
-
# )
|
|
311
|
-
|
|
312
|
-
# # Mock the mcp_client properties
|
|
313
|
-
# mock_mcp_client.mcp_config = {"test_server": {"url": "http://localhost"}}
|
|
314
|
-
# mock_mcp_client.list_tools = AsyncMock(return_value=agent._available_tools)
|
|
315
|
-
# mock_mcp_client.initialize = AsyncMock()
|
|
316
|
-
|
|
317
|
-
# # Initialize the agent
|
|
318
|
-
# await agent.initialize()
|
|
319
|
-
|
|
320
|
-
# # Use a string prompt instead of a task
|
|
321
|
-
# result = await agent.run("What is 2 + 3?")
|
|
322
|
-
|
|
323
|
-
# assert result.content == "2 + 3 = 5"
|
|
324
|
-
# assert result.done is True
|
|
1
|
+
"""Tests for Claude MCP Agent implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, cast
|
|
6
|
+
from unittest.mock import AsyncMock, MagicMock, patch
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
from anthropic import BadRequestError
|
|
10
|
+
from mcp import types
|
|
11
|
+
|
|
12
|
+
from hud.agents.claude import (
|
|
13
|
+
ClaudeAgent,
|
|
14
|
+
base64_to_content_block,
|
|
15
|
+
text_to_content_block,
|
|
16
|
+
tool_use_content_block,
|
|
17
|
+
)
|
|
18
|
+
from hud.types import MCPToolCall, MCPToolResult
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from anthropic.types.beta import BetaImageBlockParam, BetaMessageParam, BetaTextBlockParam
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TestClaudeHelperFunctions:
|
|
25
|
+
"""Test helper functions for Claude message formatting."""
|
|
26
|
+
|
|
27
|
+
def test_base64_to_content_block(self):
|
|
28
|
+
"""Test base64 image conversion."""
|
|
29
|
+
base64_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==" # noqa: E501
|
|
30
|
+
result = base64_to_content_block(base64_data)
|
|
31
|
+
|
|
32
|
+
assert result["type"] == "image"
|
|
33
|
+
assert result["source"]["type"] == "base64"
|
|
34
|
+
assert result["source"]["media_type"] == "image/png"
|
|
35
|
+
assert result["source"]["data"] == base64_data
|
|
36
|
+
|
|
37
|
+
def test_text_to_content_block(self):
|
|
38
|
+
"""Test text conversion."""
|
|
39
|
+
text = "Hello, world!"
|
|
40
|
+
result = text_to_content_block(text)
|
|
41
|
+
|
|
42
|
+
assert result["type"] == "text"
|
|
43
|
+
assert result["text"] == text
|
|
44
|
+
|
|
45
|
+
def test_tool_use_content_block(self):
|
|
46
|
+
"""Test tool result content block creation."""
|
|
47
|
+
tool_use_id = "tool_123"
|
|
48
|
+
content: list[BetaTextBlockParam | BetaImageBlockParam] = [
|
|
49
|
+
text_to_content_block("Result text")
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
result = tool_use_content_block(tool_use_id, content)
|
|
53
|
+
|
|
54
|
+
assert result["type"] == "tool_result"
|
|
55
|
+
assert result["tool_use_id"] == tool_use_id
|
|
56
|
+
assert result["content"] == content # type: ignore
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class TestClaudeAgent:
|
|
60
|
+
"""Test ClaudeAgent class."""
|
|
61
|
+
|
|
62
|
+
@pytest.fixture
|
|
63
|
+
def mock_mcp_client(self):
|
|
64
|
+
"""Create a mock MCP client."""
|
|
65
|
+
mcp_client = MagicMock()
|
|
66
|
+
return mcp_client
|
|
67
|
+
|
|
68
|
+
@pytest.fixture
|
|
69
|
+
def mock_anthropic(self):
|
|
70
|
+
"""Create a mock Anthropic client."""
|
|
71
|
+
with patch("hud.agents.claude.AsyncAnthropic") as mock:
|
|
72
|
+
client = AsyncMock()
|
|
73
|
+
# Add beta attribute with messages
|
|
74
|
+
client.beta = AsyncMock()
|
|
75
|
+
client.beta.messages = AsyncMock()
|
|
76
|
+
mock.return_value = client
|
|
77
|
+
yield client
|
|
78
|
+
|
|
79
|
+
@pytest.mark.asyncio
|
|
80
|
+
async def test_init(self, mock_mcp_client, mock_anthropic):
|
|
81
|
+
"""Test agent initialization."""
|
|
82
|
+
# Test with provided model_client
|
|
83
|
+
mock_model_client = MagicMock()
|
|
84
|
+
agent = ClaudeAgent(
|
|
85
|
+
mcp_client=mock_mcp_client,
|
|
86
|
+
model_client=mock_model_client,
|
|
87
|
+
model="claude-3-opus-20240229",
|
|
88
|
+
max_tokens=1000,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
assert agent.model_name == "claude-3-opus-20240229"
|
|
92
|
+
assert agent.max_tokens == 1000
|
|
93
|
+
assert agent.anthropic_client == mock_model_client
|
|
94
|
+
|
|
95
|
+
@pytest.mark.asyncio
|
|
96
|
+
async def test_init_without_model_client(self, mock_mcp_client):
|
|
97
|
+
"""Test agent initialization without model client."""
|
|
98
|
+
with patch("hud.settings.settings.anthropic_api_key", "test_key"):
|
|
99
|
+
agent = ClaudeAgent(mcp_client=mock_mcp_client, model="claude-3-opus-20240229")
|
|
100
|
+
|
|
101
|
+
assert agent.model_name == "claude-3-opus-20240229"
|
|
102
|
+
assert agent.anthropic_client is not None
|
|
103
|
+
|
|
104
|
+
@pytest.mark.asyncio
|
|
105
|
+
async def test_format_blocks(self, mock_mcp_client):
|
|
106
|
+
"""Test formatting content blocks into Claude messages."""
|
|
107
|
+
mock_model_client = MagicMock()
|
|
108
|
+
agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_model_client)
|
|
109
|
+
|
|
110
|
+
# Test with text only
|
|
111
|
+
text_blocks: list[types.ContentBlock] = [
|
|
112
|
+
types.TextContent(type="text", text="Hello, Claude!")
|
|
113
|
+
]
|
|
114
|
+
messages = await agent.format_blocks(text_blocks)
|
|
115
|
+
assert len(messages) == 1
|
|
116
|
+
assert messages[0]["role"] == "user"
|
|
117
|
+
content = messages[0]["content"]
|
|
118
|
+
assert isinstance(content, list)
|
|
119
|
+
assert len(content) == 1
|
|
120
|
+
assert content[0]["type"] == "text"
|
|
121
|
+
assert content[0]["text"] == "Hello, Claude!"
|
|
122
|
+
|
|
123
|
+
# Test with screenshot
|
|
124
|
+
image_blocks: list[types.ContentBlock] = [
|
|
125
|
+
types.TextContent(type="text", text="Look at this"),
|
|
126
|
+
types.ImageContent(type="image", data="base64data", mimeType="image/png"),
|
|
127
|
+
]
|
|
128
|
+
messages = await agent.format_blocks(image_blocks)
|
|
129
|
+
assert len(messages) == 1
|
|
130
|
+
assert messages[0]["role"] == "user"
|
|
131
|
+
content = messages[0]["content"]
|
|
132
|
+
assert isinstance(content, list)
|
|
133
|
+
assert len(content) == 2
|
|
134
|
+
# Content blocks are in order
|
|
135
|
+
assert content[0]["type"] == "text"
|
|
136
|
+
assert content[0]["text"] == "Look at this"
|
|
137
|
+
assert content[1]["type"] == "image"
|
|
138
|
+
assert content[1]["source"]["data"] == "base64data"
|
|
139
|
+
|
|
140
|
+
@pytest.mark.asyncio
|
|
141
|
+
async def test_format_tool_results_method(self, mock_mcp_client):
|
|
142
|
+
"""Test the agent's format_tool_results method."""
|
|
143
|
+
mock_model_client = MagicMock()
|
|
144
|
+
agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_model_client)
|
|
145
|
+
|
|
146
|
+
tool_calls = [
|
|
147
|
+
MCPToolCall(name="test_tool", arguments={}, id="id1"),
|
|
148
|
+
]
|
|
149
|
+
|
|
150
|
+
tool_results = [
|
|
151
|
+
MCPToolResult(content=[types.TextContent(type="text", text="Success")], isError=False),
|
|
152
|
+
]
|
|
153
|
+
|
|
154
|
+
messages = await agent.format_tool_results(tool_calls, tool_results)
|
|
155
|
+
|
|
156
|
+
# format_tool_results returns a single user message with tool result content
|
|
157
|
+
assert len(messages) == 1
|
|
158
|
+
assert messages[0]["role"] == "user"
|
|
159
|
+
# The content is wrapped in a tool result block
|
|
160
|
+
content = list(messages[0]["content"])
|
|
161
|
+
assert len(content) == 1
|
|
162
|
+
assert content[0]["type"] == "tool_result" # type: ignore
|
|
163
|
+
assert content[0]["tool_use_id"] == "id1" # type: ignore
|
|
164
|
+
# The actual content is nested inside
|
|
165
|
+
inner_content = list(content[0]["content"]) # type: ignore
|
|
166
|
+
assert inner_content[0]["type"] == "text" # type: ignore
|
|
167
|
+
assert inner_content[0]["text"] == "Success" # type: ignore
|
|
168
|
+
|
|
169
|
+
@pytest.mark.asyncio
|
|
170
|
+
async def test_get_response(self, mock_mcp_client, mock_anthropic):
|
|
171
|
+
"""Test getting model response from Claude API."""
|
|
172
|
+
# Disable telemetry for this test to avoid backend configuration issues
|
|
173
|
+
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
174
|
+
agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
|
|
175
|
+
|
|
176
|
+
# Mock the API response
|
|
177
|
+
mock_response = MagicMock()
|
|
178
|
+
|
|
179
|
+
# Create text block
|
|
180
|
+
text_block = MagicMock()
|
|
181
|
+
text_block.type = "text"
|
|
182
|
+
text_block.text = "Hello!"
|
|
183
|
+
|
|
184
|
+
# Create tool use block
|
|
185
|
+
tool_block = MagicMock()
|
|
186
|
+
tool_block.type = "tool_use"
|
|
187
|
+
tool_block.id = "tool_123"
|
|
188
|
+
tool_block.name = "test_tool"
|
|
189
|
+
tool_block.input = {"param": "value"}
|
|
190
|
+
|
|
191
|
+
mock_response.content = [text_block, tool_block]
|
|
192
|
+
mock_response.usage = MagicMock(input_tokens=10, output_tokens=20)
|
|
193
|
+
mock_anthropic.beta.messages.create = AsyncMock(return_value=mock_response)
|
|
194
|
+
|
|
195
|
+
messages = [
|
|
196
|
+
cast(
|
|
197
|
+
"BetaMessageParam",
|
|
198
|
+
{"role": "user", "content": [{"type": "text", "text": "Hi"}]},
|
|
199
|
+
)
|
|
200
|
+
]
|
|
201
|
+
response = await agent.get_response(messages)
|
|
202
|
+
|
|
203
|
+
assert response.content == "Hello!"
|
|
204
|
+
assert len(response.tool_calls) == 1
|
|
205
|
+
assert response.tool_calls[0].name == "test_tool"
|
|
206
|
+
assert response.tool_calls[0].arguments == {"param": "value"}
|
|
207
|
+
# The test was checking for Claude-specific attributes that aren't part of ModelResponse
|
|
208
|
+
# These would need to be accessed from the original Claude response if needed
|
|
209
|
+
|
|
210
|
+
# Verify API was called correctly
|
|
211
|
+
mock_anthropic.beta.messages.create.assert_called_once()
|
|
212
|
+
|
|
213
|
+
@pytest.mark.asyncio
|
|
214
|
+
async def test_get_model_response_text_only(self, mock_mcp_client, mock_anthropic):
|
|
215
|
+
"""Test getting text-only response."""
|
|
216
|
+
# Disable telemetry for this test to avoid backend configuration issues
|
|
217
|
+
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
218
|
+
agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
|
|
219
|
+
|
|
220
|
+
mock_response = MagicMock()
|
|
221
|
+
# Create text block
|
|
222
|
+
text_block = MagicMock()
|
|
223
|
+
text_block.type = "text"
|
|
224
|
+
text_block.text = "Just text"
|
|
225
|
+
mock_response.content = [text_block]
|
|
226
|
+
mock_response.usage = MagicMock(input_tokens=5, output_tokens=10)
|
|
227
|
+
mock_anthropic.beta.messages.create = AsyncMock(return_value=mock_response)
|
|
228
|
+
|
|
229
|
+
messages = [
|
|
230
|
+
cast(
|
|
231
|
+
"BetaMessageParam",
|
|
232
|
+
{"role": "user", "content": [{"type": "text", "text": "Hi"}]},
|
|
233
|
+
)
|
|
234
|
+
]
|
|
235
|
+
response = await agent.get_response(messages)
|
|
236
|
+
|
|
237
|
+
assert response.content == "Just text"
|
|
238
|
+
assert response.tool_calls == []
|
|
239
|
+
|
|
240
|
+
@pytest.mark.asyncio
|
|
241
|
+
async def test_get_model_response_error(self, mock_mcp_client, mock_anthropic):
|
|
242
|
+
"""Test handling API errors."""
|
|
243
|
+
# Disable telemetry for this test to avoid backend configuration issues
|
|
244
|
+
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
245
|
+
agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
|
|
246
|
+
|
|
247
|
+
# Mock API error
|
|
248
|
+
mock_anthropic.beta.messages.create = AsyncMock(
|
|
249
|
+
side_effect=BadRequestError(
|
|
250
|
+
message="Invalid request",
|
|
251
|
+
response=MagicMock(status_code=400),
|
|
252
|
+
body={"error": {"message": "Invalid request"}},
|
|
253
|
+
)
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
messages = [{"role": "user", "content": [{"type": "text", "text": "Hi"}]}]
|
|
257
|
+
|
|
258
|
+
with pytest.raises(BadRequestError):
|
|
259
|
+
await agent.get_response(messages) # type: ignore
|
|
260
|
+
|
|
261
|
+
# This test is commented out as it's testing complex integration scenarios
|
|
262
|
+
# that may have changed in the implementation
|
|
263
|
+
# @pytest.mark.asyncio
|
|
264
|
+
# async def test_run_with_tools(self, mock_mcp_client, mock_anthropic):
|
|
265
|
+
# """Test running agent with tool usage."""
|
|
266
|
+
# # Disable telemetry for this test to avoid backend configuration issues
|
|
267
|
+
# with patch("hud.settings.settings.telemetry_enabled", False):
|
|
268
|
+
# agent = ClaudeAgent(mcp_client=mock_mcp_client, model_client=mock_anthropic)
|
|
269
|
+
|
|
270
|
+
# # Mock tool availability
|
|
271
|
+
# agent._available_tools = [
|
|
272
|
+
# types.Tool(
|
|
273
|
+
# name="calculator", description="Calculator", inputSchema={"type": "object"}
|
|
274
|
+
# )
|
|
275
|
+
# ]
|
|
276
|
+
# agent._tool_map = {
|
|
277
|
+
# "calculator": types.Tool(
|
|
278
|
+
# name="calculator", description="Calculator", inputSchema={"type": "object"}
|
|
279
|
+
# )
|
|
280
|
+
# }
|
|
281
|
+
|
|
282
|
+
# # Mock initial response with tool use
|
|
283
|
+
# initial_response = MagicMock()
|
|
284
|
+
# # Create tool use block
|
|
285
|
+
# tool_block = MagicMock()
|
|
286
|
+
# tool_block.type = "tool_use"
|
|
287
|
+
# tool_block.id = "calc_123"
|
|
288
|
+
# tool_block.name = "calculator"
|
|
289
|
+
# tool_block.input = {"operation": "add", "a": 2, "b": 3}
|
|
290
|
+
# initial_response.content = [tool_block]
|
|
291
|
+
# initial_response.usage = MagicMock(input_tokens=10, output_tokens=15)
|
|
292
|
+
|
|
293
|
+
# # Mock follow-up response
|
|
294
|
+
# final_response = MagicMock()
|
|
295
|
+
# text_block = MagicMock()
|
|
296
|
+
# text_block.type = "text"
|
|
297
|
+
# text_block.text = "2 + 3 = 5"
|
|
298
|
+
# final_response.content = [text_block]
|
|
299
|
+
# final_response.usage = MagicMock(input_tokens=20, output_tokens=10)
|
|
300
|
+
|
|
301
|
+
# mock_anthropic.beta.messages.create = AsyncMock(
|
|
302
|
+
# side_effect=[initial_response, final_response]
|
|
303
|
+
# )
|
|
304
|
+
|
|
305
|
+
# # Mock tool execution
|
|
306
|
+
# mock_mcp_client.call_tool = AsyncMock(
|
|
307
|
+
# return_value=MCPToolResult(
|
|
308
|
+
# content=[types.TextContent(type="text", text="5")], isError=False
|
|
309
|
+
# )
|
|
310
|
+
# )
|
|
311
|
+
|
|
312
|
+
# # Mock the mcp_client properties
|
|
313
|
+
# mock_mcp_client.mcp_config = {"test_server": {"url": "http://localhost"}}
|
|
314
|
+
# mock_mcp_client.list_tools = AsyncMock(return_value=agent._available_tools)
|
|
315
|
+
# mock_mcp_client.initialize = AsyncMock()
|
|
316
|
+
|
|
317
|
+
# # Initialize the agent
|
|
318
|
+
# await agent.initialize()
|
|
319
|
+
|
|
320
|
+
# # Use a string prompt instead of a task
|
|
321
|
+
# result = await agent.run("What is 2 + 3?")
|
|
322
|
+
|
|
323
|
+
# assert result.content == "2 + 3 = 5"
|
|
324
|
+
# assert result.done is True
|