hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -22
- hud/agents/__init__.py +13 -15
- hud/agents/base.py +599 -599
- hud/agents/claude.py +373 -373
- hud/agents/langchain.py +261 -250
- hud/agents/misc/__init__.py +7 -7
- hud/agents/misc/response_agent.py +82 -80
- hud/agents/openai.py +352 -352
- hud/agents/openai_chat_generic.py +154 -154
- hud/agents/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -742
- hud/agents/tests/test_claude.py +324 -324
- hud/agents/tests/test_client.py +363 -363
- hud/agents/tests/test_openai.py +237 -237
- hud/cli/__init__.py +617 -617
- hud/cli/__main__.py +8 -8
- hud/cli/analyze.py +371 -371
- hud/cli/analyze_metadata.py +230 -230
- hud/cli/build.py +498 -427
- hud/cli/clone.py +185 -185
- hud/cli/cursor.py +92 -92
- hud/cli/debug.py +392 -392
- hud/cli/docker_utils.py +83 -83
- hud/cli/init.py +280 -281
- hud/cli/interactive.py +353 -353
- hud/cli/mcp_server.py +764 -756
- hud/cli/pull.py +330 -336
- hud/cli/push.py +404 -370
- hud/cli/remote_runner.py +311 -311
- hud/cli/runner.py +160 -160
- hud/cli/tests/__init__.py +3 -3
- hud/cli/tests/test_analyze.py +284 -284
- hud/cli/tests/test_cli_init.py +265 -265
- hud/cli/tests/test_cli_main.py +27 -27
- hud/cli/tests/test_clone.py +142 -142
- hud/cli/tests/test_cursor.py +253 -253
- hud/cli/tests/test_debug.py +453 -453
- hud/cli/tests/test_mcp_server.py +139 -139
- hud/cli/tests/test_utils.py +388 -388
- hud/cli/utils.py +263 -263
- hud/clients/README.md +143 -143
- hud/clients/__init__.py +16 -16
- hud/clients/base.py +378 -379
- hud/clients/fastmcp.py +222 -222
- hud/clients/mcp_use.py +298 -278
- hud/clients/tests/__init__.py +1 -1
- hud/clients/tests/test_client_integration.py +111 -111
- hud/clients/tests/test_fastmcp.py +342 -342
- hud/clients/tests/test_protocol.py +188 -188
- hud/clients/utils/__init__.py +1 -1
- hud/clients/utils/retry_transport.py +160 -160
- hud/datasets.py +327 -322
- hud/misc/__init__.py +1 -1
- hud/misc/claude_plays_pokemon.py +292 -292
- hud/otel/__init__.py +35 -35
- hud/otel/collector.py +142 -142
- hud/otel/config.py +164 -164
- hud/otel/context.py +536 -536
- hud/otel/exporters.py +366 -366
- hud/otel/instrumentation.py +97 -97
- hud/otel/processors.py +118 -118
- hud/otel/tests/__init__.py +1 -1
- hud/otel/tests/test_processors.py +197 -197
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -114
- hud/server/helper/__init__.py +5 -5
- hud/server/low_level.py +132 -132
- hud/server/server.py +170 -166
- hud/server/tests/__init__.py +3 -3
- hud/settings.py +73 -73
- hud/shared/__init__.py +5 -5
- hud/shared/exceptions.py +180 -180
- hud/shared/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -157
- hud/shared/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -25
- hud/telemetry/instrument.py +379 -379
- hud/telemetry/job.py +309 -309
- hud/telemetry/replay.py +74 -74
- hud/telemetry/trace.py +83 -83
- hud/tools/__init__.py +33 -33
- hud/tools/base.py +365 -365
- hud/tools/bash.py +161 -161
- hud/tools/computer/__init__.py +15 -15
- hud/tools/computer/anthropic.py +437 -437
- hud/tools/computer/hud.py +376 -376
- hud/tools/computer/openai.py +295 -295
- hud/tools/computer/settings.py +82 -82
- hud/tools/edit.py +314 -314
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -539
- hud/tools/executors/pyautogui.py +621 -621
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -511
- hud/tools/playwright.py +412 -412
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -282
- hud/tools/tests/test_bash.py +158 -158
- hud/tools/tests/test_bash_extended.py +197 -197
- hud/tools/tests/test_computer.py +425 -425
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -259
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -145
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -72
- hud/tools/utils.py +50 -50
- hud/types.py +136 -136
- hud/utils/__init__.py +10 -10
- hud/utils/async_utils.py +65 -65
- hud/utils/design.py +236 -168
- hud/utils/mcp.py +55 -55
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -173
- hud/utils/tests/test_init.py +17 -17
- hud/utils/tests/test_progress.py +261 -261
- hud/utils/tests/test_telemetry.py +82 -82
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
- hud_python-0.4.3.dist-info/RECORD +131 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
- hud/agents/art.py +0 -101
- hud_python-0.4.1.dist-info/RECORD +0 -132
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/agents/claude.py
CHANGED
|
@@ -1,373 +1,373 @@
|
|
|
1
|
-
"""Claude MCP Agent implementation."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import copy
|
|
6
|
-
import logging
|
|
7
|
-
from typing import TYPE_CHECKING, Any, ClassVar, cast
|
|
8
|
-
|
|
9
|
-
from anthropic import AsyncAnthropic, BadRequestError
|
|
10
|
-
from anthropic.types.beta import BetaContentBlockParam, BetaImageBlockParam, BetaTextBlockParam
|
|
11
|
-
|
|
12
|
-
import hud
|
|
13
|
-
|
|
14
|
-
if TYPE_CHECKING:
|
|
15
|
-
from anthropic.types.beta import (
|
|
16
|
-
BetaCacheControlEphemeralParam,
|
|
17
|
-
BetaContentBlockParam,
|
|
18
|
-
BetaImageBlockParam,
|
|
19
|
-
BetaMessageParam,
|
|
20
|
-
BetaTextBlockParam,
|
|
21
|
-
BetaToolResultBlockParam,
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
from hud.datasets import Task
|
|
25
|
-
|
|
26
|
-
import mcp.types as types
|
|
27
|
-
|
|
28
|
-
from hud.settings import settings
|
|
29
|
-
from hud.tools.computer.settings import computer_settings
|
|
30
|
-
from hud.types import AgentResponse, MCPToolCall, MCPToolResult
|
|
31
|
-
|
|
32
|
-
from .base import MCPAgent
|
|
33
|
-
|
|
34
|
-
logger = logging.getLogger(__name__)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class ClaudeAgent(MCPAgent):
    """
    Claude agent that uses MCP servers for tool execution.

    This agent uses Claude's native tool calling capabilities but executes
    tools through MCP servers instead of direct implementation.
    """

    metadata: ClassVar[dict[str, Any]] = {
        "display_width": computer_settings.ANTHROPIC_COMPUTER_WIDTH,
        "display_height": computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
    }

    # MCP tool names that are presented to Claude as its native "computer" tool.
    _COMPUTER_TOOL_NAMES: ClassVar[tuple[str, ...]] = (
        "computer",
        "computer_anthropic",
        "anthropic_computer",
    )
    # Content block types that receive a cache_control marker.
    _CACHEABLE_BLOCK_TYPES: ClassVar[tuple[str, ...]] = (
        "text",
        "image",
        "tool_use",
        "tool_result",
    )
    # The Anthropic API rejects requests carrying more cache_control markers than this.
    _MAX_CACHE_BREAKPOINTS: ClassVar[int] = 4
    # Number of most recent messages kept (after the first one) when the prompt is too long.
    _TRUNCATED_HISTORY_LENGTH: ClassVar[int] = 20

    def __init__(
        self,
        model_client: AsyncAnthropic | None = None,
        model: str = "claude-3-7-sonnet-20250219",
        max_tokens: int = 4096,
        use_computer_beta: bool = True,
        **kwargs: Any,
    ) -> None:
        """
        Initialize Claude MCP agent.

        Args:
            model_client: AsyncAnthropic client (created from settings if not provided)
            model: Claude model to use
            max_tokens: Maximum tokens for response
            use_computer_beta: Whether to use computer-use beta features
            **kwargs: Additional arguments passed to MCPAgent (including mcp_client)

        Raises:
            ValueError: If no client is given and no Anthropic API key is configured.
        """
        super().__init__(**kwargs)

        # Initialize client if not provided
        if model_client is None:
            api_key = settings.anthropic_api_key
            if not api_key:
                raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
            model_client = AsyncAnthropic(api_key=api_key)

        self.anthropic_client = model_client
        self.model = model
        self.max_tokens = max_tokens
        self.use_computer_beta = use_computer_beta

        self.model_name = self.model

        # Track mapping from Claude tool names to MCP tool names
        self._claude_to_mcp_tool_map: dict[str, str] = {}
        self.claude_tools: list[dict] = []

        # Base system prompt for autonomous operation
        self.system_prompt = (
            "You are Claude, an AI assistant created by Anthropic. "
            "You are helpful, harmless, and honest.\n"
            "\n"
            "When working on tasks:\n"
            "1. Be thorough and systematic in your approach\n"
            "2. Complete tasks autonomously without asking for confirmation\n"
            "3. Use available tools efficiently to accomplish your goals\n"
            "4. Verify your actions and ensure task completion\n"
            "5. Be precise and accurate in all operations\n"
            "\n"
            "Remember: You are expected to complete tasks autonomously. "
            "The user trusts you to accomplish what they asked."
        )

    async def initialize(self, task: str | Task | None = None) -> None:
        """Initialize the agent and build tool mappings."""
        await super().initialize(task)
        # Build tool mappings after tools are discovered
        self._convert_tools_for_claude()

    async def get_system_messages(self) -> list[Any]:
        """No system messages for Claude because the prompt is applied in get_response."""
        return []

    @staticmethod
    def _image_block(data: str, media_type: str) -> BetaImageBlockParam:
        """Build a base64 image block that carries the image's own media type."""
        return cast(
            "BetaImageBlockParam",
            {
                "type": "image",
                "source": {"type": "base64", "media_type": media_type, "data": data},
            },
        )

    async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
        """
        Convert MCP content blocks into a single Claude user message.

        Args:
            blocks: MCP content blocks to send.

        Returns:
            A one-element list holding a ``role="user"`` message whose content
            is the converted blocks.
        """
        anthropic_blocks: list[BetaContentBlockParam] = []

        for block in blocks:
            if isinstance(block, types.TextContent):
                # Only include fields that Anthropic expects
                anthropic_blocks.append(
                    cast("BetaTextBlockParam", {"type": "text", "text": block.text})
                )
            elif isinstance(block, types.ImageContent):
                anthropic_blocks.append(self._image_block(block.data, block.mimeType))
            else:
                # For other types, pass the block through unchanged but log a warning
                logger.warning("Unknown content block type: %s", type(block))
                anthropic_blocks.append(cast("BetaContentBlockParam", block))

        return [cast("BetaMessageParam", {"role": "user", "content": anthropic_blocks})]

    @hud.instrument(
        span_type="agent",
        record_args=False,  # Messages can be large
        record_result=True,
    )
    async def get_response(self, messages: list[BetaMessageParam]) -> AgentResponse:
        """
        Get response from Claude including any tool calls.

        The assistant reply is appended to ``messages`` in place. When the API
        reports that the prompt is too long, the request is retried with only
        the first message plus the most recent messages.

        Args:
            messages: Conversation history in Claude's message format.

        Returns:
            AgentResponse with the text (prefixed by any thinking content), the
            requested tool calls, and ``done=False`` when tool calls are present.

        Raises:
            BadRequestError: For any other bad request, or when the prompt is
                still too long and the history cannot be truncated further.
        """
        current_messages = messages.copy()

        # Everything except the messages is identical across retries.
        request_kwargs: dict[str, Any] = {
            "model": self.model,
            "max_tokens": self.max_tokens,
            "system": self.system_prompt,
            "tools": self.claude_tools,
            "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
        }

        # Add beta features if using computer tools
        if self.use_computer_beta and any(
            tool.get("type") == "computer_20250124" for tool in self.claude_tools
        ):
            request_kwargs["betas"] = ["computer-use-2025-01-24"]

        keep = self._TRUNCATED_HISTORY_LENGTH
        while True:
            request_kwargs["messages"] = self._add_prompt_caching(current_messages)
            try:
                response = await self.anthropic_client.beta.messages.create(**request_kwargs)
                break
            except BadRequestError as e:
                # Match anywhere in the message: the SDK's status errors appear to put an
                # "Error code: ..." prefix before the API text, so a startswith() check
                # would miss it. NOTE(review): confirm against the installed SDK version.
                if "prompt is too long" not in e.message:
                    raise
                logger.warning("Prompt too long, truncating message history")
                # Nothing left to drop: give up instead of retrying the same request.
                if len(current_messages) <= keep + 1:
                    raise
                # Keep first message and the most recent messages
                current_messages = [current_messages[0], *current_messages[-keep:]]

        messages.append(
            cast("BetaMessageParam", {"role": "assistant", "content": response.content})
        )

        # Process response
        result = AgentResponse(content="", tool_calls=[], done=True)
        text_parts: list[str] = []
        thinking_parts: list[str] = []

        for block in response.content:
            if block.type == "tool_use":
                # Map Claude tool name back to MCP tool name
                mcp_tool_name = self._claude_to_mcp_tool_map.get(block.name, block.name)

                # Create MCPToolCall object with Claude metadata as extra fields
                # Pyright will complain but the tool class accepts extra fields
                tool_call = MCPToolCall(
                    id=block.id,  # canonical identifier for telemetry
                    name=mcp_tool_name,
                    arguments=block.input,
                    claude_name=block.name,  # type: ignore
                )
                result.tool_calls.append(tool_call)
                result.done = False
            elif block.type == "text":
                text_parts.append(block.text)
            elif block.type == "thinking":
                thinking_parts.append(f"Thinking: {block.thinking}\n")

        # Thinking content (if any) goes in front of the text
        result.content = "".join(thinking_parts) + "".join(text_parts)

        return result

    async def format_tool_results(
        self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
    ) -> list[BetaMessageParam]:
        """
        Format tool results into Claude messages.

        Args:
            tool_calls: Tool calls that were executed.
            tool_results: Their results, in the same order and of the same length.

        Returns:
            A one-element list holding a user message with one tool_result block
            per tool call that has an id.

        Raises:
            ValueError: If the two lists differ in length (``zip(strict=True)``).
        """
        user_content = []

        for tool_call, result in zip(tool_calls, tool_results, strict=True):
            tool_use_id = tool_call.id
            if not tool_use_id:
                logger.warning("No tool_use_id found for %s", tool_call.name)
                continue

            # Convert MCP tool results to Claude format
            claude_blocks = []

            if result.isError:
                # Report the first text block as the error, or a generic message
                error_msg = next(
                    (c.text for c in result.content if isinstance(c, types.TextContent)),
                    "Tool execution failed",
                )
                claude_blocks.append(text_to_content_block(f"Error: {error_msg}"))
            else:
                for content in result.content:
                    if isinstance(content, types.TextContent):
                        claude_blocks.append(text_to_content_block(content.text))
                    elif isinstance(content, types.ImageContent):
                        # Forward the real media type instead of assuming PNG,
                        # consistent with format_blocks.
                        claude_blocks.append(
                            self._image_block(content.data, content.mimeType)
                        )

            user_content.append(tool_use_content_block(tool_use_id, claude_blocks))

        # Return as a user message containing all tool results
        return [cast("BetaMessageParam", {"role": "user", "content": user_content})]

    async def create_user_message(self, text: str) -> BetaMessageParam:
        """Create a user message in Claude's format."""
        return cast("BetaMessageParam", {"role": "user", "content": text})

    def _convert_tools_for_claude(self) -> list[dict]:
        """
        Convert MCP tools to Claude tool format.

        Rebuilds ``self.claude_tools`` and ``self._claude_to_mcp_tool_map`` from
        ``self._available_tools``. Lifecycle tools are not exposed to Claude.

        Returns:
            The new list of Claude tool definitions.
        """
        claude_tools: list[dict] = []
        self._claude_to_mcp_tool_map = {}  # Reset mapping
        computer_tool_added = False

        for tool in self._available_tools:
            if tool.name in self._COMPUTER_TOOL_NAMES:
                # Map Claude's "computer" back to the actual MCP tool name
                self._claude_to_mcp_tool_map["computer"] = tool.name
                # Several aliases may be available at once; the native tool must be
                # declared only once or the tool list contains a duplicated name.
                if computer_tool_added:
                    continue
                computer_tool_added = True
                # Use Claude's native computer use format with configurable dimensions
                claude_tools.append(
                    {
                        "type": "computer_20250124",
                        "name": "computer",
                        "display_width_px": self.metadata["display_width"],
                        "display_height_px": self.metadata["display_height"],
                    }
                )
            elif tool.name not in self.lifecycle_tools:
                # Convert regular tools
                claude_tools.append(
                    {
                        "name": tool.name,
                        "description": tool.description or f"Execute {tool.name}",
                        "input_schema": tool.inputSchema
                        or {
                            "type": "object",
                            "properties": {},
                        },
                    }
                )
                # Direct mapping for non-computer tools
                self._claude_to_mcp_tool_map[tool.name] = tool.name

        self.claude_tools = claude_tools
        return claude_tools

    def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
        """
        Return a deep copy of ``messages`` with cache control on the last user message.

        Only dict blocks of a cacheable type are marked, and at most
        ``_MAX_CACHE_BREAKPOINTS`` of them (the last ones), so a message with
        many blocks stays within the API limit. The input list is not modified.
        """
        messages_cached = copy.deepcopy(messages)

        if not messages_cached or messages_cached[-1].get("role") != "user":
            return messages_cached

        last_content = messages_cached[-1]["content"]
        # Plain-string content (see create_user_message) carries no blocks to mark
        if not isinstance(last_content, list):
            return messages_cached

        cacheable = [
            block
            for block in last_content
            if isinstance(block, dict) and block.get("type") in self._CACHEABLE_BLOCK_TYPES
        ]
        for block in cacheable[-self._MAX_CACHE_BREAKPOINTS :]:
            cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
            block["cache_control"] = cache_control  # type: ignore[reportGeneralTypeIssues]

        return messages_cached
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
def base64_to_content_block(base64: str, media_type: str = "image/png") -> BetaImageBlockParam:
    """
    Convert base64 image data to a Claude image content block.

    Args:
        base64: Base64-encoded image data.
        media_type: MIME type of the image; defaults to ``"image/png"``.

    Returns:
        An image block with a base64 source.
    """
    return {
        "type": "image",
        "source": {"type": "base64", "media_type": media_type, "data": base64},
    }
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
def text_to_content_block(text: str) -> BetaTextBlockParam:
    """
    Convert text to a Claude text content block.

    Args:
        text: Text to wrap; used unchanged.

    Returns:
        A block of the form ``{"type": "text", "text": text}``.
    """
    return {"type": "text", "text": text}
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
def tool_use_content_block(
    tool_use_id: str,
    content: list[BetaTextBlockParam | BetaImageBlockParam],
    *,
    is_error: bool = False,
) -> BetaToolResultBlockParam:
    """
    Create a tool result content block answering a tool_use request.

    Args:
        tool_use_id: Id of the tool_use block this result belongs to.
        content: Text and image blocks produced by the tool.
        is_error: When true, the block is flagged with ``"is_error": True``.
            The key is omitted otherwise, so the default output is unchanged.

    Returns:
        A ``tool_result`` block.
    """
    block: BetaToolResultBlockParam = {
        "type": "tool_result",
        "tool_use_id": tool_use_id,
        "content": content,
    }
    if is_error:
        block["is_error"] = True
    return block
|
|
1
|
+
"""Claude MCP Agent implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import copy
|
|
6
|
+
import logging
|
|
7
|
+
from typing import TYPE_CHECKING, Any, ClassVar, cast
|
|
8
|
+
|
|
9
|
+
from anthropic import AsyncAnthropic, BadRequestError
|
|
10
|
+
from anthropic.types.beta import BetaContentBlockParam, BetaImageBlockParam, BetaTextBlockParam
|
|
11
|
+
|
|
12
|
+
import hud
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from anthropic.types.beta import (
|
|
16
|
+
BetaCacheControlEphemeralParam,
|
|
17
|
+
BetaContentBlockParam,
|
|
18
|
+
BetaImageBlockParam,
|
|
19
|
+
BetaMessageParam,
|
|
20
|
+
BetaTextBlockParam,
|
|
21
|
+
BetaToolResultBlockParam,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
from hud.datasets import Task
|
|
25
|
+
|
|
26
|
+
import mcp.types as types
|
|
27
|
+
|
|
28
|
+
from hud.settings import settings
|
|
29
|
+
from hud.tools.computer.settings import computer_settings
|
|
30
|
+
from hud.types import AgentResponse, MCPToolCall, MCPToolResult
|
|
31
|
+
|
|
32
|
+
from .base import MCPAgent
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ClaudeAgent(MCPAgent):
    """
    Claude agent that uses MCP servers for tool execution.

    This agent uses Claude's native tool calling capabilities but executes
    tools through MCP servers instead of direct implementation.
    """

    metadata: ClassVar[dict[str, Any]] = {
        "display_width": computer_settings.ANTHROPIC_COMPUTER_WIDTH,
        "display_height": computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
    }

    # MCP tool names that are presented to Claude as its native "computer" tool.
    _COMPUTER_TOOL_NAMES: ClassVar[tuple[str, ...]] = (
        "computer",
        "computer_anthropic",
        "anthropic_computer",
    )
    # Content block types that receive a cache_control marker.
    _CACHEABLE_BLOCK_TYPES: ClassVar[tuple[str, ...]] = (
        "text",
        "image",
        "tool_use",
        "tool_result",
    )
    # The Anthropic API rejects requests carrying more cache_control markers than this.
    _MAX_CACHE_BREAKPOINTS: ClassVar[int] = 4
    # Number of most recent messages kept (after the first one) when the prompt is too long.
    _TRUNCATED_HISTORY_LENGTH: ClassVar[int] = 20

    def __init__(
        self,
        model_client: AsyncAnthropic | None = None,
        model: str = "claude-3-7-sonnet-20250219",
        max_tokens: int = 4096,
        use_computer_beta: bool = True,
        **kwargs: Any,
    ) -> None:
        """
        Initialize Claude MCP agent.

        Args:
            model_client: AsyncAnthropic client (created from settings if not provided)
            model: Claude model to use
            max_tokens: Maximum tokens for response
            use_computer_beta: Whether to use computer-use beta features
            **kwargs: Additional arguments passed to MCPAgent (including mcp_client)

        Raises:
            ValueError: If no client is given and no Anthropic API key is configured.
        """
        super().__init__(**kwargs)

        # Initialize client if not provided
        if model_client is None:
            api_key = settings.anthropic_api_key
            if not api_key:
                raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
            model_client = AsyncAnthropic(api_key=api_key)

        self.anthropic_client = model_client
        self.model = model
        self.max_tokens = max_tokens
        self.use_computer_beta = use_computer_beta

        self.model_name = self.model

        # Track mapping from Claude tool names to MCP tool names
        self._claude_to_mcp_tool_map: dict[str, str] = {}
        self.claude_tools: list[dict] = []

        # Base system prompt for autonomous operation
        self.system_prompt = (
            "You are Claude, an AI assistant created by Anthropic. "
            "You are helpful, harmless, and honest.\n"
            "\n"
            "When working on tasks:\n"
            "1. Be thorough and systematic in your approach\n"
            "2. Complete tasks autonomously without asking for confirmation\n"
            "3. Use available tools efficiently to accomplish your goals\n"
            "4. Verify your actions and ensure task completion\n"
            "5. Be precise and accurate in all operations\n"
            "\n"
            "Remember: You are expected to complete tasks autonomously. "
            "The user trusts you to accomplish what they asked."
        )

    async def initialize(self, task: str | Task | None = None) -> None:
        """Initialize the agent and build tool mappings."""
        await super().initialize(task)
        # Build tool mappings after tools are discovered
        self._convert_tools_for_claude()

    async def get_system_messages(self) -> list[Any]:
        """No system messages for Claude because the prompt is applied in get_response."""
        return []

    @staticmethod
    def _image_block(data: str, media_type: str) -> BetaImageBlockParam:
        """Build a base64 image block that carries the image's own media type."""
        return cast(
            "BetaImageBlockParam",
            {
                "type": "image",
                "source": {"type": "base64", "media_type": media_type, "data": data},
            },
        )

    async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
        """
        Convert MCP content blocks into a single Claude user message.

        Args:
            blocks: MCP content blocks to send.

        Returns:
            A one-element list holding a ``role="user"`` message whose content
            is the converted blocks.
        """
        anthropic_blocks: list[BetaContentBlockParam] = []

        for block in blocks:
            if isinstance(block, types.TextContent):
                # Only include fields that Anthropic expects
                anthropic_blocks.append(
                    cast("BetaTextBlockParam", {"type": "text", "text": block.text})
                )
            elif isinstance(block, types.ImageContent):
                anthropic_blocks.append(self._image_block(block.data, block.mimeType))
            else:
                # For other types, pass the block through unchanged but log a warning
                logger.warning("Unknown content block type: %s", type(block))
                anthropic_blocks.append(cast("BetaContentBlockParam", block))

        return [cast("BetaMessageParam", {"role": "user", "content": anthropic_blocks})]

    @hud.instrument(
        span_type="agent",
        record_args=False,  # Messages can be large
        record_result=True,
    )
    async def get_response(self, messages: list[BetaMessageParam]) -> AgentResponse:
        """
        Get response from Claude including any tool calls.

        The assistant reply is appended to ``messages`` in place. When the API
        reports that the prompt is too long, the request is retried with only
        the first message plus the most recent messages.

        Args:
            messages: Conversation history in Claude's message format.

        Returns:
            AgentResponse with the text (prefixed by any thinking content), the
            requested tool calls, and ``done=False`` when tool calls are present.

        Raises:
            BadRequestError: For any other bad request, or when the prompt is
                still too long and the history cannot be truncated further.
        """
        current_messages = messages.copy()

        # Everything except the messages is identical across retries.
        request_kwargs: dict[str, Any] = {
            "model": self.model,
            "max_tokens": self.max_tokens,
            "system": self.system_prompt,
            "tools": self.claude_tools,
            "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
        }

        # Add beta features if using computer tools
        if self.use_computer_beta and any(
            tool.get("type") == "computer_20250124" for tool in self.claude_tools
        ):
            request_kwargs["betas"] = ["computer-use-2025-01-24"]

        keep = self._TRUNCATED_HISTORY_LENGTH
        while True:
            request_kwargs["messages"] = self._add_prompt_caching(current_messages)
            try:
                response = await self.anthropic_client.beta.messages.create(**request_kwargs)
                break
            except BadRequestError as e:
                # Match anywhere in the message: the SDK's status errors appear to put an
                # "Error code: ..." prefix before the API text, so a startswith() check
                # would miss it. NOTE(review): confirm against the installed SDK version.
                if "prompt is too long" not in e.message:
                    raise
                logger.warning("Prompt too long, truncating message history")
                # Nothing left to drop: give up instead of retrying the same request.
                if len(current_messages) <= keep + 1:
                    raise
                # Keep first message and the most recent messages
                current_messages = [current_messages[0], *current_messages[-keep:]]

        messages.append(
            cast("BetaMessageParam", {"role": "assistant", "content": response.content})
        )

        # Process response
        result = AgentResponse(content="", tool_calls=[], done=True)
        text_parts: list[str] = []
        thinking_parts: list[str] = []

        for block in response.content:
            if block.type == "tool_use":
                # Map Claude tool name back to MCP tool name
                mcp_tool_name = self._claude_to_mcp_tool_map.get(block.name, block.name)

                # Create MCPToolCall object with Claude metadata as extra fields
                # Pyright will complain but the tool class accepts extra fields
                tool_call = MCPToolCall(
                    id=block.id,  # canonical identifier for telemetry
                    name=mcp_tool_name,
                    arguments=block.input,
                    claude_name=block.name,  # type: ignore
                )
                result.tool_calls.append(tool_call)
                result.done = False
            elif block.type == "text":
                text_parts.append(block.text)
            elif block.type == "thinking":
                thinking_parts.append(f"Thinking: {block.thinking}\n")

        # Thinking content (if any) goes in front of the text
        result.content = "".join(thinking_parts) + "".join(text_parts)

        return result

    async def format_tool_results(
        self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
    ) -> list[BetaMessageParam]:
        """
        Format tool results into Claude messages.

        Args:
            tool_calls: Tool calls that were executed.
            tool_results: Their results, in the same order and of the same length.

        Returns:
            A one-element list holding a user message with one tool_result block
            per tool call that has an id.

        Raises:
            ValueError: If the two lists differ in length (``zip(strict=True)``).
        """
        user_content = []

        for tool_call, result in zip(tool_calls, tool_results, strict=True):
            tool_use_id = tool_call.id
            if not tool_use_id:
                logger.warning("No tool_use_id found for %s", tool_call.name)
                continue

            # Convert MCP tool results to Claude format
            claude_blocks = []

            if result.isError:
                # Report the first text block as the error, or a generic message
                error_msg = next(
                    (c.text for c in result.content if isinstance(c, types.TextContent)),
                    "Tool execution failed",
                )
                claude_blocks.append(text_to_content_block(f"Error: {error_msg}"))
            else:
                for content in result.content:
                    if isinstance(content, types.TextContent):
                        claude_blocks.append(text_to_content_block(content.text))
                    elif isinstance(content, types.ImageContent):
                        # Forward the real media type instead of assuming PNG,
                        # consistent with format_blocks.
                        claude_blocks.append(
                            self._image_block(content.data, content.mimeType)
                        )

            user_content.append(tool_use_content_block(tool_use_id, claude_blocks))

        # Return as a user message containing all tool results
        return [cast("BetaMessageParam", {"role": "user", "content": user_content})]

    async def create_user_message(self, text: str) -> BetaMessageParam:
        """Create a user message in Claude's format."""
        return cast("BetaMessageParam", {"role": "user", "content": text})

    def _convert_tools_for_claude(self) -> list[dict]:
        """
        Convert MCP tools to Claude tool format.

        Rebuilds ``self.claude_tools`` and ``self._claude_to_mcp_tool_map`` from
        ``self._available_tools``. Lifecycle tools are not exposed to Claude.

        Returns:
            The new list of Claude tool definitions.
        """
        claude_tools: list[dict] = []
        self._claude_to_mcp_tool_map = {}  # Reset mapping
        computer_tool_added = False

        for tool in self._available_tools:
            if tool.name in self._COMPUTER_TOOL_NAMES:
                # Map Claude's "computer" back to the actual MCP tool name
                self._claude_to_mcp_tool_map["computer"] = tool.name
                # Several aliases may be available at once; the native tool must be
                # declared only once or the tool list contains a duplicated name.
                if computer_tool_added:
                    continue
                computer_tool_added = True
                # Use Claude's native computer use format with configurable dimensions
                claude_tools.append(
                    {
                        "type": "computer_20250124",
                        "name": "computer",
                        "display_width_px": self.metadata["display_width"],
                        "display_height_px": self.metadata["display_height"],
                    }
                )
            elif tool.name not in self.lifecycle_tools:
                # Convert regular tools
                claude_tools.append(
                    {
                        "name": tool.name,
                        "description": tool.description or f"Execute {tool.name}",
                        "input_schema": tool.inputSchema
                        or {
                            "type": "object",
                            "properties": {},
                        },
                    }
                )
                # Direct mapping for non-computer tools
                self._claude_to_mcp_tool_map[tool.name] = tool.name

        self.claude_tools = claude_tools
        return claude_tools

    def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
        """
        Return a deep copy of ``messages`` with cache control on the last user message.

        Only dict blocks of a cacheable type are marked, and at most
        ``_MAX_CACHE_BREAKPOINTS`` of them (the last ones), so a message with
        many blocks stays within the API limit. The input list is not modified.
        """
        messages_cached = copy.deepcopy(messages)

        if not messages_cached or messages_cached[-1].get("role") != "user":
            return messages_cached

        last_content = messages_cached[-1]["content"]
        # Plain-string content (see create_user_message) carries no blocks to mark
        if not isinstance(last_content, list):
            return messages_cached

        cacheable = [
            block
            for block in last_content
            if isinstance(block, dict) and block.get("type") in self._CACHEABLE_BLOCK_TYPES
        ]
        for block in cacheable[-self._MAX_CACHE_BREAKPOINTS :]:
            cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
            block["cache_control"] = cache_control  # type: ignore[reportGeneralTypeIssues]

        return messages_cached
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def base64_to_content_block(base64: str, media_type: str = "image/png") -> BetaImageBlockParam:
    """
    Convert base64 image data to a Claude image content block.

    Args:
        base64: Base64-encoded image data.
        media_type: MIME type of the image; defaults to ``"image/png"``.

    Returns:
        An image block with a base64 source.
    """
    return {
        "type": "image",
        "source": {"type": "base64", "media_type": media_type, "data": base64},
    }
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def text_to_content_block(text: str) -> BetaTextBlockParam:
    """
    Convert text to a Claude text content block.

    Args:
        text: Text to wrap; used unchanged.

    Returns:
        A block of the form ``{"type": "text", "text": text}``.
    """
    return {"type": "text", "text": text}
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def tool_use_content_block(
    tool_use_id: str,
    content: list[BetaTextBlockParam | BetaImageBlockParam],
    *,
    is_error: bool = False,
) -> BetaToolResultBlockParam:
    """
    Create a tool result content block answering a tool_use request.

    Args:
        tool_use_id: Id of the tool_use block this result belongs to.
        content: Text and image blocks produced by the tool.
        is_error: When true, the block is flagged with ``"is_error": True``.
            The key is omitted otherwise, so the default output is unchanged.

    Returns:
        A ``tool_result`` block.
    """
    block: BetaToolResultBlockParam = {
        "type": "tool_result",
        "tool_use_id": tool_use_id,
        "content": content,
    }
    if is_error:
        block["is_error"] = True
    return block
|