hud-python 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (192) hide show
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +17 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +379 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +354 -0
  45. hud/clients/fastmcp.py +202 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -420
  87. hud/tools/computer/hud.py +376 -334
  88. hud/tools/computer/openai.py +295 -292
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.0.dist-info/METADATA +474 -0
  126. hud_python-0.4.0.dist-info/RECORD +132 -0
  127. hud_python-0.4.0.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.5.dist-info/METADATA +0 -284
  190. hud_python-0.3.5.dist-info/RECORD +0 -120
  191. /hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
@@ -1,321 +1,373 @@
1
- """Claude MCP Agent implementation."""
2
-
3
- from __future__ import annotations
4
-
5
- import copy
6
- import logging
7
- from typing import TYPE_CHECKING, Any, cast
8
-
9
- from anthropic import AsyncAnthropic, BadRequestError
10
-
11
- if TYPE_CHECKING:
12
- from anthropic.types.beta import (
13
- BetaCacheControlEphemeralParam,
14
- BetaImageBlockParam,
15
- BetaMessageParam,
16
- BetaTextBlockParam,
17
- BetaToolResultBlockParam,
18
- )
19
-
20
- from hud.datasets import TaskConfig
21
-
22
- import mcp.types as types
23
- from mcp.types import CallToolRequestParams as MCPToolCall
24
- from mcp.types import CallToolResult as MCPToolResult
25
-
26
- from hud.settings import settings
27
-
28
- from .base import BaseMCPAgent, ModelResponse
29
-
30
- logger = logging.getLogger(__name__)
31
-
32
-
33
- def base64_to_content_block(base64: str) -> BetaImageBlockParam:
34
- """Convert base64 image to Claude content block."""
35
- return {
36
- "type": "image",
37
- "source": {"type": "base64", "media_type": "image/png", "data": base64},
38
- }
39
-
40
-
41
- def text_to_content_block(text: str) -> BetaTextBlockParam:
42
- """Convert text to Claude content block."""
43
- return {"type": "text", "text": text}
44
-
45
-
46
- def tool_use_content_block(
47
- tool_use_id: str, content: list[BetaTextBlockParam | BetaImageBlockParam]
48
- ) -> BetaToolResultBlockParam:
49
- """Create tool result content block."""
50
- return {"type": "tool_result", "tool_use_id": tool_use_id, "content": content}
51
-
52
-
53
- class ClaudeMCPAgent(BaseMCPAgent):
54
- """
55
- Claude agent that uses MCP servers for tool execution.
56
-
57
- This agent uses Claude's native tool calling capabilities but executes
58
- tools through MCP servers instead of direct implementation.
59
- """
60
-
61
- def __init__(
62
- self,
63
- model_client: AsyncAnthropic | None = None,
64
- model: str = "claude-3-7-sonnet-20250219",
65
- max_tokens: int = 4096,
66
- display_width_px: int = 1400,
67
- display_height_px: int = 850,
68
- use_computer_beta: bool = True,
69
- **kwargs: Any,
70
- ) -> None:
71
- """
72
- Initialize Claude MCP agent.
73
-
74
- Args:
75
- model_client: AsyncAnthropic client (created if not provided)
76
- model: Claude model to use
77
- max_tokens: Maximum tokens for response
78
- display_width_px: Display width for computer use tools
79
- display_height_px: Display height for computer use tools
80
- use_computer_beta: Whether to use computer-use beta features
81
- **kwargs: Additional arguments passed to BaseMCPAgent (including mcp_client)
82
- """
83
- super().__init__(**kwargs)
84
-
85
- # Initialize client if not provided
86
- if model_client is None:
87
- api_key = settings.anthropic_api_key
88
- if not api_key:
89
- raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
90
- model_client = AsyncAnthropic(api_key=api_key)
91
-
92
- self.anthropic_client = model_client
93
- self.model = model
94
- self.max_tokens = max_tokens
95
- self.display_width_px = display_width_px
96
- self.display_height_px = display_height_px
97
- self.use_computer_beta = use_computer_beta
98
-
99
- self.model_name = self.model
100
-
101
- # Track mapping from Claude tool names to MCP tool names
102
- self._claude_to_mcp_tool_map: dict[str, str] = {}
103
-
104
- async def initialize(self, task: str | TaskConfig | None = None) -> None:
105
- """Initialize the agent and build tool mappings."""
106
- await super().initialize(task)
107
- # Build tool mappings after tools are discovered
108
- self._convert_tools_for_claude()
109
-
110
- async def create_initial_messages(
111
- self, prompt: str, screenshot: str | None = None
112
- ) -> list[BetaMessageParam]:
113
- """Create initial messages for Claude."""
114
- user_content: list[BetaImageBlockParam | BetaTextBlockParam] = []
115
-
116
- # Add prompt text
117
- user_content.append(text_to_content_block(prompt))
118
-
119
- # Add screenshot if available
120
- if screenshot:
121
- user_content.append(base64_to_content_block(screenshot))
122
-
123
- # Return initial user message
124
- return [
125
- cast(
126
- "BetaMessageParam",
127
- {
128
- "role": "user",
129
- "content": user_content,
130
- },
131
- )
132
- ]
133
-
134
- async def get_model_response(self, messages: list[BetaMessageParam]) -> ModelResponse:
135
- """Get response from Claude including any tool calls."""
136
- # Get Claude tools
137
- claude_tools = self._convert_tools_for_claude()
138
-
139
- # Make API call with retry for prompt length
140
- current_messages = messages.copy()
141
-
142
- while True:
143
- messages_cached = self._add_prompt_caching(current_messages)
144
-
145
- # Build create kwargs
146
- create_kwargs = {
147
- "model": self.model,
148
- "max_tokens": self.max_tokens,
149
- "system": self.get_system_prompt(),
150
- "messages": messages_cached,
151
- "tools": claude_tools,
152
- "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
153
- }
154
-
155
- # Add beta features if using computer tools
156
- if self.use_computer_beta and any(
157
- t.get("type") == "computer_20250124" for t in claude_tools
158
- ):
159
- create_kwargs["betas"] = ["computer-use-2025-01-24"]
160
-
161
- try:
162
- response = await self.anthropic_client.beta.messages.create(**create_kwargs)
163
- break
164
- except BadRequestError as e:
165
- if e.message.startswith("prompt is too long"):
166
- logger.warning("Prompt too long, truncating message history")
167
- # Keep first message and last 20 messages
168
- if len(current_messages) > 21:
169
- current_messages = [current_messages[0]] + current_messages[-20:]
170
- else:
171
- raise
172
- else:
173
- raise
174
-
175
- messages.append(
176
- cast(
177
- "BetaMessageParam",
178
- {
179
- "role": "assistant",
180
- "content": response.content,
181
- },
182
- )
183
- )
184
-
185
- # Process response
186
- result = ModelResponse(content="", tool_calls=[], done=True)
187
-
188
- # Extract text content and reasoning
189
- text_content = ""
190
- thinking_content = ""
191
-
192
- for block in response.content:
193
- if block.type == "tool_use":
194
- # Map Claude tool name back to MCP tool name
195
- mcp_tool_name = self._claude_to_mcp_tool_map.get(block.name, block.name)
196
-
197
- # Create MCPToolCall object with Claude metadata as extra fields
198
- # Pyright will complain but the tool class accepts extra fields
199
- tool_call = MCPToolCall(
200
- name=mcp_tool_name,
201
- arguments=block.input,
202
- tool_use_id=block.id, # type: ignore
203
- claude_name=block.name, # type: ignore
204
- )
205
- result.tool_calls.append(tool_call)
206
- result.done = False
207
- elif block.type == "text":
208
- text_content += block.text
209
- elif hasattr(block, "type") and block.type == "thinking":
210
- thinking_content += f"Thinking: {block.thinking}\n"
211
-
212
- # Combine text and thinking for final content
213
- if thinking_content:
214
- result.content = thinking_content + text_content
215
- else:
216
- result.content = text_content
217
-
218
- return result
219
-
220
- async def format_tool_results(
221
- self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
222
- ) -> list[BetaMessageParam]:
223
- """Format tool results into Claude messages."""
224
- # Process each tool result
225
- user_content = []
226
-
227
- for tool_call, result in zip(tool_calls, tool_results, strict=True):
228
- # Extract Claude-specific metadata from extra fields
229
- tool_use_id = getattr(tool_call, "tool_use_id", None)
230
- if not tool_use_id:
231
- logger.warning("No tool_use_id found for %s", tool_call.name)
232
- continue
233
-
234
- # Convert MCP tool results to Claude format
235
- claude_blocks = []
236
-
237
- if result.isError:
238
- # Extract error message from content
239
- error_msg = "Tool execution failed"
240
- for content in result.content:
241
- if isinstance(content, types.TextContent):
242
- error_msg = content.text
243
- break
244
- claude_blocks.append(text_to_content_block(f"Error: {error_msg}"))
245
- else:
246
- # Process success content
247
- for content in result.content:
248
- if isinstance(content, types.TextContent):
249
- claude_blocks.append(text_to_content_block(content.text))
250
- elif isinstance(content, types.ImageContent):
251
- claude_blocks.append(base64_to_content_block(content.data))
252
-
253
- # Add tool result
254
- user_content.append(tool_use_content_block(tool_use_id, claude_blocks))
255
-
256
- # Return as a user message containing all tool results
257
- return [
258
- cast(
259
- "BetaMessageParam",
260
- {
261
- "role": "user",
262
- "content": user_content,
263
- },
264
- )
265
- ]
266
-
267
- async def create_user_message(self, text: str) -> BetaMessageParam:
268
- """Create a user message in Claude's format."""
269
- return cast("BetaMessageParam", {"role": "user", "content": text})
270
-
271
- def _convert_tools_for_claude(self) -> list[dict]:
272
- """Convert MCP tools to Claude tool format."""
273
- claude_tools = []
274
- self._claude_to_mcp_tool_map = {} # Reset mapping
275
-
276
- for tool in self._available_tools:
277
- # Special handling for computer use tools
278
- if tool.name in ["computer", "computer_anthropic", "anthropic_computer"]:
279
- # Use Claude's native computer use format with configurable dimensions
280
- claude_tool = {
281
- "type": "computer_20250124",
282
- "name": "computer",
283
- "display_width_px": self.display_width_px,
284
- "display_height_px": self.display_height_px,
285
- }
286
- # Map Claude's "computer" back to the actual MCP tool name
287
- self._claude_to_mcp_tool_map["computer"] = tool.name
288
- elif tool.name not in self.lifecycle_tools:
289
- # Convert regular tools
290
- claude_tool = {
291
- "name": tool.name,
292
- "description": tool.description or f"Execute {tool.name}",
293
- "input_schema": tool.inputSchema
294
- or {
295
- "type": "object",
296
- "properties": {},
297
- },
298
- }
299
- # Direct mapping for non-computer tools
300
- self._claude_to_mcp_tool_map[tool.name] = tool.name
301
- else:
302
- continue
303
-
304
- claude_tools.append(claude_tool)
305
-
306
- return claude_tools
307
-
308
- def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
309
- """Add prompt caching to messages."""
310
- messages_cached = copy.deepcopy(messages)
311
-
312
- # Mark last user message with cache control
313
- if messages_cached and messages_cached[-1].get("role") == "user":
314
- last_content = messages_cached[-1]["content"]
315
- if isinstance(last_content, list):
316
- for block in last_content:
317
- if block.get("type") not in ["thinking", "redacted_thinking"]:
318
- cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
319
- block["cache_control"] = cache_control # type: ignore[reportGeneralTypeIssues]
320
-
321
- return messages_cached
1
+ """Claude MCP Agent implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import copy
6
+ import logging
7
+ from typing import TYPE_CHECKING, Any, ClassVar, cast
8
+
9
+ from anthropic import AsyncAnthropic, BadRequestError
10
+ from anthropic.types.beta import BetaContentBlockParam, BetaImageBlockParam, BetaTextBlockParam
11
+
12
+ import hud
13
+
14
+ if TYPE_CHECKING:
15
+ from anthropic.types.beta import (
16
+ BetaCacheControlEphemeralParam,
17
+ BetaContentBlockParam,
18
+ BetaImageBlockParam,
19
+ BetaMessageParam,
20
+ BetaTextBlockParam,
21
+ BetaToolResultBlockParam,
22
+ )
23
+
24
+ from hud.datasets import Task
25
+
26
+ import mcp.types as types
27
+
28
+ from hud.settings import settings
29
+ from hud.tools.computer.settings import computer_settings
30
+ from hud.types import AgentResponse, MCPToolCall, MCPToolResult
31
+
32
+ from .base import MCPAgent
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
+ class ClaudeAgent(MCPAgent):
38
+ """
39
+ Claude agent that uses MCP servers for tool execution.
40
+
41
+ This agent uses Claude's native tool calling capabilities but executes
42
+ tools through MCP servers instead of direct implementation.
43
+ """
44
+
45
+ metadata: ClassVar[dict[str, Any]] = {
46
+ "display_width": computer_settings.ANTHROPIC_COMPUTER_WIDTH,
47
+ "display_height": computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
48
+ }
49
+
50
+ def __init__(
51
+ self,
52
+ model_client: AsyncAnthropic | None = None,
53
+ model: str = "claude-3-7-sonnet-20250219",
54
+ max_tokens: int = 4096,
55
+ use_computer_beta: bool = True,
56
+ **kwargs: Any,
57
+ ) -> None:
58
+ """
59
+ Initialize Claude MCP agent.
60
+
61
+ Args:
62
+ model_client: AsyncAnthropic client (created if not provided)
63
+ model: Claude model to use
64
+ max_tokens: Maximum tokens for response
65
+ use_computer_beta: Whether to use computer-use beta features
66
+ **kwargs: Additional arguments passed to BaseMCPAgent (including mcp_client)
67
+ """
68
+ super().__init__(**kwargs)
69
+
70
+ # Initialize client if not provided
71
+ if model_client is None:
72
+ api_key = settings.anthropic_api_key
73
+ if not api_key:
74
+ raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
75
+ model_client = AsyncAnthropic(api_key=api_key)
76
+
77
+ self.anthropic_client = model_client
78
+ self.model = model
79
+ self.max_tokens = max_tokens
80
+ self.use_computer_beta = use_computer_beta
81
+
82
+ self.model_name = self.model
83
+
84
+ # Track mapping from Claude tool names to MCP tool names
85
+ self._claude_to_mcp_tool_map: dict[str, str] = {}
86
+ self.claude_tools: list[dict] = []
87
+
88
+ # Base system prompt for autonomous operation
89
+ self.system_prompt = """
90
+ You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
91
+
92
+ When working on tasks:
93
+ 1. Be thorough and systematic in your approach
94
+ 2. Complete tasks autonomously without asking for confirmation
95
+ 3. Use available tools efficiently to accomplish your goals
96
+ 4. Verify your actions and ensure task completion
97
+ 5. Be precise and accurate in all operations
98
+
99
+ Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
100
+ """.strip() # noqa: E501
101
+
102
+ async def initialize(self, task: str | Task | None = None) -> None:
103
+ """Initialize the agent and build tool mappings."""
104
+ await super().initialize(task)
105
+ # Build tool mappings after tools are discovered
106
+ self._convert_tools_for_claude()
107
+
108
+ async def get_system_messages(self) -> list[Any]:
109
+ """No system messages for Claude because applied in get_response"""
110
+ return []
111
+
112
+ async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
113
+ """Format messages for Claude."""
114
+ # Convert MCP content types to Anthropic content types
115
+ anthropic_blocks: list[BetaContentBlockParam] = []
116
+
117
+ for block in blocks:
118
+ if isinstance(block, types.TextContent):
119
+ # Only include fields that Anthropic expects
120
+ anthropic_blocks.append(
121
+ cast(
122
+ "BetaTextBlockParam",
123
+ {
124
+ "type": "text",
125
+ "text": block.text,
126
+ },
127
+ )
128
+ )
129
+ elif isinstance(block, types.ImageContent):
130
+ # Convert MCP ImageContent to Anthropic format
131
+ anthropic_blocks.append(
132
+ cast(
133
+ "BetaImageBlockParam",
134
+ {
135
+ "type": "image",
136
+ "source": {
137
+ "type": "base64",
138
+ "media_type": block.mimeType,
139
+ "data": block.data,
140
+ },
141
+ },
142
+ )
143
+ )
144
+ else:
145
+ # For other types, try to cast but log a warning
146
+ logger.warning("Unknown content block type: %s", type(block))
147
+ anthropic_blocks.append(cast("BetaContentBlockParam", block))
148
+
149
+ return [
150
+ cast(
151
+ "BetaMessageParam",
152
+ {
153
+ "role": "user",
154
+ "content": anthropic_blocks,
155
+ },
156
+ )
157
+ ]
158
+
159
+ @hud.instrument(
160
+ span_type="agent",
161
+ record_args=False, # Messages can be large
162
+ record_result=True,
163
+ )
164
+ async def get_response(self, messages: list[BetaMessageParam]) -> AgentResponse:
165
+ """Get response from Claude including any tool calls."""
166
+
167
+ # Make API call with retry for prompt length
168
+ current_messages = messages.copy()
169
+
170
+ while True:
171
+ messages_cached = self._add_prompt_caching(current_messages)
172
+
173
+ # Build create kwargs
174
+ create_kwargs = {
175
+ "model": self.model,
176
+ "max_tokens": self.max_tokens,
177
+ "system": self.system_prompt,
178
+ "messages": messages_cached,
179
+ "tools": self.claude_tools,
180
+ "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
181
+ }
182
+
183
+ # Add beta features if using computer tools
184
+ if self.use_computer_beta and any(
185
+ tool.get("type") == "computer_20250124" for tool in self.claude_tools
186
+ ):
187
+ create_kwargs["betas"] = ["computer-use-2025-01-24"]
188
+
189
+ try:
190
+ response = await self.anthropic_client.beta.messages.create(**create_kwargs)
191
+ break
192
+ except BadRequestError as e:
193
+ if e.message.startswith("prompt is too long"):
194
+ logger.warning("Prompt too long, truncating message history")
195
+ # Keep first message and last 20 messages
196
+ if len(current_messages) > 21:
197
+ current_messages = [current_messages[0], *current_messages[-20:]]
198
+ else:
199
+ raise
200
+ else:
201
+ raise
202
+
203
+ messages.append(
204
+ cast(
205
+ "BetaMessageParam",
206
+ {
207
+ "role": "assistant",
208
+ "content": response.content,
209
+ },
210
+ )
211
+ )
212
+
213
+ # Process response
214
+ result = AgentResponse(content="", tool_calls=[], done=True)
215
+
216
+ # Extract text content and reasoning
217
+ text_content = ""
218
+ thinking_content = ""
219
+
220
+ for block in response.content:
221
+ if block.type == "tool_use":
222
+ # Map Claude tool name back to MCP tool name
223
+ mcp_tool_name = self._claude_to_mcp_tool_map.get(block.name, block.name)
224
+
225
+ # Create MCPToolCall object with Claude metadata as extra fields
226
+ # Pyright will complain but the tool class accepts extra fields
227
+ tool_call = MCPToolCall(
228
+ id=block.id, # canonical identifier for telemetry
229
+ name=mcp_tool_name,
230
+ arguments=block.input,
231
+ claude_name=block.name, # type: ignore
232
+ )
233
+ result.tool_calls.append(tool_call)
234
+ result.done = False
235
+ elif block.type == "text":
236
+ text_content += block.text
237
+ elif hasattr(block, "type") and block.type == "thinking":
238
+ thinking_content += f"Thinking: {block.thinking}\n"
239
+
240
+ # Combine text and thinking for final content
241
+ if thinking_content:
242
+ result.content = thinking_content + text_content
243
+ else:
244
+ result.content = text_content
245
+
246
+ return result
247
+
248
+ async def format_tool_results(
249
+ self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
250
+ ) -> list[BetaMessageParam]:
251
+ """Format tool results into Claude messages."""
252
+ # Process each tool result
253
+ user_content = []
254
+
255
+ for tool_call, result in zip(tool_calls, tool_results, strict=True):
256
+ # Extract Claude-specific metadata from extra fields
257
+ tool_use_id = tool_call.id
258
+ if not tool_use_id:
259
+ logger.warning("No tool_use_id found for %s", tool_call.name)
260
+ continue
261
+
262
+ # Convert MCP tool results to Claude format
263
+ claude_blocks = []
264
+
265
+ if result.isError:
266
+ # Extract error message from content
267
+ error_msg = "Tool execution failed"
268
+ for content in result.content:
269
+ if isinstance(content, types.TextContent):
270
+ error_msg = content.text
271
+ break
272
+ claude_blocks.append(text_to_content_block(f"Error: {error_msg}"))
273
+ else:
274
+ # Process success content
275
+ for content in result.content:
276
+ if isinstance(content, types.TextContent):
277
+ claude_blocks.append(text_to_content_block(content.text))
278
+ elif isinstance(content, types.ImageContent):
279
+ claude_blocks.append(base64_to_content_block(content.data))
280
+
281
+ # Add tool result
282
+ user_content.append(tool_use_content_block(tool_use_id, claude_blocks))
283
+
284
+ # Return as a user message containing all tool results
285
+ return [
286
+ cast(
287
+ "BetaMessageParam",
288
+ {
289
+ "role": "user",
290
+ "content": user_content,
291
+ },
292
+ )
293
+ ]
294
+
295
+ async def create_user_message(self, text: str) -> BetaMessageParam:
296
+ """Create a user message in Claude's format."""
297
+ return cast("BetaMessageParam", {"role": "user", "content": text})
298
+
299
+ def _convert_tools_for_claude(self) -> list[dict]:
300
+ """Convert MCP tools to Claude tool format."""
301
+ claude_tools = []
302
+ self._claude_to_mcp_tool_map = {} # Reset mapping
303
+
304
+ for tool in self._available_tools:
305
+ # Special handling for computer use tools
306
+ if tool.name in ["computer", "computer_anthropic", "anthropic_computer"]:
307
+ # Use Claude's native computer use format with configurable dimensions
308
+ claude_tool = {
309
+ "type": "computer_20250124",
310
+ "name": "computer",
311
+ "display_width_px": self.metadata["display_width"],
312
+ "display_height_px": self.metadata["display_height"],
313
+ }
314
+ # Map Claude's "computer" back to the actual MCP tool name
315
+ self._claude_to_mcp_tool_map["computer"] = tool.name
316
+ elif tool.name not in self.lifecycle_tools:
317
+ # Convert regular tools
318
+ claude_tool = {
319
+ "name": tool.name,
320
+ "description": tool.description or f"Execute {tool.name}",
321
+ "input_schema": tool.inputSchema
322
+ or {
323
+ "type": "object",
324
+ "properties": {},
325
+ },
326
+ }
327
+ # Direct mapping for non-computer tools
328
+ self._claude_to_mcp_tool_map[tool.name] = tool.name
329
+ else:
330
+ continue
331
+
332
+ claude_tools.append(claude_tool)
333
+
334
+ self.claude_tools = claude_tools
335
+ return claude_tools
336
+
337
+ def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
338
+ """Add prompt caching to messages."""
339
+ messages_cached = copy.deepcopy(messages)
340
+
341
+ # Mark last user message with cache control
342
+ if messages_cached and messages_cached[-1].get("role") == "user":
343
+ last_content = messages_cached[-1]["content"]
344
+ # Content is formatted to be list of ContentBlock in format_blocks and format_message
345
+ if isinstance(last_content, list):
346
+ for block in last_content:
347
+ # Only add cache control to block types that support it
348
+ block_type = block.get("type")
349
+ if block_type in ["text", "image", "tool_use", "tool_result"]:
350
+ cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
351
+ block["cache_control"] = cache_control # type: ignore[reportGeneralTypeIssues]
352
+
353
+ return messages_cached
354
+
355
+
356
+ def base64_to_content_block(base64: str) -> BetaImageBlockParam:
357
+ """Convert base64 image to Claude content block."""
358
+ return {
359
+ "type": "image",
360
+ "source": {"type": "base64", "media_type": "image/png", "data": base64},
361
+ }
362
+
363
+
364
+ def text_to_content_block(text: str) -> BetaTextBlockParam:
365
+ """Convert text to Claude content block."""
366
+ return {"type": "text", "text": text}
367
+
368
+
369
+ def tool_use_content_block(
370
+ tool_use_id: str, content: list[BetaTextBlockParam | BetaImageBlockParam]
371
+ ) -> BetaToolResultBlockParam:
372
+ """Create tool result content block."""
373
+ return {"type": "tool_result", "tool_use_id": tool_use_id, "content": content}