hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (130)
  1. hud/__init__.py +22 -22
  2. hud/agents/__init__.py +13 -15
  3. hud/agents/base.py +599 -599
  4. hud/agents/claude.py +373 -373
  5. hud/agents/langchain.py +261 -250
  6. hud/agents/misc/__init__.py +7 -7
  7. hud/agents/misc/response_agent.py +82 -80
  8. hud/agents/openai.py +352 -352
  9. hud/agents/openai_chat_generic.py +154 -154
  10. hud/agents/tests/__init__.py +1 -1
  11. hud/agents/tests/test_base.py +742 -742
  12. hud/agents/tests/test_claude.py +324 -324
  13. hud/agents/tests/test_client.py +363 -363
  14. hud/agents/tests/test_openai.py +237 -237
  15. hud/cli/__init__.py +617 -617
  16. hud/cli/__main__.py +8 -8
  17. hud/cli/analyze.py +371 -371
  18. hud/cli/analyze_metadata.py +230 -230
  19. hud/cli/build.py +498 -427
  20. hud/cli/clone.py +185 -185
  21. hud/cli/cursor.py +92 -92
  22. hud/cli/debug.py +392 -392
  23. hud/cli/docker_utils.py +83 -83
  24. hud/cli/init.py +280 -281
  25. hud/cli/interactive.py +353 -353
  26. hud/cli/mcp_server.py +764 -756
  27. hud/cli/pull.py +330 -336
  28. hud/cli/push.py +404 -370
  29. hud/cli/remote_runner.py +311 -311
  30. hud/cli/runner.py +160 -160
  31. hud/cli/tests/__init__.py +3 -3
  32. hud/cli/tests/test_analyze.py +284 -284
  33. hud/cli/tests/test_cli_init.py +265 -265
  34. hud/cli/tests/test_cli_main.py +27 -27
  35. hud/cli/tests/test_clone.py +142 -142
  36. hud/cli/tests/test_cursor.py +253 -253
  37. hud/cli/tests/test_debug.py +453 -453
  38. hud/cli/tests/test_mcp_server.py +139 -139
  39. hud/cli/tests/test_utils.py +388 -388
  40. hud/cli/utils.py +263 -263
  41. hud/clients/README.md +143 -143
  42. hud/clients/__init__.py +16 -16
  43. hud/clients/base.py +378 -379
  44. hud/clients/fastmcp.py +222 -222
  45. hud/clients/mcp_use.py +298 -278
  46. hud/clients/tests/__init__.py +1 -1
  47. hud/clients/tests/test_client_integration.py +111 -111
  48. hud/clients/tests/test_fastmcp.py +342 -342
  49. hud/clients/tests/test_protocol.py +188 -188
  50. hud/clients/utils/__init__.py +1 -1
  51. hud/clients/utils/retry_transport.py +160 -160
  52. hud/datasets.py +327 -322
  53. hud/misc/__init__.py +1 -1
  54. hud/misc/claude_plays_pokemon.py +292 -292
  55. hud/otel/__init__.py +35 -35
  56. hud/otel/collector.py +142 -142
  57. hud/otel/config.py +164 -164
  58. hud/otel/context.py +536 -536
  59. hud/otel/exporters.py +366 -366
  60. hud/otel/instrumentation.py +97 -97
  61. hud/otel/processors.py +118 -118
  62. hud/otel/tests/__init__.py +1 -1
  63. hud/otel/tests/test_processors.py +197 -197
  64. hud/server/__init__.py +5 -5
  65. hud/server/context.py +114 -114
  66. hud/server/helper/__init__.py +5 -5
  67. hud/server/low_level.py +132 -132
  68. hud/server/server.py +170 -166
  69. hud/server/tests/__init__.py +3 -3
  70. hud/settings.py +73 -73
  71. hud/shared/__init__.py +5 -5
  72. hud/shared/exceptions.py +180 -180
  73. hud/shared/requests.py +264 -264
  74. hud/shared/tests/test_exceptions.py +157 -157
  75. hud/shared/tests/test_requests.py +275 -275
  76. hud/telemetry/__init__.py +25 -25
  77. hud/telemetry/instrument.py +379 -379
  78. hud/telemetry/job.py +309 -309
  79. hud/telemetry/replay.py +74 -74
  80. hud/telemetry/trace.py +83 -83
  81. hud/tools/__init__.py +33 -33
  82. hud/tools/base.py +365 -365
  83. hud/tools/bash.py +161 -161
  84. hud/tools/computer/__init__.py +15 -15
  85. hud/tools/computer/anthropic.py +437 -437
  86. hud/tools/computer/hud.py +376 -376
  87. hud/tools/computer/openai.py +295 -295
  88. hud/tools/computer/settings.py +82 -82
  89. hud/tools/edit.py +314 -314
  90. hud/tools/executors/__init__.py +30 -30
  91. hud/tools/executors/base.py +539 -539
  92. hud/tools/executors/pyautogui.py +621 -621
  93. hud/tools/executors/tests/__init__.py +1 -1
  94. hud/tools/executors/tests/test_base_executor.py +338 -338
  95. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  96. hud/tools/executors/xdo.py +511 -511
  97. hud/tools/playwright.py +412 -412
  98. hud/tools/tests/__init__.py +3 -3
  99. hud/tools/tests/test_base.py +282 -282
  100. hud/tools/tests/test_bash.py +158 -158
  101. hud/tools/tests/test_bash_extended.py +197 -197
  102. hud/tools/tests/test_computer.py +425 -425
  103. hud/tools/tests/test_computer_actions.py +34 -34
  104. hud/tools/tests/test_edit.py +259 -259
  105. hud/tools/tests/test_init.py +27 -27
  106. hud/tools/tests/test_playwright_tool.py +183 -183
  107. hud/tools/tests/test_tools.py +145 -145
  108. hud/tools/tests/test_utils.py +156 -156
  109. hud/tools/types.py +72 -72
  110. hud/tools/utils.py +50 -50
  111. hud/types.py +136 -136
  112. hud/utils/__init__.py +10 -10
  113. hud/utils/async_utils.py +65 -65
  114. hud/utils/design.py +236 -168
  115. hud/utils/mcp.py +55 -55
  116. hud/utils/progress.py +149 -149
  117. hud/utils/telemetry.py +66 -66
  118. hud/utils/tests/test_async_utils.py +173 -173
  119. hud/utils/tests/test_init.py +17 -17
  120. hud/utils/tests/test_progress.py +261 -261
  121. hud/utils/tests/test_telemetry.py +82 -82
  122. hud/utils/tests/test_version.py +8 -8
  123. hud/version.py +7 -7
  124. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
  125. hud_python-0.4.3.dist-info/RECORD +131 -0
  126. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
  127. hud/agents/art.py +0 -101
  128. hud_python-0.4.1.dist-info/RECORD +0 -132
  129. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
  130. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/agents/claude.py CHANGED
@@ -1,373 +1,373 @@
1
- """Claude MCP Agent implementation."""
2
-
3
- from __future__ import annotations
4
-
5
- import copy
6
- import logging
7
- from typing import TYPE_CHECKING, Any, ClassVar, cast
8
-
9
- from anthropic import AsyncAnthropic, BadRequestError
10
- from anthropic.types.beta import BetaContentBlockParam, BetaImageBlockParam, BetaTextBlockParam
11
-
12
- import hud
13
-
14
- if TYPE_CHECKING:
15
- from anthropic.types.beta import (
16
- BetaCacheControlEphemeralParam,
17
- BetaContentBlockParam,
18
- BetaImageBlockParam,
19
- BetaMessageParam,
20
- BetaTextBlockParam,
21
- BetaToolResultBlockParam,
22
- )
23
-
24
- from hud.datasets import Task
25
-
26
- import mcp.types as types
27
-
28
- from hud.settings import settings
29
- from hud.tools.computer.settings import computer_settings
30
- from hud.types import AgentResponse, MCPToolCall, MCPToolResult
31
-
32
- from .base import MCPAgent
33
-
34
- logger = logging.getLogger(__name__)
35
-
36
-
37
- class ClaudeAgent(MCPAgent):
38
- """
39
- Claude agent that uses MCP servers for tool execution.
40
-
41
- This agent uses Claude's native tool calling capabilities but executes
42
- tools through MCP servers instead of direct implementation.
43
- """
44
-
45
- metadata: ClassVar[dict[str, Any]] = {
46
- "display_width": computer_settings.ANTHROPIC_COMPUTER_WIDTH,
47
- "display_height": computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
48
- }
49
-
50
- def __init__(
51
- self,
52
- model_client: AsyncAnthropic | None = None,
53
- model: str = "claude-3-7-sonnet-20250219",
54
- max_tokens: int = 4096,
55
- use_computer_beta: bool = True,
56
- **kwargs: Any,
57
- ) -> None:
58
- """
59
- Initialize Claude MCP agent.
60
-
61
- Args:
62
- model_client: AsyncAnthropic client (created if not provided)
63
- model: Claude model to use
64
- max_tokens: Maximum tokens for response
65
- use_computer_beta: Whether to use computer-use beta features
66
- **kwargs: Additional arguments passed to BaseMCPAgent (including mcp_client)
67
- """
68
- super().__init__(**kwargs)
69
-
70
- # Initialize client if not provided
71
- if model_client is None:
72
- api_key = settings.anthropic_api_key
73
- if not api_key:
74
- raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
75
- model_client = AsyncAnthropic(api_key=api_key)
76
-
77
- self.anthropic_client = model_client
78
- self.model = model
79
- self.max_tokens = max_tokens
80
- self.use_computer_beta = use_computer_beta
81
-
82
- self.model_name = self.model
83
-
84
- # Track mapping from Claude tool names to MCP tool names
85
- self._claude_to_mcp_tool_map: dict[str, str] = {}
86
- self.claude_tools: list[dict] = []
87
-
88
- # Base system prompt for autonomous operation
89
- self.system_prompt = """
90
- You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
91
-
92
- When working on tasks:
93
- 1. Be thorough and systematic in your approach
94
- 2. Complete tasks autonomously without asking for confirmation
95
- 3. Use available tools efficiently to accomplish your goals
96
- 4. Verify your actions and ensure task completion
97
- 5. Be precise and accurate in all operations
98
-
99
- Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
100
- """.strip() # noqa: E501
101
-
102
- async def initialize(self, task: str | Task | None = None) -> None:
103
- """Initialize the agent and build tool mappings."""
104
- await super().initialize(task)
105
- # Build tool mappings after tools are discovered
106
- self._convert_tools_for_claude()
107
-
108
- async def get_system_messages(self) -> list[Any]:
109
- """No system messages for Claude because applied in get_response"""
110
- return []
111
-
112
- async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
113
- """Format messages for Claude."""
114
- # Convert MCP content types to Anthropic content types
115
- anthropic_blocks: list[BetaContentBlockParam] = []
116
-
117
- for block in blocks:
118
- if isinstance(block, types.TextContent):
119
- # Only include fields that Anthropic expects
120
- anthropic_blocks.append(
121
- cast(
122
- "BetaTextBlockParam",
123
- {
124
- "type": "text",
125
- "text": block.text,
126
- },
127
- )
128
- )
129
- elif isinstance(block, types.ImageContent):
130
- # Convert MCP ImageContent to Anthropic format
131
- anthropic_blocks.append(
132
- cast(
133
- "BetaImageBlockParam",
134
- {
135
- "type": "image",
136
- "source": {
137
- "type": "base64",
138
- "media_type": block.mimeType,
139
- "data": block.data,
140
- },
141
- },
142
- )
143
- )
144
- else:
145
- # For other types, try to cast but log a warning
146
- logger.warning("Unknown content block type: %s", type(block))
147
- anthropic_blocks.append(cast("BetaContentBlockParam", block))
148
-
149
- return [
150
- cast(
151
- "BetaMessageParam",
152
- {
153
- "role": "user",
154
- "content": anthropic_blocks,
155
- },
156
- )
157
- ]
158
-
159
- @hud.instrument(
160
- span_type="agent",
161
- record_args=False, # Messages can be large
162
- record_result=True,
163
- )
164
- async def get_response(self, messages: list[BetaMessageParam]) -> AgentResponse:
165
- """Get response from Claude including any tool calls."""
166
-
167
- # Make API call with retry for prompt length
168
- current_messages = messages.copy()
169
-
170
- while True:
171
- messages_cached = self._add_prompt_caching(current_messages)
172
-
173
- # Build create kwargs
174
- create_kwargs = {
175
- "model": self.model,
176
- "max_tokens": self.max_tokens,
177
- "system": self.system_prompt,
178
- "messages": messages_cached,
179
- "tools": self.claude_tools,
180
- "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
181
- }
182
-
183
- # Add beta features if using computer tools
184
- if self.use_computer_beta and any(
185
- tool.get("type") == "computer_20250124" for tool in self.claude_tools
186
- ):
187
- create_kwargs["betas"] = ["computer-use-2025-01-24"]
188
-
189
- try:
190
- response = await self.anthropic_client.beta.messages.create(**create_kwargs)
191
- break
192
- except BadRequestError as e:
193
- if e.message.startswith("prompt is too long"):
194
- logger.warning("Prompt too long, truncating message history")
195
- # Keep first message and last 20 messages
196
- if len(current_messages) > 21:
197
- current_messages = [current_messages[0], *current_messages[-20:]]
198
- else:
199
- raise
200
- else:
201
- raise
202
-
203
- messages.append(
204
- cast(
205
- "BetaMessageParam",
206
- {
207
- "role": "assistant",
208
- "content": response.content,
209
- },
210
- )
211
- )
212
-
213
- # Process response
214
- result = AgentResponse(content="", tool_calls=[], done=True)
215
-
216
- # Extract text content and reasoning
217
- text_content = ""
218
- thinking_content = ""
219
-
220
- for block in response.content:
221
- if block.type == "tool_use":
222
- # Map Claude tool name back to MCP tool name
223
- mcp_tool_name = self._claude_to_mcp_tool_map.get(block.name, block.name)
224
-
225
- # Create MCPToolCall object with Claude metadata as extra fields
226
- # Pyright will complain but the tool class accepts extra fields
227
- tool_call = MCPToolCall(
228
- id=block.id, # canonical identifier for telemetry
229
- name=mcp_tool_name,
230
- arguments=block.input,
231
- claude_name=block.name, # type: ignore
232
- )
233
- result.tool_calls.append(tool_call)
234
- result.done = False
235
- elif block.type == "text":
236
- text_content += block.text
237
- elif hasattr(block, "type") and block.type == "thinking":
238
- thinking_content += f"Thinking: {block.thinking}\n"
239
-
240
- # Combine text and thinking for final content
241
- if thinking_content:
242
- result.content = thinking_content + text_content
243
- else:
244
- result.content = text_content
245
-
246
- return result
247
-
248
- async def format_tool_results(
249
- self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
250
- ) -> list[BetaMessageParam]:
251
- """Format tool results into Claude messages."""
252
- # Process each tool result
253
- user_content = []
254
-
255
- for tool_call, result in zip(tool_calls, tool_results, strict=True):
256
- # Extract Claude-specific metadata from extra fields
257
- tool_use_id = tool_call.id
258
- if not tool_use_id:
259
- logger.warning("No tool_use_id found for %s", tool_call.name)
260
- continue
261
-
262
- # Convert MCP tool results to Claude format
263
- claude_blocks = []
264
-
265
- if result.isError:
266
- # Extract error message from content
267
- error_msg = "Tool execution failed"
268
- for content in result.content:
269
- if isinstance(content, types.TextContent):
270
- error_msg = content.text
271
- break
272
- claude_blocks.append(text_to_content_block(f"Error: {error_msg}"))
273
- else:
274
- # Process success content
275
- for content in result.content:
276
- if isinstance(content, types.TextContent):
277
- claude_blocks.append(text_to_content_block(content.text))
278
- elif isinstance(content, types.ImageContent):
279
- claude_blocks.append(base64_to_content_block(content.data))
280
-
281
- # Add tool result
282
- user_content.append(tool_use_content_block(tool_use_id, claude_blocks))
283
-
284
- # Return as a user message containing all tool results
285
- return [
286
- cast(
287
- "BetaMessageParam",
288
- {
289
- "role": "user",
290
- "content": user_content,
291
- },
292
- )
293
- ]
294
-
295
- async def create_user_message(self, text: str) -> BetaMessageParam:
296
- """Create a user message in Claude's format."""
297
- return cast("BetaMessageParam", {"role": "user", "content": text})
298
-
299
- def _convert_tools_for_claude(self) -> list[dict]:
300
- """Convert MCP tools to Claude tool format."""
301
- claude_tools = []
302
- self._claude_to_mcp_tool_map = {} # Reset mapping
303
-
304
- for tool in self._available_tools:
305
- # Special handling for computer use tools
306
- if tool.name in ["computer", "computer_anthropic", "anthropic_computer"]:
307
- # Use Claude's native computer use format with configurable dimensions
308
- claude_tool = {
309
- "type": "computer_20250124",
310
- "name": "computer",
311
- "display_width_px": self.metadata["display_width"],
312
- "display_height_px": self.metadata["display_height"],
313
- }
314
- # Map Claude's "computer" back to the actual MCP tool name
315
- self._claude_to_mcp_tool_map["computer"] = tool.name
316
- elif tool.name not in self.lifecycle_tools:
317
- # Convert regular tools
318
- claude_tool = {
319
- "name": tool.name,
320
- "description": tool.description or f"Execute {tool.name}",
321
- "input_schema": tool.inputSchema
322
- or {
323
- "type": "object",
324
- "properties": {},
325
- },
326
- }
327
- # Direct mapping for non-computer tools
328
- self._claude_to_mcp_tool_map[tool.name] = tool.name
329
- else:
330
- continue
331
-
332
- claude_tools.append(claude_tool)
333
-
334
- self.claude_tools = claude_tools
335
- return claude_tools
336
-
337
- def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
338
- """Add prompt caching to messages."""
339
- messages_cached = copy.deepcopy(messages)
340
-
341
- # Mark last user message with cache control
342
- if messages_cached and messages_cached[-1].get("role") == "user":
343
- last_content = messages_cached[-1]["content"]
344
- # Content is formatted to be list of ContentBlock in format_blocks and format_message
345
- if isinstance(last_content, list):
346
- for block in last_content:
347
- # Only add cache control to block types that support it
348
- block_type = block.get("type")
349
- if block_type in ["text", "image", "tool_use", "tool_result"]:
350
- cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
351
- block["cache_control"] = cache_control # type: ignore[reportGeneralTypeIssues]
352
-
353
- return messages_cached
354
-
355
-
356
- def base64_to_content_block(base64: str) -> BetaImageBlockParam:
357
- """Convert base64 image to Claude content block."""
358
- return {
359
- "type": "image",
360
- "source": {"type": "base64", "media_type": "image/png", "data": base64},
361
- }
362
-
363
-
364
- def text_to_content_block(text: str) -> BetaTextBlockParam:
365
- """Convert text to Claude content block."""
366
- return {"type": "text", "text": text}
367
-
368
-
369
- def tool_use_content_block(
370
- tool_use_id: str, content: list[BetaTextBlockParam | BetaImageBlockParam]
371
- ) -> BetaToolResultBlockParam:
372
- """Create tool result content block."""
373
- return {"type": "tool_result", "tool_use_id": tool_use_id, "content": content}
1
+ """Claude MCP Agent implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import copy
6
+ import logging
7
+ from typing import TYPE_CHECKING, Any, ClassVar, cast
8
+
9
+ from anthropic import AsyncAnthropic, BadRequestError
10
+ from anthropic.types.beta import BetaContentBlockParam, BetaImageBlockParam, BetaTextBlockParam
11
+
12
+ import hud
13
+
14
+ if TYPE_CHECKING:
15
+ from anthropic.types.beta import (
16
+ BetaCacheControlEphemeralParam,
17
+ BetaContentBlockParam,
18
+ BetaImageBlockParam,
19
+ BetaMessageParam,
20
+ BetaTextBlockParam,
21
+ BetaToolResultBlockParam,
22
+ )
23
+
24
+ from hud.datasets import Task
25
+
26
+ import mcp.types as types
27
+
28
+ from hud.settings import settings
29
+ from hud.tools.computer.settings import computer_settings
30
+ from hud.types import AgentResponse, MCPToolCall, MCPToolResult
31
+
32
+ from .base import MCPAgent
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
+ class ClaudeAgent(MCPAgent):
38
+ """
39
+ Claude agent that uses MCP servers for tool execution.
40
+
41
+ This agent uses Claude's native tool calling capabilities but executes
42
+ tools through MCP servers instead of direct implementation.
43
+ """
44
+
45
+ metadata: ClassVar[dict[str, Any]] = {
46
+ "display_width": computer_settings.ANTHROPIC_COMPUTER_WIDTH,
47
+ "display_height": computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
48
+ }
49
+
50
+ def __init__(
51
+ self,
52
+ model_client: AsyncAnthropic | None = None,
53
+ model: str = "claude-3-7-sonnet-20250219",
54
+ max_tokens: int = 4096,
55
+ use_computer_beta: bool = True,
56
+ **kwargs: Any,
57
+ ) -> None:
58
+ """
59
+ Initialize Claude MCP agent.
60
+
61
+ Args:
62
+ model_client: AsyncAnthropic client (created if not provided)
63
+ model: Claude model to use
64
+ max_tokens: Maximum tokens for response
65
+ use_computer_beta: Whether to use computer-use beta features
66
+ **kwargs: Additional arguments passed to BaseMCPAgent (including mcp_client)
67
+ """
68
+ super().__init__(**kwargs)
69
+
70
+ # Initialize client if not provided
71
+ if model_client is None:
72
+ api_key = settings.anthropic_api_key
73
+ if not api_key:
74
+ raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
75
+ model_client = AsyncAnthropic(api_key=api_key)
76
+
77
+ self.anthropic_client = model_client
78
+ self.model = model
79
+ self.max_tokens = max_tokens
80
+ self.use_computer_beta = use_computer_beta
81
+
82
+ self.model_name = self.model
83
+
84
+ # Track mapping from Claude tool names to MCP tool names
85
+ self._claude_to_mcp_tool_map: dict[str, str] = {}
86
+ self.claude_tools: list[dict] = []
87
+
88
+ # Base system prompt for autonomous operation
89
+ self.system_prompt = """
90
+ You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
91
+
92
+ When working on tasks:
93
+ 1. Be thorough and systematic in your approach
94
+ 2. Complete tasks autonomously without asking for confirmation
95
+ 3. Use available tools efficiently to accomplish your goals
96
+ 4. Verify your actions and ensure task completion
97
+ 5. Be precise and accurate in all operations
98
+
99
+ Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
100
+ """.strip() # noqa: E501
101
+
102
+ async def initialize(self, task: str | Task | None = None) -> None:
103
+ """Initialize the agent and build tool mappings."""
104
+ await super().initialize(task)
105
+ # Build tool mappings after tools are discovered
106
+ self._convert_tools_for_claude()
107
+
108
+ async def get_system_messages(self) -> list[Any]:
109
+ """No system messages for Claude because applied in get_response"""
110
+ return []
111
+
112
+ async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
113
+ """Format messages for Claude."""
114
+ # Convert MCP content types to Anthropic content types
115
+ anthropic_blocks: list[BetaContentBlockParam] = []
116
+
117
+ for block in blocks:
118
+ if isinstance(block, types.TextContent):
119
+ # Only include fields that Anthropic expects
120
+ anthropic_blocks.append(
121
+ cast(
122
+ "BetaTextBlockParam",
123
+ {
124
+ "type": "text",
125
+ "text": block.text,
126
+ },
127
+ )
128
+ )
129
+ elif isinstance(block, types.ImageContent):
130
+ # Convert MCP ImageContent to Anthropic format
131
+ anthropic_blocks.append(
132
+ cast(
133
+ "BetaImageBlockParam",
134
+ {
135
+ "type": "image",
136
+ "source": {
137
+ "type": "base64",
138
+ "media_type": block.mimeType,
139
+ "data": block.data,
140
+ },
141
+ },
142
+ )
143
+ )
144
+ else:
145
+ # For other types, try to cast but log a warning
146
+ logger.warning("Unknown content block type: %s", type(block))
147
+ anthropic_blocks.append(cast("BetaContentBlockParam", block))
148
+
149
+ return [
150
+ cast(
151
+ "BetaMessageParam",
152
+ {
153
+ "role": "user",
154
+ "content": anthropic_blocks,
155
+ },
156
+ )
157
+ ]
158
+
159
+ @hud.instrument(
160
+ span_type="agent",
161
+ record_args=False, # Messages can be large
162
+ record_result=True,
163
+ )
164
+ async def get_response(self, messages: list[BetaMessageParam]) -> AgentResponse:
165
+ """Get response from Claude including any tool calls."""
166
+
167
+ # Make API call with retry for prompt length
168
+ current_messages = messages.copy()
169
+
170
+ while True:
171
+ messages_cached = self._add_prompt_caching(current_messages)
172
+
173
+ # Build create kwargs
174
+ create_kwargs = {
175
+ "model": self.model,
176
+ "max_tokens": self.max_tokens,
177
+ "system": self.system_prompt,
178
+ "messages": messages_cached,
179
+ "tools": self.claude_tools,
180
+ "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
181
+ }
182
+
183
+ # Add beta features if using computer tools
184
+ if self.use_computer_beta and any(
185
+ tool.get("type") == "computer_20250124" for tool in self.claude_tools
186
+ ):
187
+ create_kwargs["betas"] = ["computer-use-2025-01-24"]
188
+
189
+ try:
190
+ response = await self.anthropic_client.beta.messages.create(**create_kwargs)
191
+ break
192
+ except BadRequestError as e:
193
+ if e.message.startswith("prompt is too long"):
194
+ logger.warning("Prompt too long, truncating message history")
195
+ # Keep first message and last 20 messages
196
+ if len(current_messages) > 21:
197
+ current_messages = [current_messages[0], *current_messages[-20:]]
198
+ else:
199
+ raise
200
+ else:
201
+ raise
202
+
203
+ messages.append(
204
+ cast(
205
+ "BetaMessageParam",
206
+ {
207
+ "role": "assistant",
208
+ "content": response.content,
209
+ },
210
+ )
211
+ )
212
+
213
+ # Process response
214
+ result = AgentResponse(content="", tool_calls=[], done=True)
215
+
216
+ # Extract text content and reasoning
217
+ text_content = ""
218
+ thinking_content = ""
219
+
220
+ for block in response.content:
221
+ if block.type == "tool_use":
222
+ # Map Claude tool name back to MCP tool name
223
+ mcp_tool_name = self._claude_to_mcp_tool_map.get(block.name, block.name)
224
+
225
+ # Create MCPToolCall object with Claude metadata as extra fields
226
+ # Pyright will complain but the tool class accepts extra fields
227
+ tool_call = MCPToolCall(
228
+ id=block.id, # canonical identifier for telemetry
229
+ name=mcp_tool_name,
230
+ arguments=block.input,
231
+ claude_name=block.name, # type: ignore
232
+ )
233
+ result.tool_calls.append(tool_call)
234
+ result.done = False
235
+ elif block.type == "text":
236
+ text_content += block.text
237
+ elif hasattr(block, "type") and block.type == "thinking":
238
+ thinking_content += f"Thinking: {block.thinking}\n"
239
+
240
+ # Combine text and thinking for final content
241
+ if thinking_content:
242
+ result.content = thinking_content + text_content
243
+ else:
244
+ result.content = text_content
245
+
246
+ return result
247
+
248
+ async def format_tool_results(
249
+ self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
250
+ ) -> list[BetaMessageParam]:
251
+ """Format tool results into Claude messages."""
252
+ # Process each tool result
253
+ user_content = []
254
+
255
+ for tool_call, result in zip(tool_calls, tool_results, strict=True):
256
+ # Extract Claude-specific metadata from extra fields
257
+ tool_use_id = tool_call.id
258
+ if not tool_use_id:
259
+ logger.warning("No tool_use_id found for %s", tool_call.name)
260
+ continue
261
+
262
+ # Convert MCP tool results to Claude format
263
+ claude_blocks = []
264
+
265
+ if result.isError:
266
+ # Extract error message from content
267
+ error_msg = "Tool execution failed"
268
+ for content in result.content:
269
+ if isinstance(content, types.TextContent):
270
+ error_msg = content.text
271
+ break
272
+ claude_blocks.append(text_to_content_block(f"Error: {error_msg}"))
273
+ else:
274
+ # Process success content
275
+ for content in result.content:
276
+ if isinstance(content, types.TextContent):
277
+ claude_blocks.append(text_to_content_block(content.text))
278
+ elif isinstance(content, types.ImageContent):
279
+ claude_blocks.append(base64_to_content_block(content.data))
280
+
281
+ # Add tool result
282
+ user_content.append(tool_use_content_block(tool_use_id, claude_blocks))
283
+
284
+ # Return as a user message containing all tool results
285
+ return [
286
+ cast(
287
+ "BetaMessageParam",
288
+ {
289
+ "role": "user",
290
+ "content": user_content,
291
+ },
292
+ )
293
+ ]
294
+
295
+ async def create_user_message(self, text: str) -> BetaMessageParam:
296
+ """Create a user message in Claude's format."""
297
+ return cast("BetaMessageParam", {"role": "user", "content": text})
298
+
299
+ def _convert_tools_for_claude(self) -> list[dict]:
300
+ """Convert MCP tools to Claude tool format."""
301
+ claude_tools = []
302
+ self._claude_to_mcp_tool_map = {} # Reset mapping
303
+
304
+ for tool in self._available_tools:
305
+ # Special handling for computer use tools
306
+ if tool.name in ["computer", "computer_anthropic", "anthropic_computer"]:
307
+ # Use Claude's native computer use format with configurable dimensions
308
+ claude_tool = {
309
+ "type": "computer_20250124",
310
+ "name": "computer",
311
+ "display_width_px": self.metadata["display_width"],
312
+ "display_height_px": self.metadata["display_height"],
313
+ }
314
+ # Map Claude's "computer" back to the actual MCP tool name
315
+ self._claude_to_mcp_tool_map["computer"] = tool.name
316
+ elif tool.name not in self.lifecycle_tools:
317
+ # Convert regular tools
318
+ claude_tool = {
319
+ "name": tool.name,
320
+ "description": tool.description or f"Execute {tool.name}",
321
+ "input_schema": tool.inputSchema
322
+ or {
323
+ "type": "object",
324
+ "properties": {},
325
+ },
326
+ }
327
+ # Direct mapping for non-computer tools
328
+ self._claude_to_mcp_tool_map[tool.name] = tool.name
329
+ else:
330
+ continue
331
+
332
+ claude_tools.append(claude_tool)
333
+
334
+ self.claude_tools = claude_tools
335
+ return claude_tools
336
+
337
+ def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
338
+ """Add prompt caching to messages."""
339
+ messages_cached = copy.deepcopy(messages)
340
+
341
+ # Mark last user message with cache control
342
+ if messages_cached and messages_cached[-1].get("role") == "user":
343
+ last_content = messages_cached[-1]["content"]
344
+ # Content is formatted to be list of ContentBlock in format_blocks and format_message
345
+ if isinstance(last_content, list):
346
+ for block in last_content:
347
+ # Only add cache control to block types that support it
348
+ block_type = block.get("type")
349
+ if block_type in ["text", "image", "tool_use", "tool_result"]:
350
+ cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
351
+ block["cache_control"] = cache_control # type: ignore[reportGeneralTypeIssues]
352
+
353
+ return messages_cached
354
+
355
+
356
+ def base64_to_content_block(base64: str) -> BetaImageBlockParam:
357
+ """Convert base64 image to Claude content block."""
358
+ return {
359
+ "type": "image",
360
+ "source": {"type": "base64", "media_type": "image/png", "data": base64},
361
+ }
362
+
363
+
364
+ def text_to_content_block(text: str) -> BetaTextBlockParam:
365
+ """Convert text to Claude content block."""
366
+ return {"type": "text", "text": text}
367
+
368
+
369
+ def tool_use_content_block(
370
+ tool_use_id: str, content: list[BetaTextBlockParam | BetaImageBlockParam]
371
+ ) -> BetaToolResultBlockParam:
372
+ """Create tool result content block."""
373
+ return {"type": "tool_result", "tool_use_id": tool_use_id, "content": content}