hud-python 0.2.10__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (64)
  1. hud/__init__.py +14 -5
  2. hud/env/docker_client.py +1 -1
  3. hud/env/environment.py +10 -7
  4. hud/env/local_docker_client.py +1 -1
  5. hud/env/remote_client.py +1 -1
  6. hud/env/remote_docker_client.py +2 -2
  7. hud/exceptions.py +2 -1
  8. hud/mcp_agent/__init__.py +15 -0
  9. hud/mcp_agent/base.py +723 -0
  10. hud/mcp_agent/claude.py +316 -0
  11. hud/mcp_agent/langchain.py +231 -0
  12. hud/mcp_agent/openai.py +318 -0
  13. hud/mcp_agent/tests/__init__.py +1 -0
  14. hud/mcp_agent/tests/test_base.py +437 -0
  15. hud/settings.py +14 -2
  16. hud/task.py +4 -0
  17. hud/telemetry/__init__.py +11 -7
  18. hud/telemetry/_trace.py +82 -71
  19. hud/telemetry/context.py +9 -27
  20. hud/telemetry/exporter.py +6 -5
  21. hud/telemetry/instrumentation/mcp.py +174 -410
  22. hud/telemetry/mcp_models.py +13 -74
  23. hud/telemetry/tests/test_context.py +9 -6
  24. hud/telemetry/tests/test_trace.py +92 -61
  25. hud/tools/__init__.py +21 -0
  26. hud/tools/base.py +65 -0
  27. hud/tools/bash.py +137 -0
  28. hud/tools/computer/__init__.py +13 -0
  29. hud/tools/computer/anthropic.py +411 -0
  30. hud/tools/computer/hud.py +315 -0
  31. hud/tools/computer/openai.py +283 -0
  32. hud/tools/edit.py +290 -0
  33. hud/tools/executors/__init__.py +13 -0
  34. hud/tools/executors/base.py +331 -0
  35. hud/tools/executors/pyautogui.py +585 -0
  36. hud/tools/executors/tests/__init__.py +1 -0
  37. hud/tools/executors/tests/test_base_executor.py +338 -0
  38. hud/tools/executors/tests/test_pyautogui_executor.py +162 -0
  39. hud/tools/executors/xdo.py +503 -0
  40. hud/tools/helper/README.md +56 -0
  41. hud/tools/helper/__init__.py +9 -0
  42. hud/tools/helper/mcp_server.py +78 -0
  43. hud/tools/helper/server_initialization.py +115 -0
  44. hud/tools/helper/utils.py +58 -0
  45. hud/tools/playwright_tool.py +373 -0
  46. hud/tools/tests/__init__.py +3 -0
  47. hud/tools/tests/test_bash.py +152 -0
  48. hud/tools/tests/test_computer.py +52 -0
  49. hud/tools/tests/test_computer_actions.py +34 -0
  50. hud/tools/tests/test_edit.py +233 -0
  51. hud/tools/tests/test_init.py +27 -0
  52. hud/tools/tests/test_playwright_tool.py +183 -0
  53. hud/tools/tests/test_tools.py +154 -0
  54. hud/tools/tests/test_utils.py +156 -0
  55. hud/tools/utils.py +50 -0
  56. hud/types.py +10 -1
  57. hud/utils/tests/test_init.py +21 -0
  58. hud/utils/tests/test_version.py +1 -1
  59. hud/version.py +1 -1
  60. {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/METADATA +9 -6
  61. hud_python-0.3.0.dist-info/RECORD +124 -0
  62. hud_python-0.2.10.dist-info/RECORD +0 -85
  63. {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/WHEEL +0 -0
  64. {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,316 @@
1
+ """Claude MCP Agent implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import copy
6
+ import logging
7
+ from typing import TYPE_CHECKING, Any, cast
8
+
9
+ from anthropic import AsyncAnthropic, BadRequestError
10
+
11
+ if TYPE_CHECKING:
12
+ from anthropic.types.beta import (
13
+ BetaCacheControlEphemeralParam,
14
+ BetaImageBlockParam,
15
+ BetaMessageParam,
16
+ BetaTextBlockParam,
17
+ BetaToolResultBlockParam,
18
+ )
19
+
20
+ from hud.settings import settings
21
+
22
+ from .base import BaseMCPAgent
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
def base64_to_content_block(base64: str, media_type: str = "image/png") -> BetaImageBlockParam:
    """Convert a base64 image to a Claude image content block.

    Args:
        base64: Base64-encoded image data; placed in the block unchanged.
        media_type: MIME type declared for the image. Defaults to
            ``"image/png"``, which was previously the only supported value.

    Returns:
        A dict of type ``"image"`` whose ``source`` carries the base64 payload.
    """
    # cast: media_type is a plain str here, while the SDK type narrows it to a Literal.
    return cast(
        "BetaImageBlockParam",
        {
            "type": "image",
            "source": {"type": "base64", "media_type": media_type, "data": base64},
        },
    )
33
+
34
+
35
def text_to_content_block(text: str) -> BetaTextBlockParam:
    """Wrap a plain string in a Claude text content block."""
    block: BetaTextBlockParam = {"type": "text", "text": text}
    return block
38
+
39
+
40
def tool_use_content_block(
    tool_use_id: str, content: list[BetaTextBlockParam | BetaImageBlockParam]
) -> BetaToolResultBlockParam:
    """Build the ``tool_result`` block that answers the tool call ``tool_use_id``."""
    result_block: BetaToolResultBlockParam = {
        "type": "tool_result",
        "tool_use_id": tool_use_id,
        "content": content,
    }
    return result_block
45
+
46
+
47
class ClaudeMCPAgent(BaseMCPAgent):
    """
    Claude agent that uses MCP servers for tool execution.

    This agent uses Claude's native tool calling capabilities but executes
    tools through MCP servers instead of direct implementation.
    """

    def __init__(
        self,
        model_client: AsyncAnthropic | None = None,
        model: str = "claude-3-7-sonnet-20250219",
        max_tokens: int = 4096,
        display_width_px: int = 1400,
        display_height_px: int = 850,
        use_computer_beta: bool = True,
        **kwargs: Any,
    ) -> None:
        """
        Initialize Claude MCP agent.

        Args:
            model_client: AsyncAnthropic client (created from
                ``settings.anthropic_api_key`` if not provided)
            model: Claude model to use
            max_tokens: Maximum tokens for response
            display_width_px: Display width for computer use tools
            display_height_px: Display height for computer use tools
            use_computer_beta: Whether to use computer-use beta features
            **kwargs: Additional arguments passed to BaseMCPAgent

        Raises:
            ValueError: If no client is given and no API key is configured.
        """
        super().__init__(**kwargs)

        # Initialize client if not provided
        if model_client is None:
            api_key = settings.anthropic_api_key
            if not api_key:
                raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
            model_client = AsyncAnthropic(api_key=api_key)

        self.anthropic_client = model_client
        self.model = model
        self.max_tokens = max_tokens
        self.display_width_px = display_width_px
        self.display_height_px = display_height_px
        self.use_computer_beta = use_computer_beta

        # Track mapping from Claude tool names to MCP tool names
        self._claude_to_mcp_tool_map: dict[str, str] = {}

    async def initialize(self) -> None:
        """Initialize the agent and build tool mappings."""
        await super().initialize()
        # Build tool mappings after tools are discovered
        self._convert_tools_for_claude()

    async def create_initial_messages(
        self, prompt: str, screenshot: str | None
    ) -> list[BetaMessageParam]:
        """Create the initial user message for Claude.

        Args:
            prompt: Task text; always the first content block.
            screenshot: Optional base64 image appended after the text.

        Returns:
            A one-element list holding the user message.
        """
        user_content: list[BetaImageBlockParam | BetaTextBlockParam] = [
            text_to_content_block(prompt)
        ]

        # Add screenshot if available
        if screenshot:
            user_content.append(base64_to_content_block(screenshot))

        return [cast("BetaMessageParam", {"role": "user", "content": user_content})]

    async def get_model_response(
        self, messages: list[BetaMessageParam], step: int
    ) -> dict[str, Any]:
        """Get a response from Claude, including any tool calls.

        The assistant reply is appended to ``messages`` in place so the next
        step sees it. If the API rejects the request because the prompt is too
        long, the history is truncated once (first message plus the last 20)
        and the request is retried; the caller's ``messages`` list itself is
        not truncated.

        Args:
            messages: Conversation so far; mutated by appending the reply.
            step: Current step index (not used by this implementation).

        Returns:
            A dict with ``content`` (thinking text followed by reply text),
            ``tool_calls`` (one dict per ``tool_use`` block), ``done``
            (``True`` when no tool was requested) and ``raw_response``.

        Raises:
            BadRequestError: For any bad request other than an over-long
                prompt, or when the history is already too short to truncate.
        """
        claude_tools = self._convert_tools_for_claude()

        # Shallow copy: truncation below must not alter the caller's history.
        current_messages = messages.copy()

        while True:
            messages_cached = self._add_prompt_caching(current_messages)

            create_kwargs: dict[str, Any] = {
                "model": self.model,
                "max_tokens": self.max_tokens,
                "system": self.get_system_prompt(),
                "messages": messages_cached,
                "tools": claude_tools,
                "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
            }

            # Add beta features if using computer tools
            if self.use_computer_beta and any(
                t.get("type") == "computer_20250124" for t in claude_tools
            ):
                create_kwargs["betas"] = ["computer-use-2025-01-24"]

            try:
                response = await self.anthropic_client.beta.messages.create(**create_kwargs)
                break
            except BadRequestError as e:
                # Substring match rather than startswith: the SDK's error text
                # may carry a status prefix ahead of the server message.
                if "prompt is too long" not in str(e):
                    raise
                # Nothing left to drop: first message + last 20 is already everything.
                if len(current_messages) <= 21:
                    raise
                logger.warning("Prompt too long, truncating message history")
                current_messages = [current_messages[0], *current_messages[-20:]]

        # Add assistant response to messages (for next step)
        messages.append(
            cast("BetaMessageParam", {"role": "assistant", "content": response.content})
        )

        tool_calls: list[dict[str, Any]] = []
        text_parts: list[str] = []
        thinking_parts: list[str] = []

        for block in response.content:
            if block.type == "tool_use":
                # Map Claude tool name back to MCP tool name
                mcp_tool_name = self._claude_to_mcp_tool_map.get(block.name, block.name)
                tool_calls.append(
                    {
                        "name": mcp_tool_name,  # Use MCP tool name for execution
                        "arguments": block.input,
                        "tool_use_id": block.id,  # Claude-specific metadata
                        "claude_name": block.name,  # Keep original Claude name for reference
                    }
                )
            elif block.type == "text":
                text_parts.append(block.text)
            elif block.type == "thinking":
                thinking_parts.append(f"Thinking: {block.thinking}\n")

        return {
            # Thinking (if any) is emitted ahead of the visible reply text.
            "content": "".join(thinking_parts) + "".join(text_parts),
            "tool_calls": tool_calls,
            "done": not tool_calls,
            "raw_response": response.model_dump(),  # For debugging
        }

    async def format_tool_results(
        self, processed_results: dict[str, Any], tool_calls: list[dict]
    ) -> list[BetaMessageParam]:
        """Format tool results into a single Claude user message.

        Args:
            processed_results: Must contain ``"results"``, an iterable of
                ``(tool_name, content_blocks)`` pairs. Each content block is a
                dict with ``"type"`` of ``"text"``, ``"error"`` or ``"image"``;
                other types are ignored.
            tool_calls: The calls that produced the results; entries with a
                ``"tool_use_id"`` are used to label the results.

        Returns:
            A one-element list holding a user message with one
            ``tool_result`` block per matched result.
        """
        # Queue ids per tool name so repeated calls to the same tool each keep
        # their own id instead of all collapsing onto the last one.
        # NOTE(review): assumes results arrive in the same order as the calls.
        pending_ids: dict[str, list[str]] = {}
        for tool_call in tool_calls:
            if "tool_use_id" in tool_call:
                pending_ids.setdefault(tool_call["name"], []).append(tool_call["tool_use_id"])

        user_content = []

        for tool_name, content_blocks in processed_results["results"]:
            ids = pending_ids.get(tool_name)
            tool_use_id = None
            if ids:
                # Keep the final id around so surplus results still get labelled.
                tool_use_id = ids.pop(0) if len(ids) > 1 else ids[0]
            if not tool_use_id:
                logger.warning("No tool_use_id found for %s", tool_name)
                continue

            # Convert content blocks to Claude format
            claude_blocks = []
            for block in content_blocks:
                if block["type"] == "text":
                    claude_blocks.append(text_to_content_block(block["text"]))
                elif block["type"] == "error":
                    claude_blocks.append(text_to_content_block(f"Error: {block['text']}"))
                elif block["type"] == "image":
                    claude_blocks.append(base64_to_content_block(block["data"]))

            user_content.append(tool_use_content_block(tool_use_id, claude_blocks))

        # Return as a user message containing all tool results
        return [cast("BetaMessageParam", {"role": "user", "content": user_content})]

    async def create_user_message(self, text: str) -> BetaMessageParam:
        """Create a user message in Claude's format."""
        return cast("BetaMessageParam", {"role": "user", "content": text})

    def _convert_tools_for_claude(self) -> list[dict]:
        """Convert MCP tools to Claude tool format.

        Rebuilds ``self._claude_to_mcp_tool_map`` as a side effect. Tools named
        ``computer``, ``computer_anthropic`` or ``anthropic_computer`` are
        exposed as Claude's native ``computer`` tool; only the first such tool
        is used because Claude tool names must be unique within a request.

        Returns:
            The tool definitions to send to the Claude API.
        """
        claude_tools: list[dict] = []
        self._claude_to_mcp_tool_map = {}  # Reset mapping

        for tool in self._available_tools:
            if tool.name in ("computer", "computer_anthropic", "anthropic_computer"):
                if "computer" in self._claude_to_mcp_tool_map:
                    # A second native "computer" entry would duplicate the tool name.
                    logger.debug(
                        "Skipping computer tool %s; already using %s",
                        tool.name,
                        self._claude_to_mcp_tool_map["computer"],
                    )
                    continue
                # Use Claude's native computer use format with configurable dimensions
                claude_tool = {
                    "type": "computer_20250124",
                    "name": "computer",
                    "display_width_px": self.display_width_px,
                    "display_height_px": self.display_height_px,
                }
                # Map Claude's "computer" back to the actual MCP tool name
                self._claude_to_mcp_tool_map["computer"] = tool.name
            else:
                # Convert regular tools
                claude_tool = {
                    "name": tool.name,
                    "description": tool.description or f"Execute {tool.name}",
                    "input_schema": tool.inputSchema
                    or {
                        "type": "object",
                        "properties": {},
                    },
                }
                # Direct mapping for non-computer tools
                self._claude_to_mcp_tool_map[tool.name] = tool.name

            claude_tools.append(claude_tool)

        return claude_tools

    def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
        """Return a deep copy of ``messages`` with a cache breakpoint added.

        If the last message is a user message with list content, its last
        block that is not a thinking block gets an ephemeral ``cache_control``.
        Only one block is marked: the API limits the number of breakpoints per
        request, and a breakpoint on the final block covers the whole prefix.
        The input list is never modified.
        """
        messages_cached = copy.deepcopy(messages)

        if not messages_cached or messages_cached[-1].get("role") != "user":
            return messages_cached

        last_content = messages_cached[-1]["content"]
        if isinstance(last_content, list):
            for block in reversed(last_content):
                if block.get("type") not in ("thinking", "redacted_thinking"):
                    cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
                    block["cache_control"] = cache_control  # type: ignore[reportGeneralTypeIssues]
                    break

        return messages_cached
@@ -0,0 +1,231 @@
1
+ """LangChain MCP Agent implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ from langchain.agents import AgentExecutor, create_tool_calling_agent
9
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
10
+ from langchain.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage
11
+ from mcp_use.adapters.langchain_adapter import LangChainAdapter
12
+
13
+ if TYPE_CHECKING:
14
+ from langchain.schema.language_model import BaseLanguageModel
15
+ from langchain_core.tools import BaseTool
16
+ from .base import BaseMCPAgent
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class LangChainMCPAgent(BaseMCPAgent):
    """
    LangChain agent that uses MCP servers for tool execution.

    This agent wraps any LangChain-compatible LLM and provides
    access to MCP tools through LangChain's tool-calling interface.
    """

    def __init__(
        self,
        llm: BaseLanguageModel,
        **kwargs: Any,
    ) -> None:
        """
        Initialize LangChain MCP agent.

        Args:
            llm: Any LangChain-compatible language model
            **kwargs: Additional arguments passed to BaseMCPAgent
        """
        super().__init__(**kwargs)

        self.llm = llm
        self.adapter = LangChainAdapter(disallowed_tools=self.disallowed_tools)
        # Populated lazily by _get_langchain_tools().
        self._langchain_tools: list[BaseTool] | None = None

    def _get_langchain_tools(self) -> list[BaseTool]:
        """Get or create LangChain tools from MCP tools.

        The converted tools are cached on the instance after the first call.
        """
        if self._langchain_tools is not None:
            return self._langchain_tools

        self._langchain_tools = []

        # Convert each connector's tools using the adapter
        for connector, tools in self.get_tools_by_connector().items():
            langchain_tools = self.adapter._convert_tools(tools, connector)  # type: ignore[reportAttributeAccessIssue]
            self._langchain_tools.extend(langchain_tools)

        logger.info("Created %s LangChain tools from MCP tools", len(self._langchain_tools))
        return self._langchain_tools

    async def create_initial_messages(
        self, prompt: str, screenshot: str | None
    ) -> list[BaseMessage]:
        """Create initial messages for LangChain.

        Returns a system message followed by one human message; the human
        message is multimodal (text + PNG data URL) when a screenshot is given.
        """
        messages: list[BaseMessage] = [SystemMessage(content=self.get_system_prompt())]

        if screenshot:
            # For multimodal models, include the image
            content = [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{screenshot}"}},
            ]
            messages.append(HumanMessage(content=content))
        else:
            messages.append(HumanMessage(content=prompt))

        return messages

    async def get_model_response(self, messages: list[BaseMessage], step: int) -> dict[str, Any]:
        """Get a response from the LangChain model, including any tool calls.

        Args:
            messages: Conversation so far. The last ``SystemMessage`` (if any)
                becomes the system prompt; human and AI messages are passed to
                the model in order, including the most recent user message.
            step: Current step index (not used by this implementation).

        Returns:
            A dict with ``content``, ``tool_calls`` and ``done``. Execution
            errors are logged and reported as ``content`` with ``done=True``
            rather than raised.
        """
        # Get LangChain tools (created lazily)
        langchain_tools = self._get_langchain_tools()

        # Split off the system message; the last one seen wins.
        system_content = "You are a helpful assistant"
        non_system_messages = []
        for msg in messages:
            if isinstance(msg, SystemMessage):
                system_content = str(msg.content)
            else:
                non_system_messages.append(msg)

        last_user_msg = next(
            (m for m in reversed(non_system_messages) if isinstance(m, HumanMessage)),
            None,
        )
        if last_user_msg is None:
            return {
                "content": "No user message found",
                "tool_calls": [],
                "done": True,
            }

        # The system prompt is passed as a message object, not a template
        # string, so literal braces in it are not parsed as template variables.
        prompt = ChatPromptTemplate.from_messages(
            [
                SystemMessage(content=system_content),
                MessagesPlaceholder(variable_name="chat_history"),
                MessagesPlaceholder(variable_name="agent_scratchpad"),
            ]
        )

        agent = create_tool_calling_agent(
            llm=self.llm,
            tools=langchain_tools,
            prompt=prompt,
        )

        executor = AgentExecutor(
            agent=agent,
            tools=langchain_tools,
            verbose=False,
        )

        # Plain-text form of the latest user turn (first text part only).
        input_text = ""
        if isinstance(last_user_msg.content, str):
            input_text = last_user_msg.content
        elif isinstance(last_user_msg.content, list):
            for item in last_user_msg.content:
                if isinstance(item, dict) and item.get("type") == "text":
                    input_text = item.get("text", "")
                    break

        # The prompt has no "{input}" slot, so the current user turn must be
        # part of the message list or the model never sees it. Passing the
        # message objects also keeps any attached screenshot.
        chat_history = [
            msg for msg in non_system_messages if isinstance(msg, (HumanMessage, AIMessage))
        ]

        try:
            result = await executor.ainvoke(
                {
                    "input": input_text,
                    "chat_history": chat_history,
                }
            )

            output = result.get("output", "")

            # NOTE(review): AgentExecutor only returns "intermediate_steps" when
            # constructed with return_intermediate_steps=True, which is not set
            # above, so this branch looks unreachable as configured. Enabling it
            # may cause tools already run by the executor to be run again by the
            # caller; confirm the intended flow before changing.
            if result.get("intermediate_steps"):
                tool_calls = [
                    {
                        "name": action.tool,
                        "arguments": action.tool_input,
                    }
                    for action, _ in result["intermediate_steps"]
                    if hasattr(action, "tool") and hasattr(action, "tool_input")
                ]
                return {
                    "content": output,
                    "tool_calls": tool_calls,
                    "done": False,  # Continue if tools were called
                }

            # No tools called, just text response
            return {
                "content": output,
                "tool_calls": [],
                "done": True,
            }

        except Exception as e:
            # Boundary handler: report the failure to the caller as content,
            # but keep the traceback in the log.
            logger.exception("Agent execution failed: %s", e)
            return {
                "content": f"Error: {e!s}",
                "tool_calls": [],
                "done": True,
            }

    async def format_tool_results(
        self, processed_results: dict[str, Any], tool_calls: list[dict]
    ) -> list[BaseMessage]:
        """Format tool results into LangChain messages.

        Args:
            processed_results: Must contain ``"text"``; may contain a base64
                ``"screenshot"``.
            tool_calls: The executed calls; only their ``"name"`` is used.

        Returns:
            An AI message naming the tools, followed by a human message with
            the results (multimodal when a screenshot is present).
        """
        tool_names = [tc["name"] for tc in tool_calls]
        messages: list[BaseMessage] = [
            AIMessage(content=f"I'll use the following tools: {', '.join(tool_names)}")
        ]

        result_text = processed_results["text"]
        screenshot = processed_results.get("screenshot")

        if screenshot:
            # Include screenshot in multimodal format
            content = [
                {"type": "text", "text": f"Tool results:\n{result_text}"},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{screenshot}"}},
            ]
            messages.append(HumanMessage(content=content))
        else:
            messages.append(HumanMessage(content=f"Tool results:\n{result_text}"))

        return messages