hud-python 0.2.10__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +14 -5
- hud/env/docker_client.py +1 -1
- hud/env/environment.py +10 -7
- hud/env/local_docker_client.py +1 -1
- hud/env/remote_client.py +1 -1
- hud/env/remote_docker_client.py +2 -2
- hud/exceptions.py +2 -1
- hud/mcp_agent/__init__.py +15 -0
- hud/mcp_agent/base.py +723 -0
- hud/mcp_agent/claude.py +316 -0
- hud/mcp_agent/langchain.py +231 -0
- hud/mcp_agent/openai.py +318 -0
- hud/mcp_agent/tests/__init__.py +1 -0
- hud/mcp_agent/tests/test_base.py +437 -0
- hud/settings.py +14 -2
- hud/task.py +4 -0
- hud/telemetry/__init__.py +11 -7
- hud/telemetry/_trace.py +82 -71
- hud/telemetry/context.py +9 -27
- hud/telemetry/exporter.py +6 -5
- hud/telemetry/instrumentation/mcp.py +174 -410
- hud/telemetry/mcp_models.py +13 -74
- hud/telemetry/tests/test_context.py +9 -6
- hud/telemetry/tests/test_trace.py +92 -61
- hud/tools/__init__.py +21 -0
- hud/tools/base.py +65 -0
- hud/tools/bash.py +137 -0
- hud/tools/computer/__init__.py +13 -0
- hud/tools/computer/anthropic.py +411 -0
- hud/tools/computer/hud.py +315 -0
- hud/tools/computer/openai.py +283 -0
- hud/tools/edit.py +290 -0
- hud/tools/executors/__init__.py +13 -0
- hud/tools/executors/base.py +331 -0
- hud/tools/executors/pyautogui.py +585 -0
- hud/tools/executors/tests/__init__.py +1 -0
- hud/tools/executors/tests/test_base_executor.py +338 -0
- hud/tools/executors/tests/test_pyautogui_executor.py +162 -0
- hud/tools/executors/xdo.py +503 -0
- hud/tools/helper/README.md +56 -0
- hud/tools/helper/__init__.py +9 -0
- hud/tools/helper/mcp_server.py +78 -0
- hud/tools/helper/server_initialization.py +115 -0
- hud/tools/helper/utils.py +58 -0
- hud/tools/playwright_tool.py +373 -0
- hud/tools/tests/__init__.py +3 -0
- hud/tools/tests/test_bash.py +152 -0
- hud/tools/tests/test_computer.py +52 -0
- hud/tools/tests/test_computer_actions.py +34 -0
- hud/tools/tests/test_edit.py +233 -0
- hud/tools/tests/test_init.py +27 -0
- hud/tools/tests/test_playwright_tool.py +183 -0
- hud/tools/tests/test_tools.py +154 -0
- hud/tools/tests/test_utils.py +156 -0
- hud/tools/utils.py +50 -0
- hud/types.py +10 -1
- hud/utils/tests/test_init.py +21 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/METADATA +9 -6
- hud_python-0.3.0.dist-info/RECORD +124 -0
- hud_python-0.2.10.dist-info/RECORD +0 -85
- {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/WHEEL +0 -0
- {hud_python-0.2.10.dist-info → hud_python-0.3.0.dist-info}/licenses/LICENSE +0 -0
hud/mcp_agent/claude.py
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
"""Claude MCP Agent implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import copy
|
|
6
|
+
import logging
|
|
7
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
8
|
+
|
|
9
|
+
from anthropic import AsyncAnthropic, BadRequestError
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from anthropic.types.beta import (
|
|
13
|
+
BetaCacheControlEphemeralParam,
|
|
14
|
+
BetaImageBlockParam,
|
|
15
|
+
BetaMessageParam,
|
|
16
|
+
BetaTextBlockParam,
|
|
17
|
+
BetaToolResultBlockParam,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from hud.settings import settings
|
|
21
|
+
|
|
22
|
+
from .base import BaseMCPAgent
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def base64_to_content_block(base64: str, media_type: str = "image/png") -> BetaImageBlockParam:
    """Convert a base64-encoded image to a Claude image content block.

    Args:
        base64: The image bytes, already base64-encoded, as a string.
        media_type: MIME type reported for the image. Defaults to
            ``"image/png"`` (the previous hard-coded value), so existing
            callers are unaffected.

    Returns:
        A dict shaped as a Claude ``image`` block with a ``base64`` source.
    """
    # cast(): media_type is a plain str here, while the SDK TypedDict narrows
    # it to a Literal; the string form keeps the type import lazy.
    return cast(
        "BetaImageBlockParam",
        {
            "type": "image",
            "source": {"type": "base64", "media_type": media_type, "data": base64},
        },
    )
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def text_to_content_block(text: str) -> BetaTextBlockParam:
    """Wrap a plain string in a Claude ``text`` content block.

    Args:
        text: The text to embed; it is stored unchanged.

    Returns:
        A dict with ``type`` set to ``"text"`` and ``text`` set to the input.
    """
    block = dict(type="text", text=text)
    return block
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def tool_use_content_block(
    tool_use_id: str, content: list[BetaTextBlockParam | BetaImageBlockParam]
) -> BetaToolResultBlockParam:
    """Build a Claude ``tool_result`` block answering a given tool use.

    Despite the function name, the block produced has type ``tool_result``.

    Args:
        tool_use_id: Identifier of the tool use this result responds to.
        content: Text/image blocks making up the result; the list object is
            stored as-is (not copied).

    Returns:
        A dict with ``type``, ``tool_use_id`` and ``content`` keys.
    """
    result_block = {"type": "tool_result"}
    result_block["tool_use_id"] = tool_use_id
    result_block["content"] = content
    return result_block
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class ClaudeMCPAgent(BaseMCPAgent):
    """
    Claude agent that uses MCP servers for tool execution.

    This agent uses Claude's native tool calling capabilities but executes
    tools through MCP servers instead of direct implementation.
    """

    def __init__(
        self,
        model_client: AsyncAnthropic | None = None,
        model: str = "claude-3-7-sonnet-20250219",
        max_tokens: int = 4096,
        display_width_px: int = 1400,
        display_height_px: int = 850,
        use_computer_beta: bool = True,
        **kwargs: Any,
    ) -> None:
        """
        Initialize Claude MCP agent.

        Args:
            model_client: AsyncAnthropic client (created from
                ``settings.anthropic_api_key`` if not provided)
            model: Claude model to use
            max_tokens: Maximum tokens for response
            display_width_px: Display width for computer use tools
            display_height_px: Display height for computer use tools
            use_computer_beta: Whether to use computer-use beta features
            **kwargs: Additional arguments passed to BaseMCPAgent

        Raises:
            ValueError: If no client is passed and no Anthropic API key is
                configured in settings.
        """
        super().__init__(**kwargs)

        # Initialize client if not provided
        if model_client is None:
            api_key = settings.anthropic_api_key
            if not api_key:
                raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
            model_client = AsyncAnthropic(api_key=api_key)

        self.anthropic_client = model_client
        self.model = model
        self.max_tokens = max_tokens
        self.display_width_px = display_width_px
        self.display_height_px = display_height_px
        self.use_computer_beta = use_computer_beta

        # Track mapping from Claude tool names to MCP tool names
        self._claude_to_mcp_tool_map: dict[str, str] = {}

    async def initialize(self) -> None:
        """Initialize the agent and build tool mappings."""
        await super().initialize()
        # Build tool mappings after tools are discovered
        self._convert_tools_for_claude()

    async def create_initial_messages(
        self, prompt: str, screenshot: str | None
    ) -> list[BetaMessageParam]:
        """Create the initial message list for Claude.

        Args:
            prompt: Task text; always sent as the first content block.
            screenshot: Optional base64 image appended after the prompt.

        Returns:
            A one-element list holding a single ``user`` message.
        """
        user_content: list[BetaImageBlockParam | BetaTextBlockParam] = [
            text_to_content_block(prompt)
        ]

        # Add screenshot if available
        if screenshot:
            user_content.append(base64_to_content_block(screenshot))

        return [
            cast(
                "BetaMessageParam",
                {
                    "role": "user",
                    "content": user_content,
                },
            )
        ]

    async def get_model_response(
        self, messages: list[BetaMessageParam], step: int
    ) -> dict[str, Any]:
        """Get a response from Claude including any tool calls.

        The assistant reply is appended to ``messages`` in place so the next
        step sees it. If the API rejects the request because the prompt is
        too long, the call is retried once with the first message plus the
        last 20 messages; the caller's ``messages`` list is not truncated.

        Args:
            messages: Conversation so far; mutated by appending the reply.
            step: Current step number (not used by this implementation).

        Returns:
            A dict with ``content`` (text, prefixed by any thinking text),
            ``tool_calls`` (list of dicts with ``name``, ``arguments``,
            ``tool_use_id``, ``claude_name``), ``done`` (False when at least
            one tool call was returned) and ``raw_response``.

        Raises:
            BadRequestError: For any bad request other than a recoverable
                "prompt is too long" error.
        """
        claude_tools = self._convert_tools_for_claude()

        # Only request the beta when a native computer tool is actually offered.
        wants_computer_beta = self.use_computer_beta and any(
            t.get("type") == "computer_20250124" for t in claude_tools
        )

        # Work on a shallow copy so truncation never touches the caller's list.
        current_messages = messages.copy()

        while True:
            messages_cached = self._add_prompt_caching(current_messages)

            create_kwargs: dict[str, Any] = {
                "model": self.model,
                "max_tokens": self.max_tokens,
                "system": self.get_system_prompt(),
                "messages": messages_cached,
                "tools": claude_tools,
                "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
            }
            if wants_computer_beta:
                create_kwargs["betas"] = ["computer-use-2025-01-24"]

            try:
                response = await self.anthropic_client.beta.messages.create(**create_kwargs)
                break
            except BadRequestError as e:
                # Substring match rather than startswith: the SDK may wrap the
                # API text in a prefix such as "Error code: 400 - {...}", in
                # which case a prefix test would never fire.
                if "prompt is too long" not in str(e.message):
                    raise
                logger.warning("Prompt too long, truncating message history")
                # Nothing left to drop: give up instead of looping forever.
                if len(current_messages) <= 21:
                    raise
                # Keep first message and last 20 messages
                current_messages = [current_messages[0], *current_messages[-20:]]

        # Add assistant response to messages (for next step)
        messages.append(
            cast(
                "BetaMessageParam",
                {
                    "role": "assistant",
                    "content": response.content,
                },
            )
        )

        tool_calls: list[dict[str, Any]] = []
        text_parts: list[str] = []
        thinking_parts: list[str] = []

        for block in response.content:
            if block.type == "tool_use":
                tool_calls.append(
                    {
                        # Use MCP tool name for execution
                        "name": self._claude_to_mcp_tool_map.get(block.name, block.name),
                        "arguments": block.input,
                        "tool_use_id": block.id,  # Claude-specific metadata
                        "claude_name": block.name,  # Keep original Claude name for reference
                    }
                )
            elif block.type == "text":
                text_parts.append(block.text)
            elif block.type == "thinking":
                thinking_parts.append(f"Thinking: {block.thinking}\n")

        return {
            # Thinking text (if any) comes first, followed by the visible text.
            "content": "".join(thinking_parts) + "".join(text_parts),
            "tool_calls": tool_calls,
            "done": not tool_calls,
            "raw_response": response.model_dump(),  # For debugging
        }

    async def format_tool_results(
        self, processed_results: dict[str, Any], tool_calls: list[dict]
    ) -> list[BetaMessageParam]:
        """Format tool results into Claude messages.

        Args:
            processed_results: Must contain ``"results"``, iterated as
                ``(tool_name, content_blocks)`` pairs where each content block
                is a dict with a ``"type"`` of ``text``, ``error`` or
                ``image``. Other block types are ignored.
            tool_calls: The tool calls that produced the results; entries with
                a ``tool_use_id`` are used to pair results with Claude's ids.

        Returns:
            A one-element list with a ``user`` message holding one
            ``tool_result`` block per result that could be matched to an id.
        """
        # Queue ids per tool name in call order so that two calls to the same
        # tool in one turn do not collapse onto a single tool_use_id.
        pending_ids: dict[str, list[str]] = {}
        for tool_call in tool_calls:
            if "tool_use_id" in tool_call:
                pending_ids.setdefault(tool_call["name"], []).append(tool_call["tool_use_id"])

        user_content = []

        for tool_name, content_blocks in processed_results["results"]:
            queue = pending_ids.get(tool_name)
            if not queue:
                logger.warning("No tool_use_id found for %s", tool_name)
                continue
            # Consume ids in order; the final id stays available so surplus
            # results for the same tool still attach to it.
            tool_use_id = queue.pop(0) if len(queue) > 1 else queue[0]
            if not tool_use_id:
                logger.warning("No tool_use_id found for %s", tool_name)
                continue

            # Convert content blocks to Claude format
            claude_blocks = []
            for block in content_blocks:
                block_type = block["type"]
                if block_type == "text":
                    claude_blocks.append(text_to_content_block(block["text"]))
                elif block_type == "error":
                    claude_blocks.append(text_to_content_block(f"Error: {block['text']}"))
                elif block_type == "image":
                    claude_blocks.append(base64_to_content_block(block["data"]))

            user_content.append(tool_use_content_block(tool_use_id, claude_blocks))

        # Return as a user message containing all tool results
        return [
            cast(
                "BetaMessageParam",
                {
                    "role": "user",
                    "content": user_content,
                },
            )
        ]

    async def create_user_message(self, text: str) -> BetaMessageParam:
        """Create a user message in Claude's format (plain string content)."""
        return cast("BetaMessageParam", {"role": "user", "content": text})

    def _convert_tools_for_claude(self) -> list[dict]:
        """Convert MCP tools to Claude tool format.

        Resets and rebuilds ``self._claude_to_mcp_tool_map`` as a side effect.
        Tools named ``computer``, ``computer_anthropic`` or
        ``anthropic_computer`` are exposed as Claude's native ``computer``
        tool. Only the first such tool is exposed, so the request never
        carries two tools with the same name.

        Returns:
            The list of tool definitions to pass to the Claude API.
        """
        claude_tools: list[dict] = []
        self._claude_to_mcp_tool_map = {}  # Reset mapping

        for tool in self._available_tools:
            if tool.name in ("computer", "computer_anthropic", "anthropic_computer"):
                if "computer" in self._claude_to_mcp_tool_map:
                    # A second alias would produce a duplicate "computer" tool.
                    logger.debug(
                        "Skipping duplicate computer tool %s (already using %s)",
                        tool.name,
                        self._claude_to_mcp_tool_map["computer"],
                    )
                    continue
                # Use Claude's native computer use format with configurable dimensions
                claude_tool = {
                    "type": "computer_20250124",
                    "name": "computer",
                    "display_width_px": self.display_width_px,
                    "display_height_px": self.display_height_px,
                }
                # Map Claude's "computer" back to the actual MCP tool name
                self._claude_to_mcp_tool_map["computer"] = tool.name
            else:
                # Convert regular tools
                claude_tool = {
                    "name": tool.name,
                    "description": tool.description or f"Execute {tool.name}",
                    "input_schema": tool.inputSchema
                    or {
                        "type": "object",
                        "properties": {},
                    },
                }
                # Direct mapping for non-computer tools
                self._claude_to_mcp_tool_map[tool.name] = tool.name

            claude_tools.append(claude_tool)

        return claude_tools

    def _add_prompt_caching(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
        """Return a deep copy of ``messages`` with cache control on the last user turn.

        If the final message has role ``user`` and list content, every block
        in it (except ``thinking``/``redacted_thinking`` blocks) is tagged
        with ephemeral ``cache_control``. The input list is never modified.
        """
        messages_cached = copy.deepcopy(messages)

        if not messages_cached or messages_cached[-1].get("role") != "user":
            return messages_cached

        last_content = messages_cached[-1]["content"]
        # String content (see create_user_message) has no blocks to tag.
        if isinstance(last_content, list):
            for block in last_content:
                if block.get("type") not in ["thinking", "redacted_thinking"]:
                    cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
                    block["cache_control"] = cache_control  # type: ignore[reportGeneralTypeIssues]

        return messages_cached
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""LangChain MCP Agent implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
from langchain.agents import AgentExecutor, create_tool_calling_agent
|
|
9
|
+
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
|
10
|
+
from langchain.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage
|
|
11
|
+
from mcp_use.adapters.langchain_adapter import LangChainAdapter
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from langchain.schema.language_model import BaseLanguageModel
|
|
15
|
+
from langchain_core.tools import BaseTool
|
|
16
|
+
from .base import BaseMCPAgent
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class LangChainMCPAgent(BaseMCPAgent):
    """
    LangChain agent that uses MCP servers for tool execution.

    This agent wraps any LangChain-compatible LLM and provides
    access to MCP tools through LangChain's tool-calling interface.
    """

    def __init__(
        self,
        llm: BaseLanguageModel,
        **kwargs: Any,
    ) -> None:
        """
        Initialize LangChain MCP agent.

        Args:
            llm: Any LangChain-compatible language model
            **kwargs: Additional arguments passed to BaseMCPAgent
        """
        super().__init__(**kwargs)

        self.llm = llm
        self.adapter = LangChainAdapter(disallowed_tools=self.disallowed_tools)
        # Filled lazily on first use by _get_langchain_tools().
        self._langchain_tools: list[BaseTool] | None = None

    def _get_langchain_tools(self) -> list[BaseTool]:
        """Get or create LangChain tools from MCP tools.

        The converted tools are cached on the instance. The cache is only set
        once every connector has been converted, so a failure part-way through
        cannot leave a partial list behind for later calls.
        """
        if self._langchain_tools is not None:
            return self._langchain_tools

        converted: list[BaseTool] = []
        # Tools are grouped by connector because the adapter converts per connector.
        for connector, tools in self.get_tools_by_connector().items():
            converted.extend(self.adapter._convert_tools(tools, connector))  # type: ignore[reportAttributeAccessIssue]

        self._langchain_tools = converted
        logger.info("Created %s LangChain tools from MCP tools", len(self._langchain_tools))
        return self._langchain_tools

    async def create_initial_messages(
        self, prompt: str, screenshot: str | None
    ) -> list[BaseMessage]:
        """Create initial messages for LangChain.

        Args:
            prompt: Task text for the first user message.
            screenshot: Optional base64 PNG; when given, the user message is
                multimodal (text part plus ``image_url`` data URL).

        Returns:
            A system message followed by one human message.
        """
        messages: list[BaseMessage] = [SystemMessage(content=self.get_system_prompt())]

        if screenshot:
            # For multimodal models, include the image
            content = [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{screenshot}"}},
            ]
            messages.append(HumanMessage(content=content))
        else:
            messages.append(HumanMessage(content=prompt))

        return messages

    async def get_model_response(self, messages: list[BaseMessage], step: int) -> dict[str, Any]:
        """Get response from LangChain model including any tool calls.

        Args:
            messages: Conversation so far. The last ``SystemMessage`` (if any)
                supplies the system prompt; the last ``HumanMessage`` is the
                current user turn and everything before it is chat history.
            step: Current step number (not used by this implementation).

        Returns:
            A dict with ``content``, ``tool_calls`` and ``done``. Executor
            failures are reported as ``content="Error: ..."`` with
            ``done=True`` rather than raised.
        """
        # Get LangChain tools (created lazily)
        langchain_tools = self._get_langchain_tools()

        # Split off the system prompt; the last system message wins.
        system_content = "You are a helpful assistant"
        non_system_messages: list[BaseMessage] = []
        for msg in messages:
            if isinstance(msg, SystemMessage):
                system_content = str(msg.content)
            else:
                non_system_messages.append(msg)

        # Locate the most recent user message.
        last_user_idx: int | None = None
        for idx in range(len(non_system_messages) - 1, -1, -1):
            if isinstance(non_system_messages[idx], HumanMessage):
                last_user_idx = idx
                break

        if last_user_idx is None:
            return {
                "content": "No user message found",
                "tool_calls": [],
                "done": True,
            }
        last_user_msg = non_system_messages[last_user_idx]

        # Plain-text view of the user turn, passed as "input".
        input_text = ""
        if isinstance(last_user_msg.content, str):
            input_text = last_user_msg.content
        elif isinstance(last_user_msg.content, list):
            # First text part of the multimodal content
            for item in last_user_msg.content:
                if isinstance(item, dict) and item.get("type") == "text":
                    input_text = item.get("text", "")
                    break

        # History is everything before the current user turn (system excluded),
        # sliced by position so it is right even if that turn is not last.
        chat_history = [
            msg
            for msg in non_system_messages[:last_user_idx]
            if isinstance(msg, (HumanMessage, AIMessage))
        ]

        prompt = ChatPromptTemplate.from_messages(
            [
                # A message object, not ("system", str): the tuple form is
                # parsed as a template, so literal braces would be treated as
                # template variables.
                SystemMessage(content=system_content),
                MessagesPlaceholder(variable_name="chat_history"),
                # The current user turn must appear in the prompt, and passing
                # the original message keeps any attached screenshot.
                MessagesPlaceholder(variable_name="user_message"),
                MessagesPlaceholder(variable_name="agent_scratchpad"),
            ]
        )

        agent = create_tool_calling_agent(
            llm=self.llm,
            tools=langchain_tools,
            prompt=prompt,
        )
        executor = AgentExecutor(
            agent=agent,
            tools=langchain_tools,
            verbose=False,
        )

        try:
            result = await executor.ainvoke(
                {
                    "input": input_text,
                    "chat_history": chat_history,
                    "user_message": [last_user_msg],
                }
            )
        except Exception as e:
            # Boundary handler: report the failure to the agent loop instead
            # of crashing it, but keep the traceback in the logs.
            logger.exception("Agent execution failed: %s", e)
            return {
                "content": f"Error: {e!s}",
                "tool_calls": [],
                "done": True,
            }

        output = result.get("output", "")

        # NOTE(review): the executor is built without return_intermediate_steps,
        # so this key is presumably absent and the branch below never runs.
        # Confirm before enabling it: the executor appears to run tools itself,
        # so returning them as tool_calls could run them twice.
        intermediate_steps = result.get("intermediate_steps")
        if not intermediate_steps:
            # No tools called, just text response
            return {
                "content": output,
                "tool_calls": [],
                "done": True,
            }

        tool_calls = [
            {"name": action.tool, "arguments": action.tool_input}
            for action, _ in intermediate_steps
            if hasattr(action, "tool") and hasattr(action, "tool_input")
        ]
        return {
            "content": output,
            "tool_calls": tool_calls,
            "done": False,  # Continue if tools were called
        }

    async def format_tool_results(
        self, processed_results: dict[str, Any], tool_calls: list[dict]
    ) -> list[BaseMessage]:
        """Format tool results into LangChain messages.

        Args:
            processed_results: Must contain ``"text"``; an optional
                ``"screenshot"`` (base64 PNG) is attached as an image.
            tool_calls: The executed tool calls; only their ``name`` is used.

        Returns:
            An AI message naming the tools used, followed by a human message
            carrying the tool results.
        """
        tool_names = [tc["name"] for tc in tool_calls]
        announcement = AIMessage(content=f"I'll use the following tools: {', '.join(tool_names)}")

        result_text = processed_results["text"]
        screenshot = processed_results.get("screenshot")
        results_body = f"Tool results:\n{result_text}"

        if screenshot:
            # Include screenshot in multimodal format
            results_message = HumanMessage(
                content=[
                    {"type": "text", "text": results_body},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{screenshot}"},
                    },
                ]
            )
        else:
            results_message = HumanMessage(content=results_body)

        return [announcement, results_message]
|