hud-python 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of hud-python might be problematic.
Files changed (86)
  1. hud/__init__.py +20 -8
  2. hud/adapters/common/adapter.py +14 -3
  3. hud/adapters/common/tests/test_adapter.py +16 -4
  4. hud/datasets.py +188 -0
  5. hud/env/docker_client.py +15 -3
  6. hud/env/environment.py +10 -7
  7. hud/env/local_docker_client.py +29 -7
  8. hud/env/remote_client.py +1 -1
  9. hud/env/remote_docker_client.py +2 -2
  10. hud/exceptions.py +2 -1
  11. hud/gym.py +0 -9
  12. hud/mcp/__init__.py +17 -0
  13. hud/mcp/base.py +631 -0
  14. hud/mcp/claude.py +321 -0
  15. hud/mcp/client.py +312 -0
  16. hud/mcp/langchain.py +250 -0
  17. hud/mcp/openai.py +334 -0
  18. hud/mcp/tests/__init__.py +1 -0
  19. hud/mcp/tests/test_base.py +512 -0
  20. hud/mcp/tests/test_claude.py +294 -0
  21. hud/mcp/tests/test_client.py +324 -0
  22. hud/mcp/tests/test_openai.py +238 -0
  23. hud/settings.py +20 -2
  24. hud/task.py +5 -88
  25. hud/taskset.py +2 -23
  26. hud/telemetry/__init__.py +16 -7
  27. hud/telemetry/_trace.py +246 -72
  28. hud/telemetry/context.py +88 -27
  29. hud/telemetry/exporter.py +171 -11
  30. hud/telemetry/instrumentation/mcp.py +174 -410
  31. hud/telemetry/job.py +141 -0
  32. hud/telemetry/mcp_models.py +13 -74
  33. hud/telemetry/tests/test_context.py +9 -6
  34. hud/telemetry/tests/test_trace.py +120 -78
  35. hud/tools/__init__.py +34 -0
  36. hud/tools/base.py +65 -0
  37. hud/tools/bash.py +137 -0
  38. hud/tools/computer/__init__.py +13 -0
  39. hud/tools/computer/anthropic.py +411 -0
  40. hud/tools/computer/hud.py +315 -0
  41. hud/tools/computer/openai.py +283 -0
  42. hud/tools/edit.py +290 -0
  43. hud/tools/executors/__init__.py +30 -0
  44. hud/tools/executors/base.py +331 -0
  45. hud/tools/executors/pyautogui.py +619 -0
  46. hud/tools/executors/tests/__init__.py +1 -0
  47. hud/tools/executors/tests/test_base_executor.py +338 -0
  48. hud/tools/executors/tests/test_pyautogui_executor.py +165 -0
  49. hud/tools/executors/xdo.py +503 -0
  50. hud/tools/helper/README.md +56 -0
  51. hud/tools/helper/__init__.py +9 -0
  52. hud/tools/helper/mcp_server.py +78 -0
  53. hud/tools/helper/server_initialization.py +115 -0
  54. hud/tools/helper/utils.py +58 -0
  55. hud/tools/playwright_tool.py +379 -0
  56. hud/tools/tests/__init__.py +3 -0
  57. hud/tools/tests/test_bash.py +152 -0
  58. hud/tools/tests/test_computer.py +52 -0
  59. hud/tools/tests/test_computer_actions.py +34 -0
  60. hud/tools/tests/test_edit.py +240 -0
  61. hud/tools/tests/test_init.py +27 -0
  62. hud/tools/tests/test_playwright_tool.py +183 -0
  63. hud/tools/tests/test_tools.py +157 -0
  64. hud/tools/tests/test_utils.py +156 -0
  65. hud/tools/utils.py +50 -0
  66. hud/trajectory.py +5 -1
  67. hud/types.py +10 -1
  68. hud/utils/tests/test_init.py +21 -0
  69. hud/utils/tests/test_version.py +1 -1
  70. hud/version.py +1 -1
  71. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/METADATA +27 -18
  72. hud_python-0.3.1.dist-info/RECORD +119 -0
  73. hud/evaluators/__init__.py +0 -9
  74. hud/evaluators/base.py +0 -32
  75. hud/evaluators/inspect.py +0 -24
  76. hud/evaluators/judge.py +0 -189
  77. hud/evaluators/match.py +0 -156
  78. hud/evaluators/remote.py +0 -65
  79. hud/evaluators/tests/__init__.py +0 -0
  80. hud/evaluators/tests/test_inspect.py +0 -12
  81. hud/evaluators/tests/test_judge.py +0 -231
  82. hud/evaluators/tests/test_match.py +0 -115
  83. hud/evaluators/tests/test_remote.py +0 -98
  84. hud_python-0.2.10.dist-info/RECORD +0 -85
  85. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/WHEEL +0 -0
  86. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/licenses/LICENSE +0 -0
hud/mcp/langchain.py ADDED
@@ -0,0 +1,250 @@
+"""LangChain MCP Agent implementation."""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any
+
+import mcp.types as types
+from langchain.agents import AgentExecutor, create_tool_calling_agent
+from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage
+from mcp.types import CallToolRequestParams as MCPToolCall
+from mcp.types import CallToolResult as MCPToolResult
+from mcp_use.adapters.langchain_adapter import LangChainAdapter
+
+if TYPE_CHECKING:
+    from langchain.schema.language_model import BaseLanguageModel
+    from langchain_core.tools import BaseTool
+from .base import BaseMCPAgent, ModelResponse
+
+logger = logging.getLogger(__name__)
+
+
+class LangChainMCPAgent(BaseMCPAgent):
+    """
+    LangChain agent that uses MCP servers for tool execution.
+
+    This agent wraps any LangChain-compatible LLM and provides
+    access to MCP tools through LangChain's tool-calling interface.
+    """
+
+    def __init__(
+        self,
+        llm: BaseLanguageModel,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Initialize LangChain MCP agent.
+
+        Args:
+            llm: Any LangChain-compatible language model
+            **kwargs: Additional arguments passed to BaseMCPAgent
+        """
+        super().__init__(**kwargs)
+
+        self.llm = llm
+        self.adapter = LangChainAdapter(disallowed_tools=self.disallowed_tools)
+        self._langchain_tools: list[BaseTool] | None = None
+
+        self.model_name = (
+            "langchain-" + self.llm.model_name  # type: ignore
+            if hasattr(self.llm, "model_name")
+            else "unknown"
+        )
+
+    def _get_langchain_tools(self) -> list[BaseTool]:
+        """Get or create LangChain tools from MCP tools."""
+        if self._langchain_tools is not None:
+            return self._langchain_tools
+
+        # Create LangChain tools from MCP tools using the adapter
+        self._langchain_tools = []
+
+        # Get tools grouped by connector
+        tools_by_connector = self.get_tools_by_connector()
+
+        # Convert tools using the adapter
+        for connector, tools in tools_by_connector.items():
+            langchain_tools = self.adapter._convert_tools(tools, connector)  # type: ignore[reportAttributeAccessIssue]
+            self._langchain_tools.extend(langchain_tools)
+
+        logger.info("Created %s LangChain tools from MCP tools", len(self._langchain_tools))
+        return self._langchain_tools
+
+    async def create_initial_messages(
+        self, prompt: str, screenshot: str | None
+    ) -> list[BaseMessage]:
+        """Create initial messages for LangChain."""
+        messages = []
+
+        # Add system message
+        system_prompt = self.get_system_prompt()
+        messages.append(SystemMessage(content=system_prompt))
+
+        # Add user message with prompt and optional screenshot
+        if screenshot:
+            # For multimodal models, include the image
+            content = [
+                {"type": "text", "text": prompt},
+                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{screenshot}"}},
+            ]
+            messages.append(HumanMessage(content=content))
+        else:
+            messages.append(HumanMessage(content=prompt))
+
+        return messages
+
+    async def get_model_response(self, messages: list[BaseMessage]) -> ModelResponse:
+        """Get response from LangChain model including any tool calls."""
+        # Get LangChain tools (created lazily)
+        langchain_tools = self._get_langchain_tools()
+
+        # Create a prompt template from current messages
+        # Extract system message if present
+        system_content = "You are a helpful assistant"
+        non_system_messages = []
+
+        for msg in messages:
+            if isinstance(msg, SystemMessage):
+                system_content = str(msg.content)
+            else:
+                non_system_messages.append(msg)
+
+        # Create prompt with placeholders
+        prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", system_content),
+                MessagesPlaceholder(variable_name="chat_history"),
+                MessagesPlaceholder(variable_name="agent_scratchpad"),
+            ]
+        )
+
+        # Create agent with tools
+        agent = create_tool_calling_agent(
+            llm=self.llm,
+            tools=langchain_tools,
+            prompt=prompt,
+        )
+
+        # Create executor
+        executor = AgentExecutor(
+            agent=agent,
+            tools=langchain_tools,
+            verbose=False,
+        )
+
+        # Format the last user message as input
+        last_user_msg = None
+        for msg in reversed(non_system_messages):
+            if isinstance(msg, HumanMessage):
+                last_user_msg = msg
+                break
+
+        if not last_user_msg:
+            return ModelResponse(content="No user message found", tool_calls=[], done=True)
+
+        # Extract text from message content
+        input_text = ""
+        if isinstance(last_user_msg.content, str):
+            input_text = last_user_msg.content
+        elif isinstance(last_user_msg.content, list):
+            # Extract text from multimodal content
+            for item in last_user_msg.content:
+                if isinstance(item, dict) and item.get("type") == "text":
+                    input_text = item.get("text", "")
+                    break
+
+        # Build chat history (exclude last user message and system)
+        chat_history = []
+        for _, msg in enumerate(non_system_messages[:-1]):
+            if isinstance(msg, HumanMessage | AIMessage):
+                chat_history.append(msg)
+
+        # Execute the agent
+        try:
+            result = await executor.ainvoke(
+                {
+                    "input": input_text,
+                    "chat_history": chat_history,
+                }
+            )
+
+            # Process the result
+            output = result.get("output", "")
+
+            # Check if tools were called
+            if result.get("intermediate_steps"):
+                # Tools were called
+                tool_calls = []
+                for action, _ in result["intermediate_steps"]:
+                    if hasattr(action, "tool") and hasattr(action, "tool_input"):
+                        tool_calls.append(
+                            MCPToolCall(
+                                name=action.tool,
+                                arguments=action.tool_input,
+                            )
+                        )
+
+                return ModelResponse(content=output, tool_calls=tool_calls, done=False)
+            else:
+                # No tools called, just text response
+                return ModelResponse(content=output, tool_calls=[], done=True)
+
+        except Exception as e:
+            logger.error("Agent execution failed: %s", e)
+            return ModelResponse(content=f"Error: {e!s}", tool_calls=[], done=True)
+
+    async def format_tool_results(
+        self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
+    ) -> list[BaseMessage]:
+        """Format tool results into LangChain messages."""
+        # Create an AI message with the tool calls and results
+        messages = []
+
+        # First add an AI message indicating tools were called
+        tool_names = [tc.name for tc in tool_calls]
+        ai_content = f"I'll use the following tools: {', '.join(tool_names)}"
+        messages.append(AIMessage(content=ai_content))
+
+        # Build result text from tool results
+        text_parts = []
+        latest_screenshot = None
+
+        for tool_call, result in zip(tool_calls, tool_results, strict=False):
+            if result.isError:
+                error_text = "Tool execution failed"
+                for content in result.content:
+                    if isinstance(content, types.TextContent):
+                        error_text = content.text
+                        break
+                text_parts.append(f"Error - {tool_call.name}: {error_text}")
+            else:
+                # Process success content
+                tool_output = []
+                for content in result.content:
+                    if isinstance(content, types.TextContent):
+                        tool_output.append(content.text)
+                    elif isinstance(content, types.ImageContent):
+                        latest_screenshot = content.data
+
+                if tool_output:
+                    text_parts.append(f"{tool_call.name}: " + " ".join(tool_output))
+
+        result_text = "\n".join(text_parts) if text_parts else "No output from tools"
+
+        # Then add a human message with the tool results
+        if latest_screenshot:
+            # Include screenshot in multimodal format
+            content = [
+                {"type": "text", "text": f"Tool results:\n{result_text}"},
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{latest_screenshot}"},
+                },
+            ]
+            messages.append(HumanMessage(content=content))
+        else:
+            messages.append(HumanMessage(content=f"Tool results:\n{result_text}"))
+
+        return messages
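For orientation, a minimal usage sketch of the new LangChainMCPAgent follows. It is not part of the diff: the ChatOpenAI model choice is illustrative, and any arguments BaseMCPAgent itself requires (for example an MCP client or server configuration) are defined in hud/mcp/base.py and omitted here as assumptions. The run(prompt, max_steps=...) call matches the run() signature visible in hud/mcp/openai.py below.

# Hypothetical sketch; constructor arguments beyond llm depend on BaseMCPAgent.
import asyncio

from langchain_openai import ChatOpenAI  # any LangChain-compatible chat model works

from hud.mcp.langchain import LangChainMCPAgent


async def main() -> None:
    llm = ChatOpenAI(model="gpt-4o")
    # BaseMCPAgent's own parameters (e.g. MCP client/server configuration)
    # live in hud/mcp/base.py and are not shown in this hunk.
    agent = LangChainMCPAgent(llm=llm)
    result = await agent.run("Open the project README and summarize it", max_steps=10)
    print(result)


asyncio.run(main())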
hud/mcp/openai.py ADDED
@@ -0,0 +1,334 @@
+"""OpenAI MCP Agent implementation."""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any, Literal
+
+import mcp.types as types
+from mcp.types import CallToolRequestParams as MCPToolCall
+from mcp.types import CallToolResult as MCPToolResult
+from openai import AsyncOpenAI
+from openai.types.responses import (
+    ResponseComputerToolCall,
+    ResponseInputParam,
+    ResponseOutputMessage,
+    ResponseOutputText,
+    ToolParam,
+)
+
+from hud.settings import settings
+
+from .base import AgentResult, BaseMCPAgent, ModelResponse
+
+if TYPE_CHECKING:
+    from hud.datasets import TaskConfig
+
+logger = logging.getLogger(__name__)
+
+
+class OpenAIMCPAgent(BaseMCPAgent):
+    """
+    OpenAI agent that uses MCP servers for tool execution.
+
+    This agent uses OpenAI's Computer Use API format but executes
+    tools through MCP servers instead of direct implementation.
+    """
+
+    def __init__(
+        self,
+        model_client: AsyncOpenAI | None = None,
+        model: str = "computer-use-preview",
+        environment: Literal["windows", "mac", "linux", "browser"] = "linux",
+        display_width: int = 1024,
+        display_height: int = 768,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Initialize OpenAI MCP agent.
+
+        Args:
+            model_client: AsyncOpenAI client (created if not provided)
+            model: OpenAI model to use
+            environment: Environment type for computer use
+            display_width: Display width for computer use
+            display_height: Display height for computer use
+            **kwargs: Additional arguments passed to BaseMCPAgent
+        """
+        super().__init__(**kwargs)
+
+        # Initialize client if not provided
+        if model_client is None:
+            api_key = settings.openai_api_key
+            if not api_key:
+                raise ValueError("OpenAI API key not found. Set OPENAI_API_KEY.")
+            model_client = AsyncOpenAI(api_key=api_key)
+
+        self.openai_client = model_client
+        self.model = model
+        self.environment = environment
+        self.display_width = display_width
+        self.display_height = display_height
+
+        # State tracking for OpenAI's stateful API
+        self.last_response_id: str | None = None
+        self.pending_call_id: str | None = None
+        self.pending_safety_checks: list[Any] = []
+
+        self.model_name = "openai-" + self.model
+
+        # Base system prompt for autonomous operation
+        self.base_system_prompt = """
+You are an autonomous computer-using agent. Follow these guidelines:
+
+1. NEVER ask for confirmation. Complete all tasks autonomously.
+2. Do NOT send messages like "I need to confirm before..." or "Do you want me to continue?" - just proceed.
+3. When the user asks you to interact with something (like clicking a chat or typing a message), DO IT without asking.
+4. Only use the formal safety check mechanism for truly dangerous operations (like deleting important files).
+5. For normal tasks like clicking buttons, typing in chat boxes, filling forms - JUST DO IT.
+6. The user has already given you permission by running this agent. No further confirmation is needed.
+7. Be decisive and action-oriented. Complete the requested task fully.
+
+Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked.
+"""  # noqa: E501
+
+    async def run(self, prompt_or_task: str | TaskConfig, max_steps: int = 10) -> AgentResult:
+        """
+        Run the agent with the given prompt or task.
+
+        Override to reset OpenAI-specific state.
+        """
+        # Reset state for new run
+        self.last_response_id = None
+        self.pending_call_id = None
+        self.pending_safety_checks = []
+
+        # Use base implementation
+        return await super().run(prompt_or_task, max_steps)
+
+    async def create_initial_messages(
+        self, prompt: str, screenshot: str | None = None
+    ) -> list[Any]:
+        """
+        Create initial messages for OpenAI.
+
+        OpenAI uses a different message format - we'll store the prompt
+        and screenshot for use in get_model_response.
+        """
+        # For OpenAI, we don't create messages upfront, we build them in get_model_response
+        # Just return a list with the prompt and screenshot
+        return [{"prompt": prompt, "screenshot": screenshot}]
+
+    async def get_model_response(self, messages: list[Any]) -> ModelResponse:
+        """Get response from OpenAI including any tool calls."""
+        # OpenAI's API is stateful, so we handle messages differently
+
+        # Check if we have computer tools available
+        computer_tool_name = None
+        for tool in self._available_tools:
+            if tool.name in ["computer_openai", "computer"]:
+                computer_tool_name = tool.name
+                break
+
+        if not computer_tool_name:
+            # No computer tools available, just return a text response
+            return ModelResponse(
+                content="No computer use tools available",
+                tool_calls=[],
+                done=True,
+            )
+
+        # Define the computer use tool
+        computer_tool: ToolParam = {  # type: ignore[reportAssignmentType]
+            "type": "computer_use_preview",
+            "display_width": self.display_width,
+            "display_height": self.display_height,
+            "environment": self.environment,
+        }
+
+        # Build the request based on whether this is first step or follow-up
+        if self.pending_call_id is None and self.last_response_id is None:
+            # First step - extract prompt and screenshot from messages
+            initial_data = messages[0]  # Our custom format from create_initial_messages
+            prompt_text = initial_data.get("prompt", "")
+            screenshot = initial_data.get("screenshot")
+
+            # Create the initial request
+            input_content: list[dict[str, Any]] = [{"type": "input_text", "text": prompt_text}]
+
+            if screenshot:
+                input_content.append(
+                    {
+                        "type": "input_image",
+                        "image_url": f"data:image/png;base64,{screenshot}",
+                    }
+                )
+
+            input_param: ResponseInputParam = [{"role": "user", "content": input_content}]  # type: ignore[reportUnknownMemberType]
+
+            # Combine base system prompt with any custom system prompt
+            full_instructions = self.base_system_prompt
+            if self.custom_system_prompt:
+                full_instructions = f"{self.custom_system_prompt}\n\n{full_instructions}"
+
+            response = await self.openai_client.responses.create(
+                model=self.model,
+                tools=[computer_tool],
+                input=input_param,
+                instructions=full_instructions,
+                truncation="auto",
+                reasoning={"summary": "auto"},
+            )
+        else:
+            # Follow-up step - check if this is user input or tool result
+            latest_message = messages[-1] if messages else {}
+
+            if latest_message.get("type") == "user_input":
+                # User provided input in conversation mode
+                user_text = latest_message.get("text", "")
+                input_param_followup: ResponseInputParam = [
+                    {"role": "user", "content": [{"type": "input_text", "text": user_text}]}
+                ]
+                # Reset pending_call_id since this is user input, not a tool response
+                self.pending_call_id = None
+            else:
+                # Tool result - need screenshot from processed results
+                latest_screenshot = None
+                for msg in reversed(messages):
+                    if isinstance(msg, dict) and "screenshot" in msg:
+                        latest_screenshot = msg["screenshot"]
+                        break
+
+                if not latest_screenshot:
+                    logger.warning("No screenshot provided for response to action")
+                    return ModelResponse(
+                        content="No screenshot available for next action",
+                        tool_calls=[],
+                        done=True,
+                    )
+
+                # Create response to previous action
+                input_param_followup: ResponseInputParam = [  # type: ignore[reportAssignmentType]
+                    {  # type: ignore[reportAssignmentType]
+                        "call_id": self.pending_call_id,
+                        "type": "computer_call_output",
+                        "output": {
+                            "type": "input_image",
+                            "image_url": f"data:image/png;base64,{latest_screenshot}",
+                        },
+                        "acknowledged_safety_checks": self.pending_safety_checks,
+                    }
+                ]
+
+                self.pending_safety_checks = []
+
+            response = await self.openai_client.responses.create(
+                model=self.model,
+                previous_response_id=self.last_response_id,
+                tools=[computer_tool],
+                input=input_param_followup,
+                truncation="auto",
+            )
+
+        # Store response ID for next call
+        self.last_response_id = response.id
+
+        # Process response
+        result = ModelResponse(
+            content="",
+            tool_calls=[],
+            done=False,  # Will be set to True only if no tool calls
+        )
+
+        self.pending_call_id = None
+
+        # Check for computer calls
+        computer_calls = [
+            item
+            for item in response.output
+            if isinstance(item, ResponseComputerToolCall) and item.type == "computer_call"
+        ]
+
+        if computer_calls:
+            # Process computer calls
+            result.done = False
+            for computer_call in computer_calls:
+                self.pending_call_id = computer_call.call_id
+                self.pending_safety_checks = computer_call.pending_safety_checks
+
+                # Convert OpenAI action to MCP tool call
+                action = computer_call.action.model_dump()
+
+                # Create MCPToolCall object with OpenAI metadata as extra fields
+                # Pyright will complain but the tool class accepts extra fields
+                tool_call = MCPToolCall(
+                    name=computer_tool_name,
+                    arguments=action,
+                    call_id=computer_call.call_id,  # type: ignore
+                    pending_safety_checks=computer_call.pending_safety_checks,  # type: ignore
+                )
+                result.tool_calls.append(tool_call)
+        else:
+            # No computer calls, check for text response
+            for item in response.output:
+                if isinstance(item, ResponseOutputMessage) and item.type == "message":
+                    # Extract text from content blocks
+                    text_parts = [
+                        content.text
+                        for content in item.content
+                        if isinstance(content, ResponseOutputText)
+                    ]
+                    if text_parts:
+                        result.content = "".join(text_parts)
+                    break
+
+        # Extract reasoning if present
+        reasoning_text = ""
+        for item in response.output:
+            if item.type == "reasoning" and hasattr(item, "summary") and item.summary:
+                reasoning_text += f"Thinking: {item.summary[0].text}\n"
+
+        if reasoning_text:
+            result.content = reasoning_text + result.content if result.content else reasoning_text
+
+        # Set done=True if no tool calls (task complete or waiting for user)
+        if not result.tool_calls:
+            result.done = True
+
+        return result
+
+    async def format_tool_results(
+        self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
+    ) -> list[Any]:
+        """
+        Format tool results for OpenAI's stateful API.
+
+        OpenAI doesn't use a traditional message format - we just need to
+        preserve the screenshot for the next step.
+        """
+        # Extract latest screenshot from results
+        latest_screenshot = None
+        for result in tool_results:
+            if not result.isError:
+                for content in result.content:
+                    if isinstance(content, types.ImageContent):
+                        latest_screenshot = content.data
+
+        # Return a simple dict that get_model_response can use
+        return [
+            {
+                "type": "tool_result",
+                "screenshot": latest_screenshot,
+            }
+        ]
+
+    async def create_user_message(self, text: str) -> dict[str, Any]:
+        """
+        Create a user message for OpenAI's stateful API.
+
+        Since OpenAI maintains conversation state server-side,
+        we just need to track that we're expecting user input.
+        """
+        # For OpenAI, we'll handle this in get_model_response
+        # by including the user's text in the next input
+        return {"type": "user_input", "text": text}
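As with the LangChain agent, a minimal, hypothetical usage sketch (not part of the diff) may help orient readers. It relies only on what this hunk shows: with model_client=None the constructor builds an AsyncOpenAI client from settings.openai_api_key, and run() accepts a prompt string plus max_steps. Any further arguments required by BaseMCPAgent are defined in hud/mcp/base.py and are omitted here as assumptions.

# Hypothetical sketch; requires OPENAI_API_KEY and whatever BaseMCPAgent needs.
import asyncio

from hud.mcp.openai import OpenAIMCPAgent


async def main() -> None:
    # No model_client passed: __init__ above falls back to
    # AsyncOpenAI(api_key=settings.openai_api_key).
    agent = OpenAIMCPAgent(environment="linux", display_width=1280, display_height=800)
    result = await agent.run("Open a terminal and list the home directory", max_steps=5)
    print(result)


asyncio.run(main())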
@@ -0,0 +1 @@
+"""Tests for MCP Agent module."""