hud-python 0.3.4 (py3-none-any.whl) → 0.4.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of hud-python might be problematic.

Files changed (192)
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +17 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +379 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +354 -0
  45. hud/clients/fastmcp.py +202 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -414
  87. hud/tools/computer/hud.py +376 -328
  88. hud/tools/computer/openai.py +295 -286
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.0.dist-info/METADATA +474 -0
  126. hud_python-0.4.0.dist-info/RECORD +132 -0
  127. hud_python-0.4.0.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.4.dist-info/METADATA +0 -284
  190. hud_python-0.3.4.dist-info/RECORD +0 -120
  191. hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
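
Of the 192 files, the most visible change for downstream code is the package restructure: hud/mcp and hud/agent collapse into hud/agents, hud/adapters and hud/env are removed, and new hud/cli, hud/clients, and hud/otel packages appear. A minimal before/after sketch of the import changes, inferred only from the renamed test file diffed below (class, keyword, and method names are taken from that diff, not from official migration notes):

# 0.3.4 layout (removed in 0.4.0):
#   from mcp.types import CallToolRequestParams as MCPToolCall
#   from hud.mcp.openai import OpenAIMCPAgent

# 0.4.0 layout, as exercised by the new tests:
from hud.agents.openai import OperatorAgent
from hud.types import MCPToolCall, MCPToolResult


async def ask(mcp_client, openai_client, messages):
    # OpenAIMCPAgent is now OperatorAgent; the constructor keywords are unchanged
    agent = OperatorAgent(mcp_client=mcp_client, model_client=openai_client, model="gpt-4")
    # get_model_response(...) appears in the new tests as get_response(...)
    return await agent.get_response(messages)


# Tool calls now take `id=` rather than `call_id=`:
call = MCPToolCall(name="screenshot", arguments={}, id="call_456")
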
hud/{mcp → agents}/tests/test_openai.py (renamed; +237 -238)
@@ -1,238 +1,237 @@
-"""Tests for OpenAI MCP Agent implementation."""
-
-from __future__ import annotations
-
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-from mcp import types
-from mcp.types import CallToolRequestParams as MCPToolCall
-
-from hud.mcp.openai import OpenAIMCPAgent
-
-
-class TestOpenAIMCPAgent:
-    """Test OpenAIMCPAgent class."""
-
-    @pytest.fixture
-    def mock_mcp_client(self):
-        """Create a mock MCP client."""
-        mcp_client = MagicMock()
-        mcp_client.get_all_active_sessions = MagicMock(return_value={})
-        mcp_client.get_tool_map = MagicMock(return_value={})
-        return mcp_client
-
-    @pytest.fixture
-    def mock_openai(self):
-        """Create a mock OpenAI client."""
-        with patch("hud.mcp.openai.AsyncOpenAI") as mock:
-            client = AsyncMock()
-            mock.return_value = client
-            yield client
-
-    @pytest.mark.asyncio
-    async def test_init(self, mock_mcp_client):
-        """Test agent initialization."""
-        mock_model_client = MagicMock()
-        agent = OpenAIMCPAgent(
-            mcp_client=mock_mcp_client, model_client=mock_model_client, model="gpt-4"
-        )
-
-        assert agent.model_name == "openai-gpt-4"
-        assert agent.model == "gpt-4"
-        assert agent.openai_client == mock_model_client
-
-    @pytest.mark.asyncio
-    async def test_create_initial_messages(self, mock_mcp_client):
-        """Test creating initial messages."""
-        mock_model_client = MagicMock()
-        agent = OpenAIMCPAgent(mcp_client=mock_mcp_client, model_client=mock_model_client)
-
-        # Test with text only
-        messages = await agent.create_initial_messages("Hello, GPT!")
-        assert len(messages) == 1
-        assert messages[0]["prompt"] == "Hello, GPT!"
-        assert messages[0]["screenshot"] is None
-
-        # Test with screenshot
-        messages = await agent.create_initial_messages("Look at this", screenshot="base64data")
-        assert len(messages) == 1
-        assert messages[0]["prompt"] == "Look at this"
-        assert messages[0]["screenshot"] == "base64data"
-
-    @pytest.mark.asyncio
-    async def test_format_tool_results(self, mock_mcp_client, mock_openai):
-        """Test formatting tool results."""
-        agent = OpenAIMCPAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
-
-        tool_calls = [
-            MCPToolCall(name="test_tool", arguments={}, call_id="call_123"),  # type: ignore
-            MCPToolCall(name="screenshot", arguments={}, call_id="call_456"),  # type: ignore
-        ]
-
-        tool_results = [
-            types.CallToolResult(
-                content=[types.TextContent(type="text", text="Success")], isError=False
-            ),
-            types.CallToolResult(
-                content=[types.ImageContent(type="image", data="base64data", mimeType="image/png")],
-                isError=False,
-            ),
-        ]
-
-        messages = await agent.format_tool_results(tool_calls, tool_results)
-
-        # OpenAI's format_tool_results just returns a simple dict with screenshot
-        assert len(messages) == 1
-        assert messages[0]["type"] == "tool_result"
-        assert (
-            messages[0]["screenshot"] == "base64data"
-        )  # Should extract screenshot from second result
-
-    @pytest.mark.asyncio
-    async def test_format_tool_results_with_error(self, mock_mcp_client, mock_openai):
-        """Test formatting tool results with errors."""
-        agent = OpenAIMCPAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
-
-        tool_calls = [
-            MCPToolCall(name="failing_tool", arguments={}, call_id="call_error"),  # type: ignore
-        ]
-
-        tool_results = [
-            types.CallToolResult(
-                content=[types.TextContent(type="text", text="Something went wrong")], isError=True
-            ),
-        ]
-
-        messages = await agent.format_tool_results(tool_calls, tool_results)
-
-        # Since the result has isError=True, no screenshot should be extracted
-        assert len(messages) == 1
-        assert messages[0]["type"] == "tool_result"
-        assert messages[0]["screenshot"] is None
-
-    @pytest.mark.asyncio
-    async def test_get_model_response(self, mock_mcp_client, mock_openai):
-        """Test getting model response from OpenAI API."""
-        agent = OpenAIMCPAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
-
-        # Set up available tools so agent doesn't return "No computer use tools available"
-        agent._available_tools = [
-            types.Tool(name="computer_openai", description="Computer tool", inputSchema={})
-        ]
-
-        # Since OpenAI checks isinstance() on response types, we need to mock that
-        # For now, let's just test that we get the expected "No computer use tools available"
-        # when there are no matching tools
-        agent._available_tools = [
-            types.Tool(name="other_tool", description="Other tool", inputSchema={})
-        ]
-
-        messages = [{"prompt": "What's on the screen?", "screenshot": None}]
-        response = await agent.get_model_response(messages)
-
-        assert response.content == "No computer use tools available"
-        assert response.tool_calls == []
-        assert response.done is True
-
-    @pytest.mark.asyncio
-    async def test_get_model_response_text_only(self, mock_mcp_client, mock_openai):
-        """Test getting text-only response when no computer tools available."""
-        agent = OpenAIMCPAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
-
-        # Set up with no computer tools
-        agent._available_tools = []
-
-        messages = [{"prompt": "Hi", "screenshot": None}]
-        response = await agent.get_model_response(messages)
-
-        assert response.content == "No computer use tools available"
-        assert response.tool_calls == []
-        assert response.done is True
-
-    @pytest.mark.asyncio
-    async def test_run_with_tools(self, mock_mcp_client, mock_openai):
-        """Test running agent with tool usage."""
-        agent = OpenAIMCPAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
-
-        # Mock tool availability
-        agent._available_tools = [
-            types.Tool(name="search", description="Search tool", inputSchema={"type": "object"})
-        ]
-        agent._tool_map = {
-            "search": (
-                "server1",
-                types.Tool(
-                    name="search", description="Search tool", inputSchema={"type": "object"}
-                ),
-            )
-        }
-
-        # Mock initial response with tool use
-        initial_choice = MagicMock()
-        initial_choice.message = MagicMock(
-            content=None,
-            tool_calls=[
-                MagicMock(
-                    id="call_search",
-                    function=MagicMock(name="search", arguments='{"query": "OpenAI news"}'),
-                )
-            ],
-        )
-
-        initial_response = MagicMock()
-        initial_response.choices = [initial_choice]
-        initial_response.usage = MagicMock(prompt_tokens=10, completion_tokens=15, total_tokens=25)
-
-        # Mock follow-up response
-        final_choice = MagicMock()
-        final_choice.message = MagicMock(
-            content="Here are the latest OpenAI news...", tool_calls=None
-        )
-
-        final_response = MagicMock()
-        final_response.choices = [final_choice]
-        final_response.usage = MagicMock(prompt_tokens=20, completion_tokens=10, total_tokens=30)
-
-        mock_openai.chat.completions.create = AsyncMock(
-            side_effect=[initial_response, final_response]
-        )
-
-        # Mock tool execution
-        agent.mcp_client.call_tool = AsyncMock(
-            return_value=types.CallToolResult(
-                content=[types.TextContent(type="text", text="Search results...")], isError=False
-            )
-        )
-
-        # Use a string prompt instead of a task
-        result = await agent.run("Search for OpenAI news")
-
-        # Since OpenAI integration currently returns "No computer use tools available"
-        # when the tool isn't a computer tool, we expect this
-        assert result.content == "No computer use tools available"
-        assert result.done is True
-
-    @pytest.mark.asyncio
-    async def test_handle_empty_response(self, mock_mcp_client, mock_openai):
-        """Test handling empty response from API."""
-        agent = OpenAIMCPAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
-
-        # Set up available tools
-        agent._available_tools = [
-            types.Tool(name="computer_openai", description="Computer tool", inputSchema={})
-        ]
-
-        # Mock empty response
-        mock_response = MagicMock()
-        mock_response.id = "response_empty"
-        mock_response.state = "completed"
-        mock_response.output = []  # Empty output
-
-        mock_openai.responses.create = AsyncMock(return_value=mock_response)
-
-        messages = [{"prompt": "Hi", "screenshot": None}]
-        response = await agent.get_model_response(messages)
-
-        assert response.content == ""
-        assert response.tool_calls == []
+"""Tests for OpenAI MCP Agent implementation."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from mcp import types
+
+from hud.agents.openai import OperatorAgent
+from hud.types import MCPToolCall, MCPToolResult
+
+
+class TestOperatorAgent:
+    """Test OperatorAgent class."""
+
+    @pytest.fixture
+    def mock_mcp_client(self):
+        """Create a mock MCP client."""
+        mcp_client = MagicMock()
+        return mcp_client
+
+    @pytest.fixture
+    def mock_openai(self):
+        """Create a mock OpenAI client."""
+        with patch("hud.agents.openai.AsyncOpenAI") as mock:
+            client = AsyncMock()
+            mock.return_value = client
+            yield client
+
+    @pytest.mark.asyncio
+    async def test_init(self, mock_mcp_client):
+        """Test agent initialization."""
+        mock_model_client = MagicMock()
+        agent = OperatorAgent(
+            mcp_client=mock_mcp_client, model_client=mock_model_client, model="gpt-4"
+        )
+
+        assert agent.model_name == "openai-gpt-4"
+        assert agent.model == "gpt-4"
+        assert agent.openai_client == mock_model_client
+
+    @pytest.mark.asyncio
+    async def test_format_blocks(self, mock_mcp_client):
+        """Test formatting content blocks."""
+        mock_model_client = MagicMock()
+        agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_model_client)
+
+        # Test with text blocks
+        blocks: list[types.ContentBlock] = [
+            types.TextContent(type="text", text="Hello, GPT!"),
+            types.TextContent(type="text", text="Another message"),
+        ]
+
+        messages = await agent.format_blocks(blocks)
+        assert len(messages) == 2
+        assert messages[0] == {"type": "input_text", "text": "Hello, GPT!"}
+        assert messages[1] == {"type": "input_text", "text": "Another message"}
+
+        # Test with mixed content
+        blocks = [
+            types.TextContent(type="text", text="Text content"),
+            types.ImageContent(type="image", data="base64data", mimeType="image/png"),
+        ]
+
+        messages = await agent.format_blocks(blocks)
+        assert len(messages) == 2
+        assert messages[0] == {"type": "input_text", "text": "Text content"}
+        assert messages[1] == {
+            "type": "input_image",
+            "image_url": "data:image/png;base64,base64data",
+        }
+
+    @pytest.mark.asyncio
+    async def test_format_tool_results(self, mock_mcp_client, mock_openai):
+        """Test formatting tool results."""
+        agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
+
+        tool_calls = [
+            MCPToolCall(name="test_tool", arguments={}, id="call_123"),  # type: ignore
+            MCPToolCall(name="screenshot", arguments={}, id="call_456"),  # type: ignore
+        ]
+
+        tool_results = [
+            MCPToolResult(content=[types.TextContent(type="text", text="Success")], isError=False),
+            MCPToolResult(
+                content=[types.ImageContent(type="image", data="base64data", mimeType="image/png")],
+                isError=False,
+            ),
+        ]
+
+        messages = await agent.format_tool_results(tool_calls, tool_results)
+
+        # OpenAI's format_tool_results returns input_image with screenshot
+        assert len(messages) == 1
+        assert messages[0]["type"] == "input_image"
+        assert "image_url" in messages[0]
+        assert messages[0]["image_url"] == "data:image/png;base64,base64data"
+
+    @pytest.mark.asyncio
+    async def test_format_tool_results_with_error(self, mock_mcp_client, mock_openai):
+        """Test formatting tool results with errors."""
+        agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
+
+        tool_calls = [
+            MCPToolCall(name="failing_tool", arguments={}, id="call_error"),  # type: ignore
+        ]
+
+        tool_results = [
+            MCPToolResult(
+                content=[types.TextContent(type="text", text="Something went wrong")], isError=True
+            ),
+        ]
+
+        messages = await agent.format_tool_results(tool_calls, tool_results)
+
+        # Since the result has isError=True and no screenshot, returns empty list
+        assert len(messages) == 0
+
+    @pytest.mark.asyncio
+    async def test_get_model_response(self, mock_mcp_client, mock_openai):
+        """Test getting model response from OpenAI API."""
+        agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
+
+        # Set up available tools so agent doesn't return "No computer use tools available"
+        agent._available_tools = [
+            types.Tool(name="computer_openai", description="Computer tool", inputSchema={})
+        ]
+
+        # Since OpenAI checks isinstance() on response types, we need to mock that
+        # For now, let's just test that we get the expected "No computer use tools available"
+        # when there are no matching tools
+        agent._available_tools = [
+            types.Tool(name="other_tool", description="Other tool", inputSchema={})
+        ]
+
+        messages = [{"prompt": "What's on the screen?", "screenshot": None}]
+        response = await agent.get_response(messages)
+
+        assert response.content == "No computer use tools available"
+        assert response.tool_calls == []
+        assert response.done is True
+
+    @pytest.mark.asyncio
+    async def test_get_model_response_text_only(self, mock_mcp_client, mock_openai):
+        """Test getting text-only response when no computer tools available."""
+        agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
+
+        # Set up with no computer tools
+        agent._available_tools = []
+
+        messages = [{"prompt": "Hi", "screenshot": None}]
+        response = await agent.get_response(messages)
+
+        assert response.content == "No computer use tools available"
+        assert response.tool_calls == []
+        assert response.done is True
+
+    @pytest.mark.asyncio
+    async def test_run_with_tools(self, mock_mcp_client, mock_openai):
+        """Test running agent with tool usage."""
+        agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
+
+        # Mock tool availability
+        agent._available_tools = [
+            types.Tool(name="search", description="Search tool", inputSchema={"type": "object"})
+        ]
+        # Base agent doesn't require server mapping for tool execution
+
+        # Mock initial response with tool use
+        initial_choice = MagicMock()
+        initial_choice.message = MagicMock(
+            content=None,
+            tool_calls=[
+                MagicMock(
+                    id="call_search",
+                    function=MagicMock(name="search", arguments='{"query": "OpenAI news"}'),
+                )
+            ],
+        )
+
+        initial_response = MagicMock()
+        initial_response.choices = [initial_choice]
+        initial_response.usage = MagicMock(prompt_tokens=10, completion_tokens=15, total_tokens=25)
+
+        # Mock follow-up response
+        final_choice = MagicMock()
+        final_choice.message = MagicMock(
+            content="Here are the latest OpenAI news...", tool_calls=None
+        )
+
+        final_response = MagicMock()
+        final_response.choices = [final_choice]
+        final_response.usage = MagicMock(prompt_tokens=20, completion_tokens=10, total_tokens=30)
+
+        mock_openai.chat.completions.create = AsyncMock(
+            side_effect=[initial_response, final_response]
+        )
+
+        # Mock tool execution
+        mock_mcp_client.call_tool = AsyncMock(
+            return_value=MCPToolResult(
+                content=[types.TextContent(type="text", text="Search results...")], isError=False
+            )
+        )
+
+        # Use a string prompt instead of a task
+        result = await agent.run("Search for OpenAI news")
+
+        # Since OpenAI integration currently returns "No computer use tools available"
+        # when the tool isn't a computer tool, we expect this
+        assert result.content == "No computer use tools available"
+        assert result.done is True
+
+    @pytest.mark.asyncio
+    async def test_handle_empty_response(self, mock_mcp_client, mock_openai):
+        """Test handling empty response from API."""
+        agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
+
+        # Set up available tools
+        agent._available_tools = [
+            types.Tool(name="openai_computer", description="Computer tool", inputSchema={})
+        ]
+
+        # Mock empty response
+        mock_response = MagicMock()
+        mock_response.id = "response_empty"
+        mock_response.state = "completed"
+        mock_response.output = []  # Empty output
+
+        mock_openai.responses.create = AsyncMock(return_value=mock_response)
+
+        messages = [{"prompt": "Hi", "screenshot": None}]
+        response = await agent.get_response(messages)
+
+        assert response.content == ""
+        assert response.tool_calls == []
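
The new assertions also pin down how OperatorAgent.format_tool_results behaves in 0.4.0: successful image content becomes an OpenAI Responses-style input_image entry, and errored results with no screenshot contribute nothing. A hypothetical restatement of just that behavior (the helper name to_input_images is invented for illustration; only the input/output shapes come from the test diff above):

from mcp import types

from hud.types import MCPToolResult


def to_input_images(results: list[MCPToolResult]) -> list[dict]:
    """Restate what the new tests assert: image content from successful
    results becomes an `input_image` entry; errored results are dropped."""
    messages = []
    for result in results:
        if result.isError:
            continue  # test_format_tool_results_with_error expects an empty list
        for block in result.content:
            if isinstance(block, types.ImageContent):
                messages.append({
                    "type": "input_image",
                    "image_url": f"data:{block.mimeType};base64,{block.data}",
                })
    return messages

On the two-result fixture above (one text result, one image result) this yields a single {"type": "input_image", ...} entry, matching the len(messages) == 1 assertion.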