cua-agent 0.1.6__py3-none-any.whl → 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent has been flagged as potentially problematic; see the registry's advisory for more details.

Files changed (57)
  1. agent/__init__.py +3 -2
  2. agent/core/__init__.py +1 -6
  3. agent/core/{computer_agent.py → agent.py} +31 -76
  4. agent/core/{loop.py → base.py} +68 -127
  5. agent/core/factory.py +104 -0
  6. agent/core/messages.py +279 -125
  7. agent/core/provider_config.py +15 -0
  8. agent/core/types.py +45 -0
  9. agent/core/visualization.py +197 -0
  10. agent/providers/anthropic/api/client.py +142 -1
  11. agent/providers/anthropic/api_handler.py +140 -0
  12. agent/providers/anthropic/callbacks/__init__.py +5 -0
  13. agent/providers/anthropic/loop.py +207 -221
  14. agent/providers/anthropic/response_handler.py +226 -0
  15. agent/providers/anthropic/tools/bash.py +0 -97
  16. agent/providers/anthropic/utils.py +368 -0
  17. agent/providers/omni/__init__.py +1 -20
  18. agent/providers/omni/api_handler.py +42 -0
  19. agent/providers/omni/clients/anthropic.py +4 -0
  20. agent/providers/omni/image_utils.py +0 -72
  21. agent/providers/omni/loop.py +491 -607
  22. agent/providers/omni/parser.py +58 -4
  23. agent/providers/omni/tools/__init__.py +25 -7
  24. agent/providers/omni/tools/base.py +29 -0
  25. agent/providers/omni/tools/bash.py +43 -38
  26. agent/providers/omni/tools/computer.py +144 -182
  27. agent/providers/omni/tools/manager.py +25 -45
  28. agent/providers/omni/types.py +1 -3
  29. agent/providers/omni/utils.py +224 -145
  30. agent/providers/openai/__init__.py +6 -0
  31. agent/providers/openai/api_handler.py +453 -0
  32. agent/providers/openai/loop.py +440 -0
  33. agent/providers/openai/response_handler.py +205 -0
  34. agent/providers/openai/tools/__init__.py +15 -0
  35. agent/providers/openai/tools/base.py +79 -0
  36. agent/providers/openai/tools/computer.py +319 -0
  37. agent/providers/openai/tools/manager.py +106 -0
  38. agent/providers/openai/types.py +36 -0
  39. agent/providers/openai/utils.py +98 -0
  40. cua_agent-0.1.18.dist-info/METADATA +165 -0
  41. cua_agent-0.1.18.dist-info/RECORD +73 -0
  42. agent/README.md +0 -63
  43. agent/providers/anthropic/messages/manager.py +0 -112
  44. agent/providers/omni/callbacks.py +0 -78
  45. agent/providers/omni/clients/groq.py +0 -101
  46. agent/providers/omni/experiment.py +0 -276
  47. agent/providers/omni/messages.py +0 -171
  48. agent/providers/omni/tool_manager.py +0 -91
  49. agent/providers/omni/visualization.py +0 -130
  50. agent/types/__init__.py +0 -23
  51. agent/types/base.py +0 -41
  52. agent/types/messages.py +0 -36
  53. cua_agent-0.1.6.dist-info/METADATA +0 -120
  54. cua_agent-0.1.6.dist-info/RECORD +0 -64
  55. /agent/{types → core}/tools.py +0 -0
  56. {cua_agent-0.1.6.dist-info → cua_agent-0.1.18.dist-info}/WHEEL +0 -0
  57. {cua_agent-0.1.6.dist-info → cua_agent-0.1.18.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,440 @@
1
+ """OpenAI Agent Response API provider implementation."""
2
+
3
+ import logging
4
+ import asyncio
5
+ import base64
6
+ from typing import Any, Dict, List, Optional, AsyncGenerator, Callable, Awaitable, TYPE_CHECKING
7
+
8
+ from computer import Computer
9
+ from ...core.base import BaseLoop
10
+ from ...core.types import AgentResponse
11
+ from ...core.messages import StandardMessageManager, ImageRetentionConfig
12
+
13
+ from .api_handler import OpenAIAPIHandler
14
+ from .response_handler import OpenAIResponseHandler
15
+ from .tools.manager import ToolManager
16
+ from .types import LLMProvider, ResponseItemType
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ class OpenAILoop(BaseLoop):
22
+ """OpenAI-specific implementation of the agent loop.
23
+
24
+ This class extends BaseLoop to provide specialized support for OpenAI's Agent Response API
25
+ with computer control capabilities.
26
+ """
27
+
28
+ ###########################################
29
+ # INITIALIZATION AND CONFIGURATION
30
+ ###########################################
31
+
32
    def __init__(
        self,
        api_key: str,
        computer: Computer,
        model: str = "computer-use-preview",
        only_n_most_recent_images: Optional[int] = 2,
        base_dir: Optional[str] = "trajectories",
        max_retries: int = 3,
        retry_delay: float = 1.0,
        save_trajectory: bool = True,
        acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
        **kwargs,
    ):
        """Initialize the OpenAI loop.

        Args:
            api_key: OpenAI API key
            computer: Computer instance
            model: Model name (ignored; always overridden to "computer-use-preview")
            only_n_most_recent_images: Maximum number of recent screenshots to include in API requests
            base_dir: Base directory for saving experiment data
            max_retries: Maximum number of retries for API calls
            retry_delay: Delay between retries in seconds
            save_trajectory: Whether to save trajectory data
            acknowledge_safety_check_callback: Optional callback for safety check acknowledgment
            **kwargs: Additional provider-specific arguments
        """
        # Always use computer-use-preview model: any caller-supplied model
        # name is logged and replaced below.
        if model != "computer-use-preview":
            logger.info(
                f"Overriding provided model '{model}' with required model 'computer-use-preview'"
            )

        # Initialize base class with core config
        super().__init__(
            computer=computer,
            model="computer-use-preview",  # Always use computer-use-preview
            api_key=api_key,
            max_retries=max_retries,
            retry_delay=retry_delay,
            base_dir=base_dir,
            save_trajectory=save_trajectory,
            only_n_most_recent_images=only_n_most_recent_images,
            **kwargs,
        )

        # Initialize message manager; image retention trims old screenshots
        # from the history that is sent to the API.
        self.message_manager = StandardMessageManager(
            config=ImageRetentionConfig(num_images_to_keep=only_n_most_recent_images)
        )

        # OpenAI-specific attributes
        self.provider = LLMProvider.OPENAI
        self.client = None  # no persistent client object is created in this module
        self.retry_count = 0
        self.acknowledge_safety_check_callback = acknowledge_safety_check_callback
        # NOTE(review): run() creates its own local queue; this attribute
        # appears unused within this file — confirm external callers before removing.
        self.queue = asyncio.Queue()  # Initialize queue
        self.last_response_id = None  # Store the last response ID across runs

        # Initialize handlers (both hold a back-reference to this loop)
        self.api_handler = OpenAIAPIHandler(self)
        self.response_handler = OpenAIResponseHandler(self)

        # Initialize tool manager with callback
        self.tool_manager = ToolManager(
            computer=computer, acknowledge_safety_check_callback=acknowledge_safety_check_callback
        )
+ )
99
+
100
+ ###########################################
101
+ # CLIENT INITIALIZATION - IMPLEMENTING ABSTRACT METHOD
102
+ ###########################################
103
+
104
+ async def initialize_client(self) -> None:
105
+ """Initialize the OpenAI API client and tools.
106
+
107
+ Implements abstract method from BaseLoop to set up the OpenAI-specific
108
+ client, tool manager, and message manager.
109
+ """
110
+ try:
111
+ # Initialize tool manager
112
+ await self.tool_manager.initialize()
113
+ except Exception as e:
114
+ logger.error(f"Error initializing OpenAI client: {str(e)}")
115
+ self.client = None
116
+ raise RuntimeError(f"Failed to initialize OpenAI client: {str(e)}")
117
+
118
+ ###########################################
119
+ # MAIN LOOP - IMPLEMENTING ABSTRACT METHOD
120
+ ###########################################
121
+
122
+ async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]:
123
+ """Run the agent loop with provided messages.
124
+
125
+ Args:
126
+ messages: List of message objects in standard format
127
+
128
+ Yields:
129
+ Agent response format
130
+ """
131
+ try:
132
+ logger.info("Starting OpenAI loop run")
133
+
134
+ # Create queue for response streaming
135
+ queue = asyncio.Queue()
136
+
137
+ # Ensure tool manager is initialized
138
+ await self.tool_manager.initialize()
139
+
140
+ # Start loop in background task
141
+ loop_task = asyncio.create_task(self._run_loop(queue, messages))
142
+
143
+ # Process and yield messages as they arrive
144
+ while True:
145
+ try:
146
+ item = await queue.get()
147
+ if item is None: # Stop signal
148
+ break
149
+ yield item
150
+ queue.task_done()
151
+ except Exception as e:
152
+ logger.error(f"Error processing queue item: {str(e)}")
153
+ continue
154
+
155
+ # Wait for loop to complete
156
+ await loop_task
157
+
158
+ # Send completion message
159
+ yield {
160
+ "role": "assistant",
161
+ "content": "Task completed successfully.",
162
+ "metadata": {"title": "✅ Complete"},
163
+ }
164
+
165
+ except Exception as e:
166
+ logger.error(f"Error executing task: {str(e)}")
167
+ yield {
168
+ "role": "assistant",
169
+ "content": f"Error: {str(e)}",
170
+ "metadata": {"title": "❌ Error"},
171
+ }
172
+
173
+ ###########################################
174
+ # AGENT LOOP IMPLEMENTATION
175
+ ###########################################
176
+
177
    async def _run_loop(self, queue: asyncio.Queue, messages: List[Dict[str, Any]]) -> None:
        """Drive one full agent interaction against the OpenAI Agent Response API.

        Captures an initial screenshot, sends the user's query plus screenshot
        to the API, then repeatedly executes returned ``computer_call`` actions
        until the API stops requesting them. Every API response (and any error
        payload) is pushed onto *queue*; a final ``None`` is always pushed as
        the stop signal consumed by ``run()``.

        Args:
            queue: Queue for response streaming
            messages: List of messages in standard format
        """
        try:
            # Use the instance-level last_response_id instead of creating a local
            # variable — this way the conversation thread persists between runs.

            # Capture initial screenshot
            try:
                # Take screenshot
                screenshot = await self.computer.interface.screenshot()
                logger.info("Screenshot captured successfully")

                # Convert to base64 if needed
                if isinstance(screenshot, bytes):
                    screenshot_base64 = base64.b64encode(screenshot).decode("utf-8")
                else:
                    screenshot_base64 = screenshot

                # Save screenshot if requested
                if self.save_trajectory:
                    # Ensure screenshot_base64 is a string
                    if not isinstance(screenshot_base64, str):
                        logger.warning(
                            "Converting non-string screenshot_base64 to string for _save_screenshot"
                        )
                        if isinstance(screenshot_base64, (bytearray, memoryview)):
                            screenshot_base64 = base64.b64encode(screenshot_base64).decode("utf-8")
                    self._save_screenshot(screenshot_base64, action_type="state")
                    logger.info("Screenshot saved to trajectory")

                # First add any existing user messages that were passed to run().
                # Only the first non-empty string user message is used.
                user_query = None
                for msg in messages:
                    if msg.get("role") == "user":
                        user_content = msg.get("content", "")
                        if isinstance(user_content, str) and user_content:
                            user_query = user_content
                            # Add the user's original query to the message manager
                            self.message_manager.add_user_message(
                                [{"type": "text", "text": user_content}]
                            )
                            break

                # Add screenshot to message manager
                message_content = [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": screenshot_base64,
                        },
                    }
                ]

                # Add appropriate text with the screenshot.
                # NOTE(review): user_query stays None when no string user message
                # was found, producing a text item with text=None — confirm
                # downstream consumers tolerate that.
                message_content.append(
                    {
                        "type": "text",
                        "text": user_query,
                    }
                )

                # Add the screenshot and text to the message manager
                self.message_manager.add_user_message(message_content)

                # Process user request and convert our standard message format
                # to the one OpenAI expects (rebinds the `messages` parameter)
                messages = self.message_manager.messages
                logger.info(f"Starting agent loop with {len(messages)} messages")

                # Create initial turn directory
                if self.save_trajectory:
                    self._create_turn_dir()

                # Call API (screen size fetched once and reused for follow-ups)
                screen_size = await self.computer.interface.get_screen_size()
                response = await self.api_handler.send_initial_request(
                    messages=messages,
                    display_width=str(screen_size["width"]),
                    display_height=str(screen_size["height"]),
                    previous_response_id=self.last_response_id,
                )

                # Store response ID for next request
                # OpenAI API response structure: the ID is in the response dictionary
                if isinstance(response, dict) and "id" in response:
                    self.last_response_id = response["id"]  # Update instance variable
                    logger.info(f"Received response with ID: {self.last_response_id}")
                else:
                    logger.warning(
                        f"Could not find response ID in OpenAI response: {type(response)}"
                    )
                    # Don't reset last_response_id to None - keep the previous value if available

                # Process API response
                await queue.put(response)

                # Loop to continue processing responses until task is complete
                task_complete = False
                while not task_complete:
                    # Check if there are any computer calls
                    output_items = response.get("output", []) or []
                    computer_calls = [
                        item for item in output_items if item.get("type") == "computer_call"
                    ]

                    if not computer_calls:
                        logger.info("No computer calls in response, task may be complete.")
                        task_complete = True
                        continue

                    # Process the first computer call (any additional calls in the
                    # same response are ignored this iteration)
                    computer_call = computer_calls[0]
                    action = computer_call.get("action", {})
                    call_id = computer_call.get("call_id")

                    # Check for safety checks
                    pending_safety_checks = computer_call.get("pending_safety_checks", [])
                    acknowledged_safety_checks = []

                    if pending_safety_checks:
                        # Log safety checks
                        for check in pending_safety_checks:
                            logger.warning(
                                f"Safety check: {check.get('code')} - {check.get('message')}"
                            )

                        # If we have a callback, use it to acknowledge safety checks
                        if self.acknowledge_safety_check_callback:
                            acknowledged = await self.acknowledge_safety_check_callback(
                                pending_safety_checks
                            )
                            if not acknowledged:
                                logger.warning("Safety check acknowledgment failed")
                                await queue.put(
                                    {
                                        "role": "assistant",
                                        "content": "Safety checks were not acknowledged. Cannot proceed with action.",
                                        "metadata": {"title": "⚠️ Safety Warning"},
                                    }
                                )
                                # NOTE(review): this `continue` re-enters the loop
                                # with the same `response`, so the same
                                # unacknowledged call is examined again — this can
                                # spin forever; consider setting task_complete.
                                continue
                            acknowledged_safety_checks = pending_safety_checks

                    # Execute the action
                    try:
                        # Create a new turn directory for this action if saving trajectories
                        if self.save_trajectory:
                            self._create_turn_dir()

                        # Execute the tool (result is used only for its side
                        # effects; the fresh screenshot below captures the
                        # post-action state)
                        result = await self.tool_manager.execute_tool("computer", action)

                        # Take screenshot after action
                        screenshot = await self.computer.interface.screenshot()
                        if isinstance(screenshot, bytes):
                            screenshot_base64 = base64.b64encode(screenshot).decode("utf-8")
                        else:
                            screenshot_base64 = screenshot

                        # Create computer_call_output
                        computer_call_output = {
                            "type": "computer_call_output",
                            "call_id": call_id,
                            "output": {
                                "type": "input_image",
                                "image_url": f"data:image/png;base64,{screenshot_base64}",
                            },
                        }

                        # Add acknowledged safety checks if any
                        if acknowledged_safety_checks:
                            computer_call_output["acknowledged_safety_checks"] = (
                                acknowledged_safety_checks
                            )

                        # Save to message manager for history
                        self.message_manager.add_system_message(
                            f"[Computer action executed: {action.get('type')}]"
                        )
                        self.message_manager.add_user_message([computer_call_output])

                        # For follow-up requests with previous_response_id, we only need to send
                        # the computer_call_output, not the full message history
                        # The API handler will extract this from the message history.
                        # NOTE(review): if last_response_id is not a str, no follow-up
                        # request is issued, so `response` never changes and the same
                        # computer_call is re-executed on the next iteration —
                        # confirm this branch cannot be reached with a stale response.
                        if isinstance(self.last_response_id, str):
                            response = await self.api_handler.send_computer_call_request(
                                messages=self.message_manager.messages,
                                display_width=str(screen_size["width"]),
                                display_height=str(screen_size["height"]),
                                previous_response_id=self.last_response_id,  # Use instance variable
                            )

                        # Store response ID for next request
                        if isinstance(response, dict) and "id" in response:
                            self.last_response_id = response["id"]  # Update instance variable
                            logger.info(f"Received response with ID: {self.last_response_id}")
                        else:
                            logger.warning(
                                f"Could not find response ID in OpenAI response: {type(response)}"
                            )
                            # Keep using the previous response ID if we can't find a new one

                        # Process the response
                        # await self.response_handler.process_response(response, queue)
                        await queue.put(response)
                    except Exception as e:
                        logger.error(f"Error executing computer action: {str(e)}")
                        await queue.put(
                            {
                                "role": "assistant",
                                "content": f"Error executing action: {str(e)}",
                                "metadata": {"title": "❌ Error"},
                            }
                        )
                        task_complete = True

            except Exception as e:
                logger.error(f"Error capturing initial screenshot: {str(e)}")
                await queue.put(
                    {
                        "role": "assistant",
                        "content": f"Error capturing screenshot: {str(e)}",
                        "metadata": {"title": "❌ Error"},
                    }
                )
                await queue.put(None)  # Signal that we're done
                return

            # Signal that we're done
            await queue.put(None)

        except Exception as e:
            logger.error(f"Error in _run_loop: {str(e)}")
            await queue.put(
                {
                    "role": "assistant",
                    "content": f"Error: {str(e)}",
                    "metadata": {"title": "❌ Error"},
                }
            )
            await queue.put(None)  # Signal that we're done
424
+
425
+ def get_last_response_id(self) -> Optional[str]:
426
+ """Get the last response ID.
427
+
428
+ Returns:
429
+ The last response ID or None if no response has been received
430
+ """
431
+ return self.last_response_id
432
+
433
+ def set_last_response_id(self, response_id: str) -> None:
434
+ """Set the last response ID.
435
+
436
+ Args:
437
+ response_id: OpenAI response ID to set
438
+ """
439
+ self.last_response_id = response_id
440
+ logger.info(f"Manually set response ID to: {self.last_response_id}")
@@ -0,0 +1,205 @@
1
+ """Response handler for the OpenAI provider."""
2
+
3
+ import logging
4
+ import asyncio
5
+ import traceback
6
+ from typing import Any, Dict, List, Optional, TYPE_CHECKING, AsyncGenerator
7
+ import base64
8
+
9
+ from ...core.types import AgentResponse
10
+ from .types import ResponseItemType
11
+
12
+ if TYPE_CHECKING:
13
+ from .loop import OpenAILoop
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ class OpenAIResponseHandler:
19
+ """Handler for OpenAI API responses."""
20
+
21
+ def __init__(self, loop: "OpenAILoop"):
22
+ """Initialize the response handler.
23
+
24
+ Args:
25
+ loop: OpenAI loop instance
26
+ """
27
+ self.loop = loop
28
+ logger.info("Initialized OpenAI response handler")
29
+
30
    async def process_response(self, response: Dict[str, Any], queue: asyncio.Queue) -> None:
        """Fan out the output items of an OpenAI API response onto the queue.

        Args:
            response: Response from the API
            queue: Queue for response streaming
        """
        try:
            # Get output items (the API may return None instead of a list)
            output_items = response.get("output", []) or []

            # Process each output item; anything that is not a dict is skipped
            for item in output_items:
                if not isinstance(item, dict):
                    continue

                item_type = item.get("type")

                # For computer_call items, we only need to add to the queue.
                # The loop is now handling executing the action and creating
                # the computer_call_output.
                if item_type == ResponseItemType.COMPUTER_CALL:
                    # Send computer_call to queue so it can be processed
                    await queue.put(item)

                elif item_type == ResponseItemType.MESSAGE:
                    # Send message to queue
                    await queue.put(item)

                elif item_type == ResponseItemType.REASONING:
                    # Process reasoning summary: take the first summary_text entry
                    summary = None
                    if "summary" in item and isinstance(item["summary"], list):
                        for summary_item in item["summary"]:
                            if (
                                isinstance(summary_item, dict)
                                and summary_item.get("type") == "summary_text"
                            ):
                                summary = summary_item.get("text")
                                break

                    if summary:
                        # Log the reasoning summary
                        logger.info(f"Reasoning summary: {summary}")

                        # Send reasoning summary to queue with a special format
                        await queue.put(
                            {
                                "role": "assistant",
                                "content": f"[Reasoning: {summary}]",
                                "metadata": {"title": "💭 Reasoning", "is_summary": True},
                            }
                        )

                    # Also pass the original reasoning item to the queue for complete context
                    await queue.put(item)

        except Exception as e:
            logger.error(f"Error processing response: {str(e)}")
            await queue.put(
                {
                    "role": "assistant",
                    "content": f"Error processing response: {str(e)}",
                    "metadata": {"title": "❌ Error"},
                }
            )
95
+
96
+ def _process_message_item(self, item: Dict[str, Any]) -> AgentResponse:
97
+ """Process a message item from the response.
98
+
99
+ Args:
100
+ item: Message item from the response
101
+
102
+ Returns:
103
+ Processed message in AgentResponse format
104
+ """
105
+ # Extract content items - add null check
106
+ content_items = item.get("content", []) or []
107
+
108
+ # Extract text from content items - use output_text type from OpenAI
109
+ text = ""
110
+ for content_item in content_items:
111
+ # Skip if content_item is None or not a dict
112
+ if content_item is None or not isinstance(content_item, dict):
113
+ continue
114
+
115
+ # In OpenAI Agent Response API, text content is in "output_text" type items
116
+ if content_item.get("type") == "output_text":
117
+ text += content_item.get("text", "")
118
+
119
+ # Create agent response
120
+ return {
121
+ "role": "assistant",
122
+ "content": text
123
+ or "I don't have a response for that right now.", # Provide fallback when text is empty
124
+ "metadata": {"title": "💬 Response"},
125
+ }
126
+
127
    async def _process_computer_call(self, item: Dict[str, Any], queue: asyncio.Queue) -> None:
        """Execute a computer_call item and record the outcome in history.

        On success only the loop's message history is updated (nothing is put
        on the queue); on failure the error is both recorded in history and
        queued as an assistant error message.

        Args:
            item: Computer call item
            queue: Queue to add responses to
        """
        try:
            # Log the computer call; tolerate a missing or malformed action dict
            action = item.get("action", {}) or {}
            if not isinstance(action, dict):
                logger.warning(f"Expected dict for action, got {type(action)}")
                action = {}

            action_type = action.get("type", "unknown")
            logger.info(f"Processing computer call: {action_type}")

            # Execute the tool call
            result = await self.loop.tool_manager.execute_tool("computer", action)

            # Add any message to the conversation history and queue.
            # Only record when the tool returned a screenshot.
            if result and result.base64_image:
                # Update message history with the call output
                self.loop.message_manager.add_user_message(
                    [{"type": "text", "text": f"[Computer action completed: {action_type}]"}]
                )

                # Add image to messages (using correct content types for Agent Response API)
                self.loop.message_manager.add_user_message(
                    [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/png",
                                "data": result.base64_image,
                            },
                        }
                    ]
                )

            # If browser environment, include URL if available
            # (disabled — kept for reference; remove once confirmed unneeded)
            # if (
            #     hasattr(self.loop.computer, "environment")
            #     and self.loop.computer.environment == "browser"
            # ):
            #     try:
            #         if hasattr(self.loop.computer.interface, "get_current_url"):
            #             current_url = await self.loop.computer.interface.get_current_url()
            #             self.loop.message_manager.add_user_message(
            #                 [
            #                     {
            #                         "type": "text",
            #                         "text": f"Current URL: {current_url}",
            #                     }
            #                 ]
            #             )
            #     except Exception as e:
            #         logger.warning(f"Failed to get current URL: {str(e)}")

            # Log successful completion
            logger.info(f"Computer call {action_type} executed successfully")

        except Exception as e:
            logger.error(f"Error executing computer call: {str(e)}")
            logger.debug(traceback.format_exc())

            # Add error to conversation
            self.loop.message_manager.add_user_message(
                [{"type": "text", "text": f"Error executing computer action: {str(e)}"}]
            )

            # Send error to queue
            error_response = {
                "role": "assistant",
                "content": f"Error executing computer action: {str(e)}",
                "metadata": {"title": "❌ Error"},
            }
            await queue.put(error_response)
@@ -0,0 +1,15 @@
1
+ """OpenAI tools module for computer control."""
2
+
3
+ from .manager import ToolManager
4
+ from .computer import ComputerTool
5
+ from .base import BaseOpenAITool, ToolResult, ToolError, ToolFailure, CLIResult
6
+
7
+ __all__ = [
8
+ "ToolManager",
9
+ "ComputerTool",
10
+ "BaseOpenAITool",
11
+ "ToolResult",
12
+ "ToolError",
13
+ "ToolFailure",
14
+ "CLIResult",
15
+ ]