cua-agent 0.3.2__py3-none-any.whl → 0.4.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (111)
  1. agent/__init__.py +15 -51
  2. agent/__main__.py +21 -0
  3. agent/adapters/__init__.py +9 -0
  4. agent/adapters/huggingfacelocal_adapter.py +216 -0
  5. agent/agent.py +577 -0
  6. agent/callbacks/__init__.py +17 -0
  7. agent/callbacks/base.py +153 -0
  8. agent/callbacks/budget_manager.py +44 -0
  9. agent/callbacks/image_retention.py +139 -0
  10. agent/callbacks/logging.py +247 -0
  11. agent/callbacks/pii_anonymization.py +259 -0
  12. agent/callbacks/trajectory_saver.py +305 -0
  13. agent/cli.py +290 -0
  14. agent/computer_handler.py +107 -0
  15. agent/decorators.py +90 -0
  16. agent/loops/__init__.py +11 -0
  17. agent/loops/anthropic.py +728 -0
  18. agent/loops/omniparser.py +339 -0
  19. agent/loops/openai.py +95 -0
  20. agent/loops/uitars.py +688 -0
  21. agent/responses.py +207 -0
  22. agent/types.py +79 -0
  23. agent/ui/__init__.py +7 -1
  24. agent/ui/gradio/__init__.py +6 -19
  25. agent/ui/gradio/app.py +80 -1299
  26. agent/ui/gradio/ui_components.py +703 -0
  27. cua_agent-0.4.0b1.dist-info/METADATA +424 -0
  28. cua_agent-0.4.0b1.dist-info/RECORD +30 -0
  29. agent/core/__init__.py +0 -27
  30. agent/core/agent.py +0 -210
  31. agent/core/base.py +0 -217
  32. agent/core/callbacks.py +0 -200
  33. agent/core/experiment.py +0 -249
  34. agent/core/factory.py +0 -122
  35. agent/core/messages.py +0 -332
  36. agent/core/provider_config.py +0 -21
  37. agent/core/telemetry.py +0 -142
  38. agent/core/tools/__init__.py +0 -21
  39. agent/core/tools/base.py +0 -74
  40. agent/core/tools/bash.py +0 -52
  41. agent/core/tools/collection.py +0 -46
  42. agent/core/tools/computer.py +0 -113
  43. agent/core/tools/edit.py +0 -67
  44. agent/core/tools/manager.py +0 -56
  45. agent/core/tools.py +0 -32
  46. agent/core/types.py +0 -88
  47. agent/core/visualization.py +0 -197
  48. agent/providers/__init__.py +0 -4
  49. agent/providers/anthropic/__init__.py +0 -6
  50. agent/providers/anthropic/api/client.py +0 -360
  51. agent/providers/anthropic/api/logging.py +0 -150
  52. agent/providers/anthropic/api_handler.py +0 -140
  53. agent/providers/anthropic/callbacks/__init__.py +0 -5
  54. agent/providers/anthropic/callbacks/manager.py +0 -65
  55. agent/providers/anthropic/loop.py +0 -568
  56. agent/providers/anthropic/prompts.py +0 -23
  57. agent/providers/anthropic/response_handler.py +0 -226
  58. agent/providers/anthropic/tools/__init__.py +0 -33
  59. agent/providers/anthropic/tools/base.py +0 -88
  60. agent/providers/anthropic/tools/bash.py +0 -66
  61. agent/providers/anthropic/tools/collection.py +0 -34
  62. agent/providers/anthropic/tools/computer.py +0 -396
  63. agent/providers/anthropic/tools/edit.py +0 -326
  64. agent/providers/anthropic/tools/manager.py +0 -54
  65. agent/providers/anthropic/tools/run.py +0 -42
  66. agent/providers/anthropic/types.py +0 -16
  67. agent/providers/anthropic/utils.py +0 -381
  68. agent/providers/omni/__init__.py +0 -8
  69. agent/providers/omni/api_handler.py +0 -42
  70. agent/providers/omni/clients/anthropic.py +0 -103
  71. agent/providers/omni/clients/base.py +0 -35
  72. agent/providers/omni/clients/oaicompat.py +0 -195
  73. agent/providers/omni/clients/ollama.py +0 -122
  74. agent/providers/omni/clients/openai.py +0 -155
  75. agent/providers/omni/clients/utils.py +0 -25
  76. agent/providers/omni/image_utils.py +0 -34
  77. agent/providers/omni/loop.py +0 -990
  78. agent/providers/omni/parser.py +0 -307
  79. agent/providers/omni/prompts.py +0 -64
  80. agent/providers/omni/tools/__init__.py +0 -30
  81. agent/providers/omni/tools/base.py +0 -29
  82. agent/providers/omni/tools/bash.py +0 -74
  83. agent/providers/omni/tools/computer.py +0 -179
  84. agent/providers/omni/tools/manager.py +0 -61
  85. agent/providers/omni/utils.py +0 -236
  86. agent/providers/openai/__init__.py +0 -6
  87. agent/providers/openai/api_handler.py +0 -456
  88. agent/providers/openai/loop.py +0 -472
  89. agent/providers/openai/response_handler.py +0 -205
  90. agent/providers/openai/tools/__init__.py +0 -15
  91. agent/providers/openai/tools/base.py +0 -79
  92. agent/providers/openai/tools/computer.py +0 -326
  93. agent/providers/openai/tools/manager.py +0 -106
  94. agent/providers/openai/types.py +0 -36
  95. agent/providers/openai/utils.py +0 -98
  96. agent/providers/uitars/__init__.py +0 -1
  97. agent/providers/uitars/clients/base.py +0 -35
  98. agent/providers/uitars/clients/mlxvlm.py +0 -263
  99. agent/providers/uitars/clients/oaicompat.py +0 -214
  100. agent/providers/uitars/loop.py +0 -660
  101. agent/providers/uitars/prompts.py +0 -63
  102. agent/providers/uitars/tools/__init__.py +0 -1
  103. agent/providers/uitars/tools/computer.py +0 -283
  104. agent/providers/uitars/tools/manager.py +0 -60
  105. agent/providers/uitars/utils.py +0 -264
  106. agent/telemetry.py +0 -21
  107. agent/ui/__main__.py +0 -15
  108. cua_agent-0.3.2.dist-info/METADATA +0 -295
  109. cua_agent-0.3.2.dist-info/RECORD +0 -87
  110. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b1.dist-info}/WHEEL +0 -0
  111. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b1.dist-info}/entry_points.txt +0 -0
@@ -1,472 +0,0 @@
1
- """OpenAI Agent Response API provider implementation."""
2
-
3
- import logging
4
- import asyncio
5
- import base64
6
- from typing import Any, Dict, List, Optional, AsyncGenerator, Callable, Awaitable, TYPE_CHECKING
7
-
8
- from computer import Computer
9
- from ...core.base import BaseLoop
10
- from ...core.types import AgentResponse
11
- from ...core.messages import StandardMessageManager, ImageRetentionConfig
12
-
13
- from .api_handler import OpenAIAPIHandler
14
- from .response_handler import OpenAIResponseHandler
15
- from .tools.manager import ToolManager
16
- from .types import LLMProvider, ResponseItemType
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
-
21
- class OpenAILoop(BaseLoop):
22
- """OpenAI-specific implementation of the agent loop.
23
-
24
- This class extends BaseLoop to provide specialized support for OpenAI's Agent Response API
25
- with computer control capabilities.
26
- """
27
-
28
- ###########################################
29
- # INITIALIZATION AND CONFIGURATION
30
- ###########################################
31
-
32
def __init__(
    self,
    api_key: str,
    computer: Computer,
    model: str = "computer-use-preview",
    only_n_most_recent_images: Optional[int] = 2,
    base_dir: Optional[str] = "trajectories",
    max_retries: int = 3,
    retry_delay: float = 1.0,
    save_trajectory: bool = True,
    acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
    **kwargs,
):
    """Set up the OpenAI loop, its message manager, handlers and tool manager.

    Args:
        api_key: OpenAI API key, forwarded to the base class.
        computer: Computer instance, forwarded to the base class and the tool manager.
        model: Requested model name. Any value other than
            "computer-use-preview" is logged and then ignored.
        only_n_most_recent_images: Number of images the message manager keeps.
        base_dir: Base directory forwarded to the base class.
        max_retries: Retry count forwarded to the base class.
        retry_delay: Retry delay forwarded to the base class.
        save_trajectory: Trajectory-saving flag forwarded to the base class.
        acknowledge_safety_check_callback: Optional awaitable callback used to
            acknowledge safety checks.
        **kwargs: Extra arguments forwarded unchanged to the base class.
    """
    # NOTE(review): the callback is annotated as taking a ``str``; confirm
    # against its callers what it actually receives.

    # The model is fixed; only report when the caller asked for something else.
    if model != "computer-use-preview":
        logger.info(
            f"Overriding provided model '{model}' with required model 'computer-use-preview'"
        )

    base_config = dict(
        computer=computer,
        model="computer-use-preview",
        api_key=api_key,
        max_retries=max_retries,
        retry_delay=retry_delay,
        base_dir=base_dir,
        save_trajectory=save_trajectory,
        only_n_most_recent_images=only_n_most_recent_images,
    )
    super().__init__(**base_config, **kwargs)

    # Conversation history with an image retention limit.
    retention = ImageRetentionConfig(num_images_to_keep=only_n_most_recent_images)
    self.message_manager = StandardMessageManager(config=retention)

    # Provider-specific state.
    self.provider = LLMProvider.OPENAI
    self.client = None
    self.retry_count = 0
    self.acknowledge_safety_check_callback = acknowledge_safety_check_callback
    self.queue = asyncio.Queue()  # replaced with a fresh queue on every run()
    self.last_response_id = None  # kept on the instance so it survives across runs
    self.loop_task = None  # background task handle, used by cancel()

    # Collaborators that hold a reference back to this loop.
    self.api_handler = OpenAIAPIHandler(self)
    self.response_handler = OpenAIResponseHandler(self)

    # Tool manager shares the same safety-check callback.
    self.tool_manager = ToolManager(
        computer=computer,
        acknowledge_safety_check_callback=acknowledge_safety_check_callback,
    )
100
-
101
- ###########################################
102
- # CLIENT INITIALIZATION - IMPLEMENTING ABSTRACT METHOD
103
- ###########################################
104
-
105
async def initialize_client(self) -> None:
    """Initialize the tool manager used by the OpenAI loop.

    Raises:
        RuntimeError: If the tool manager fails to initialize. The original
            exception is attached as the cause so the root failure stays
            visible in tracebacks.
    """
    try:
        await self.tool_manager.initialize()
    except Exception as e:
        logger.error(f"Error initializing OpenAI client: {str(e)}")
        self.client = None
        # Chain explicitly so callers can inspect ``__cause__``.
        raise RuntimeError(f"Failed to initialize OpenAI client: {str(e)}") from e
118
-
119
- ###########################################
120
- # MAIN LOOP - IMPLEMENTING ABSTRACT METHOD
121
- ###########################################
122
-
123
async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]:
    """Run the agent loop and stream its output.

    Starts ``_run_loop`` as a background task that pushes items onto a fresh
    queue, then yields every queued item until the ``None`` stop signal
    arrives.

    Args:
        messages: List of message objects in standard format.

    Yields:
        Each item produced by the background loop, then a final completion
        message; or an error message if an exception is caught here.
    """
    task = None
    try:
        logger.info("Starting OpenAI loop run")

        # Fresh queue per run so items from a previous run cannot leak in.
        self.queue = asyncio.Queue()

        # Ensure tool manager is initialized
        await self.tool_manager.initialize()

        # Producer: runs concurrently and feeds the queue.
        task = asyncio.create_task(self._run_loop(self.queue, messages))
        self.loop_task = task

        # Consumer: forward items until the stop signal.
        while True:
            try:
                item = await self.queue.get()
                if item is None:  # Stop signal
                    break
                yield item
                self.queue.task_done()
            except Exception as e:
                logger.error(f"Error processing queue item: {str(e)}")
                continue

        # Wait for loop to complete
        await task

        yield {
            "role": "assistant",
            "content": "Task completed successfully.",
            "metadata": {"title": "✅ Complete"},
        }

    except Exception as e:
        logger.error(f"Error executing task: {str(e)}")
        yield {
            "role": "assistant",
            "content": f"Error: {str(e)}",
            "metadata": {"title": "❌ Error"},
        }
    finally:
        # If the consumer stopped iterating early (generator closed) or was
        # cancelled, the producer would otherwise keep driving the computer
        # with nobody reading its output. Stop it here.
        if task is not None and not task.done():
            task.cancel()
173
-
174
async def cancel(self) -> None:
    """Cancel the background loop task, if one is still running.

    After requesting cancellation this waits up to two seconds for the task
    to finish, then always puts ``None`` on the queue so that a consumer
    waiting on the queue is released.
    """
    task = self.loop_task
    if not task or task.done():
        logger.info("No active OpenAI loop task to cancel")
        return

    logger.info("Cancelling OpenAI loop task")
    task.cancel()
    try:
        # Bounded wait: do not hang if the task is slow to unwind.
        await asyncio.wait_for(task, timeout=2.0)
    except asyncio.TimeoutError:
        logger.warning("Timeout while waiting for loop task to cancel")
    except asyncio.CancelledError:
        logger.info("Loop task cancelled successfully")
    except Exception as e:
        logger.error(f"Error while cancelling loop task: {str(e)}")
    finally:
        # Stop signal for whoever is reading the queue.
        await self.queue.put(None)
        logger.info("OpenAI loop task cancelled")
198
-
199
- ###########################################
200
- # AGENT LOOP IMPLEMENTATION
201
- ###########################################
202
-
203
async def _run_loop(self, queue: asyncio.Queue, messages: List[Dict[str, Any]]) -> None:
    """Drive the request / computer-action cycle and stream results to ``queue``.

    Takes an initial screenshot, records the user's query and the screenshot
    in the message manager, sends the initial request, and then repeatedly
    executes the first ``computer_call`` of each response and sends the
    resulting screenshot back until a response contains no computer call.
    Every API response (and any error message) is put on ``queue``; ``None``
    is put last as the stop signal.

    The cycle also stops when:
      * pending safety checks are rejected by the acknowledgment callback
        (previously the same call was retried forever), or
      * no string ``last_response_id`` is available for a follow-up request
        (previously the stale response was reprocessed forever), or
      * executing an action or the follow-up request raises.

    Args:
        queue: Queue for response streaming.
        messages: List of messages in standard format; the first user message
            with non-empty string content is used as the query.
    """

    def to_base64(shot: Any) -> str:
        """Return the screenshot as a base64 string, encoding raw byte buffers."""
        if isinstance(shot, (bytes, bytearray, memoryview)):
            return base64.b64encode(bytes(shot)).decode("utf-8")
        return str(shot)

    def remember_response_id(resp: Any) -> None:
        """Store the response ID on the instance; keep the old one if absent."""
        if isinstance(resp, dict) and "id" in resp:
            self.last_response_id = resp["id"]
            logger.info(f"Received response with ID: {self.last_response_id}")
        else:
            logger.warning(
                f"Could not find response ID in OpenAI response: {type(resp)}"
            )

    try:
        try:
            # Initial screenshot
            screenshot_base64 = to_base64(await self.computer.interface.screenshot())
            logger.info("Screenshot captured successfully")

            await self.handle_screenshot(screenshot_base64, action_type="initial_state")
            self._save_screenshot(screenshot_base64, action_type="state")

            # Record the user's query (first non-empty string user message).
            user_query = None
            for msg in messages:
                if msg.get("role") != "user":
                    continue
                user_content = msg.get("content", "")
                if isinstance(user_content, str) and user_content:
                    user_query = user_content
                    self.message_manager.add_user_message(
                        [{"type": "text", "text": user_content}]
                    )
                    break

            # Screenshot message, accompanied by the query text when there is one.
            message_content = [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": screenshot_base64,
                    },
                }
            ]
            if user_query:
                # Never send a text part whose text is None.
                message_content.append({"type": "text", "text": user_query})
            self.message_manager.add_user_message(message_content)

            logger.info(
                f"Starting agent loop with {len(self.message_manager.messages)} messages"
            )

            if self.save_trajectory:
                self._create_turn_dir()

            # Initial request
            screen_size = await self.computer.interface.get_screen_size()
            display_width = str(screen_size["width"])
            display_height = str(screen_size["height"])
            response = await self.api_handler.send_initial_request(
                messages=self.message_manager.get_messages(),  # Apply image retention policy
                display_width=display_width,
                display_height=display_height,
                previous_response_id=self.last_response_id,
            )
            remember_response_id(response)

            # The response is forwarded as-is (no conversion is applied here).
            self._log_api_call("agent_response", request=None, response=response)
            await queue.put(response)

            # Keep acting until a response carries no computer call.
            while True:
                output_items = response.get("output", []) or []
                computer_calls = [
                    item for item in output_items if item.get("type") == "computer_call"
                ]
                if not computer_calls:
                    logger.info("No computer calls in response, task may be complete.")
                    break

                # Only the first computer call of a response is processed.
                computer_call = computer_calls[0]
                action = computer_call.get("action", {})
                call_id = computer_call.get("call_id")

                pending_safety_checks = computer_call.get("pending_safety_checks", [])
                acknowledged_safety_checks = []

                if pending_safety_checks:
                    for check in pending_safety_checks:
                        logger.warning(
                            f"Safety check: {check.get('code')} - {check.get('message')}"
                        )

                    if self.acknowledge_safety_check_callback:
                        acknowledged = await self.acknowledge_safety_check_callback(
                            pending_safety_checks
                        )
                        if not acknowledged:
                            logger.warning("Safety check acknowledgment failed")
                            await queue.put(
                                {
                                    "role": "assistant",
                                    "content": "Safety checks were not acknowledged. Cannot proceed with action.",
                                    "metadata": {"title": "⚠️ Safety Warning"},
                                }
                            )
                            # Stop: retrying the same call would only ask again.
                            break
                        acknowledged_safety_checks = pending_safety_checks

                try:
                    if self.save_trajectory:
                        self._create_turn_dir()

                    await self.tool_manager.execute_tool("computer", action)

                    # Screenshot after the action is the call's output.
                    screenshot_base64 = to_base64(
                        await self.computer.interface.screenshot()
                    )
                    action_type = f"after_{action.get('type', 'action')}"
                    await self.handle_screenshot(screenshot_base64, action_type=action_type)
                    self._save_screenshot(screenshot_base64, action_type=action_type)

                    computer_call_output = {
                        "type": "computer_call_output",
                        "call_id": call_id,
                        "output": {
                            "type": "input_image",
                            "image_url": f"data:image/png;base64,{screenshot_base64}",
                        },
                    }
                    if acknowledged_safety_checks:
                        computer_call_output["acknowledged_safety_checks"] = (
                            acknowledged_safety_checks
                        )

                    # Save to message manager for history
                    self.message_manager.add_system_message(
                        f"[Computer action executed: {action.get('type')}]"
                    )
                    self.message_manager.add_user_message([computer_call_output])

                    # A follow-up request needs the previous response ID;
                    # without one the old response would be processed again
                    # and the same action repeated indefinitely.
                    if not isinstance(self.last_response_id, str):
                        raise RuntimeError(
                            "No previous response ID available for follow-up request"
                        )

                    response = await self.api_handler.send_computer_call_request(
                        messages=self.message_manager.get_messages(),  # Apply image retention policy
                        display_width=display_width,
                        display_height=display_height,
                        previous_response_id=self.last_response_id,
                    )
                    remember_response_id(response)

                    self._log_api_call("agent_response", request=None, response=response)
                    await queue.put(response)
                except Exception as e:
                    logger.error(f"Error executing computer action: {str(e)}")
                    await queue.put(
                        {
                            "role": "assistant",
                            "content": f"Error executing action: {str(e)}",
                            "metadata": {"title": "❌ Error"},
                        }
                    )
                    break

        except Exception as e:
            # Covers the whole setup phase and any error escaping the cycle.
            logger.error(f"Error capturing initial screenshot: {str(e)}")
            await queue.put(
                {
                    "role": "assistant",
                    "content": f"Error capturing screenshot: {str(e)}",
                    "metadata": {"title": "❌ Error"},
                }
            )
            await queue.put(None)  # Signal that we're done
            return

        # Signal that we're done
        await queue.put(None)

    except Exception as e:
        logger.error(f"Error in _run_loop: {str(e)}")
        await queue.put(
            {
                "role": "assistant",
                "content": f"Error: {str(e)}",
                "metadata": {"title": "❌ Error"},
            }
        )
        await queue.put(None)  # Signal that we're done
456
-
457
def get_last_response_id(self) -> Optional[str]:
    """Return the response ID currently stored on this loop.

    Returns:
        The value of ``last_response_id`` (``None`` when it holds no ID).
    """
    stored_id = self.last_response_id
    return stored_id
464
-
465
def set_last_response_id(self, response_id: str) -> None:
    """Store ``response_id`` as the last response ID and log the change.

    Args:
        response_id: OpenAI response ID to store.
    """
    self.last_response_id = response_id
    logger.info(f"Manually set response ID to: {self.last_response_id}")
@@ -1,205 +0,0 @@
1
- """Response handler for the OpenAI provider."""
2
-
3
- import logging
4
- import asyncio
5
- import traceback
6
- from typing import Any, Dict, List, Optional, TYPE_CHECKING, AsyncGenerator
7
- import base64
8
-
9
- from ...core.types import AgentResponse
10
- from .types import ResponseItemType
11
-
12
- if TYPE_CHECKING:
13
- from .loop import OpenAILoop
14
-
15
- logger = logging.getLogger(__name__)
16
-
17
-
18
- class OpenAIResponseHandler:
19
- """Handler for OpenAI API responses."""
20
-
21
def __init__(self, loop: "OpenAILoop"):
    """Bind the handler to the loop it serves.

    Args:
        loop: OpenAI loop instance; kept as ``self.loop``.
    """
    self.loop = loop
    logger.info("Initialized OpenAI response handler")
29
-
30
async def process_response(self, response: Dict[str, Any], queue: asyncio.Queue) -> None:
    """Forward the output items of an API response to ``queue``.

    Computer-call and message items are queued unchanged. For a reasoning
    item, the text of its first ``summary_text`` entry (if any) is logged and
    queued as an assistant message, followed by the reasoning item itself.
    Non-dict items and other item types are ignored. Any exception is logged
    and reported on the queue as an error message.

    Args:
        response: Response from the API.
        queue: Queue for response streaming.
    """
    try:
        for entry in response.get("output", []) or []:
            if not isinstance(entry, dict):
                continue

            kind = entry.get("type")

            # Execution of computer calls happens in the loop; here the item
            # is only passed along.
            if kind == ResponseItemType.COMPUTER_CALL:
                await queue.put(entry)
            elif kind == ResponseItemType.MESSAGE:
                await queue.put(entry)
            elif kind == ResponseItemType.REASONING:
                has_list = "summary" in entry and isinstance(entry["summary"], list)
                candidates = entry["summary"] if has_list else []
                # Text of the first summary_text entry, or None when absent.
                summary = next(
                    (
                        part.get("text")
                        for part in candidates
                        if isinstance(part, dict) and part.get("type") == "summary_text"
                    ),
                    None,
                )

                if summary:
                    logger.info(f"Reasoning summary: {summary}")

                    await queue.put(
                        {
                            "role": "assistant",
                            "content": f"[Reasoning: {summary}]",
                            "metadata": {"title": "💭 Reasoning", "is_summary": True},
                        }
                    )

                    # NOTE(review): nesting reconstructed from a flattened
                    # listing; confirm the raw item is only forwarded when a
                    # summary was found.
                    await queue.put(entry)

    except Exception as e:
        logger.error(f"Error processing response: {str(e)}")
        await queue.put(
            {
                "role": "assistant",
                "content": f"Error processing response: {str(e)}",
                "metadata": {"title": "❌ Error"},
            }
        )
95
-
96
- def _process_message_item(self, item: Dict[str, Any]) -> AgentResponse:
97
- """Process a message item from the response.
98
-
99
- Args:
100
- item: Message item from the response
101
-
102
- Returns:
103
- Processed message in AgentResponse format
104
- """
105
- # Extract content items - add null check
106
- content_items = item.get("content", []) or []
107
-
108
- # Extract text from content items - use output_text type from OpenAI
109
- text = ""
110
- for content_item in content_items:
111
- # Skip if content_item is None or not a dict
112
- if content_item is None or not isinstance(content_item, dict):
113
- continue
114
-
115
- # In OpenAI Agent Response API, text content is in "output_text" type items
116
- if content_item.get("type") == "output_text":
117
- text += content_item.get("text", "")
118
-
119
- # Create agent response
120
- return {
121
- "role": "assistant",
122
- "content": text
123
- or "I don't have a response for that right now.", # Provide fallback when text is empty
124
- "metadata": {"title": "💬 Response"},
125
- }
126
-
127
async def _process_computer_call(self, item: Dict[str, Any], queue: asyncio.Queue) -> None:
    """Execute a computer call and record its outcome in the message history.

    The item's ``action`` is run through the loop's tool manager. When the
    result carries a ``base64_image``, a completion note and the image are
    appended to the message history as user messages. On failure the error is
    logged, appended to the history and put on ``queue``.

    Args:
        item: Computer call item.
        queue: Queue that receives an error message if execution fails.
    """
    try:
        # Missing, None or non-dict actions all degrade to an empty dict.
        action = item.get("action") or {}
        if not isinstance(action, dict):
            logger.warning(f"Expected dict for action, got {type(action)}")
            action = {}

        action_type = action.get("type", "unknown")
        logger.info(f"Processing computer call: {action_type}")

        result = await self.loop.tool_manager.execute_tool("computer", action)

        if result and result.base64_image:
            history = self.loop.message_manager
            history.add_user_message(
                [{"type": "text", "text": f"[Computer action completed: {action_type}]"}]
            )
            history.add_user_message(
                [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": result.base64_image,
                        },
                    }
                ]
            )
            # A disabled branch used to append "Current URL: ..." here for
            # browser environments; it is intentionally not active.

        # NOTE(review): nesting reconstructed from a flattened listing;
        # confirm this log is emitted regardless of whether an image came back.
        logger.info(f"Computer call {action_type} executed successfully")

    except Exception as e:
        logger.error(f"Error executing computer call: {str(e)}")
        logger.debug(traceback.format_exc())

        failure_text = f"Error executing computer action: {str(e)}"

        # Record the failure in the conversation ...
        self.loop.message_manager.add_user_message(
            [{"type": "text", "text": failure_text}]
        )

        # ... and surface it to the consumer.
        await queue.put(
            {
                "role": "assistant",
                "content": failure_text,
                "metadata": {"title": "❌ Error"},
            }
        )
@@ -1,15 +0,0 @@
1
- """OpenAI tools module for computer control."""
2
-
3
- from .manager import ToolManager
4
- from .computer import ComputerTool
5
- from .base import BaseOpenAITool, ToolResult, ToolError, ToolFailure, CLIResult
6
-
7
- __all__ = [
8
- "ToolManager",
9
- "ComputerTool",
10
- "BaseOpenAITool",
11
- "ToolResult",
12
- "ToolError",
13
- "ToolFailure",
14
- "CLIResult",
15
- ]