cua-agent 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (112) hide show
  1. agent/__init__.py +21 -12
  2. agent/__main__.py +21 -0
  3. agent/adapters/__init__.py +9 -0
  4. agent/adapters/huggingfacelocal_adapter.py +229 -0
  5. agent/agent.py +594 -0
  6. agent/callbacks/__init__.py +19 -0
  7. agent/callbacks/base.py +153 -0
  8. agent/callbacks/budget_manager.py +44 -0
  9. agent/callbacks/image_retention.py +139 -0
  10. agent/callbacks/logging.py +247 -0
  11. agent/callbacks/pii_anonymization.py +259 -0
  12. agent/callbacks/telemetry.py +210 -0
  13. agent/callbacks/trajectory_saver.py +305 -0
  14. agent/cli.py +297 -0
  15. agent/computer_handler.py +107 -0
  16. agent/decorators.py +90 -0
  17. agent/loops/__init__.py +11 -0
  18. agent/loops/anthropic.py +728 -0
  19. agent/loops/omniparser.py +339 -0
  20. agent/loops/openai.py +95 -0
  21. agent/loops/uitars.py +688 -0
  22. agent/responses.py +207 -0
  23. agent/telemetry.py +135 -14
  24. agent/types.py +79 -0
  25. agent/ui/__init__.py +7 -1
  26. agent/ui/__main__.py +2 -13
  27. agent/ui/gradio/__init__.py +6 -19
  28. agent/ui/gradio/app.py +94 -1313
  29. agent/ui/gradio/ui_components.py +721 -0
  30. cua_agent-0.4.0.dist-info/METADATA +424 -0
  31. cua_agent-0.4.0.dist-info/RECORD +33 -0
  32. agent/core/__init__.py +0 -27
  33. agent/core/agent.py +0 -210
  34. agent/core/base.py +0 -217
  35. agent/core/callbacks.py +0 -200
  36. agent/core/experiment.py +0 -249
  37. agent/core/factory.py +0 -122
  38. agent/core/messages.py +0 -332
  39. agent/core/provider_config.py +0 -21
  40. agent/core/telemetry.py +0 -142
  41. agent/core/tools/__init__.py +0 -21
  42. agent/core/tools/base.py +0 -74
  43. agent/core/tools/bash.py +0 -52
  44. agent/core/tools/collection.py +0 -46
  45. agent/core/tools/computer.py +0 -113
  46. agent/core/tools/edit.py +0 -67
  47. agent/core/tools/manager.py +0 -56
  48. agent/core/tools.py +0 -32
  49. agent/core/types.py +0 -88
  50. agent/core/visualization.py +0 -197
  51. agent/providers/__init__.py +0 -4
  52. agent/providers/anthropic/__init__.py +0 -6
  53. agent/providers/anthropic/api/client.py +0 -360
  54. agent/providers/anthropic/api/logging.py +0 -150
  55. agent/providers/anthropic/api_handler.py +0 -140
  56. agent/providers/anthropic/callbacks/__init__.py +0 -5
  57. agent/providers/anthropic/callbacks/manager.py +0 -65
  58. agent/providers/anthropic/loop.py +0 -568
  59. agent/providers/anthropic/prompts.py +0 -23
  60. agent/providers/anthropic/response_handler.py +0 -226
  61. agent/providers/anthropic/tools/__init__.py +0 -33
  62. agent/providers/anthropic/tools/base.py +0 -88
  63. agent/providers/anthropic/tools/bash.py +0 -66
  64. agent/providers/anthropic/tools/collection.py +0 -34
  65. agent/providers/anthropic/tools/computer.py +0 -396
  66. agent/providers/anthropic/tools/edit.py +0 -326
  67. agent/providers/anthropic/tools/manager.py +0 -54
  68. agent/providers/anthropic/tools/run.py +0 -42
  69. agent/providers/anthropic/types.py +0 -16
  70. agent/providers/anthropic/utils.py +0 -381
  71. agent/providers/omni/__init__.py +0 -8
  72. agent/providers/omni/api_handler.py +0 -42
  73. agent/providers/omni/clients/anthropic.py +0 -103
  74. agent/providers/omni/clients/base.py +0 -35
  75. agent/providers/omni/clients/oaicompat.py +0 -195
  76. agent/providers/omni/clients/ollama.py +0 -122
  77. agent/providers/omni/clients/openai.py +0 -155
  78. agent/providers/omni/clients/utils.py +0 -25
  79. agent/providers/omni/image_utils.py +0 -34
  80. agent/providers/omni/loop.py +0 -990
  81. agent/providers/omni/parser.py +0 -307
  82. agent/providers/omni/prompts.py +0 -64
  83. agent/providers/omni/tools/__init__.py +0 -30
  84. agent/providers/omni/tools/base.py +0 -29
  85. agent/providers/omni/tools/bash.py +0 -74
  86. agent/providers/omni/tools/computer.py +0 -179
  87. agent/providers/omni/tools/manager.py +0 -61
  88. agent/providers/omni/utils.py +0 -236
  89. agent/providers/openai/__init__.py +0 -6
  90. agent/providers/openai/api_handler.py +0 -456
  91. agent/providers/openai/loop.py +0 -472
  92. agent/providers/openai/response_handler.py +0 -205
  93. agent/providers/openai/tools/__init__.py +0 -15
  94. agent/providers/openai/tools/base.py +0 -79
  95. agent/providers/openai/tools/computer.py +0 -326
  96. agent/providers/openai/tools/manager.py +0 -106
  97. agent/providers/openai/types.py +0 -36
  98. agent/providers/openai/utils.py +0 -98
  99. agent/providers/uitars/__init__.py +0 -1
  100. agent/providers/uitars/clients/base.py +0 -35
  101. agent/providers/uitars/clients/mlxvlm.py +0 -263
  102. agent/providers/uitars/clients/oaicompat.py +0 -214
  103. agent/providers/uitars/loop.py +0 -660
  104. agent/providers/uitars/prompts.py +0 -63
  105. agent/providers/uitars/tools/__init__.py +0 -1
  106. agent/providers/uitars/tools/computer.py +0 -283
  107. agent/providers/uitars/tools/manager.py +0 -60
  108. agent/providers/uitars/utils.py +0 -264
  109. cua_agent-0.3.2.dist-info/METADATA +0 -295
  110. cua_agent-0.3.2.dist-info/RECORD +0 -87
  111. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0.dist-info}/WHEEL +0 -0
  112. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0.dist-info}/entry_points.txt +0 -0
agent/agent.py ADDED
@@ -0,0 +1,594 @@
1
+ """
2
+ ComputerAgent - Main agent class that selects and runs agent loops
3
+ """
4
+
5
+ import asyncio
6
+ from typing import Dict, List, Any, Optional, AsyncGenerator, Union, cast, Callable, Set
7
+
8
+ from litellm.responses.utils import Usage
9
+
10
+ from .types import Messages, Computer
11
+ from .decorators import find_agent_loop
12
+ from .computer_handler import OpenAIComputerHandler, acknowledge_safety_check_callback, check_blocklisted_url
13
+ import json
14
+ import litellm
15
+ import litellm.utils
16
+ import inspect
17
+ from .adapters import HuggingFaceLocalAdapter
18
+ from .callbacks import (
19
+ ImageRetentionCallback,
20
+ LoggingCallback,
21
+ TrajectorySaverCallback,
22
+ BudgetManagerCallback,
23
+ TelemetryCallback,
24
+ )
25
+
26
def get_json(obj: Any, max_depth: int = 10) -> Any:
    """Serialize *obj* to a JSON-compatible structure, dropping ``None`` values.

    Handles pydantic-style objects (via ``model_dump()``), plain objects
    (via ``__dict__``), dicts, lists/tuples/sets, and JSON-native scalars.
    Guards against circular references and unbounded nesting.

    Args:
        obj: Arbitrary object to serialize.
        max_depth: Maximum recursion depth; deeper values are replaced with a
            ``<max_depth_exceeded:...>`` placeholder string.

    Returns:
        A structure composed only of dicts, lists, strings, numbers and
        booleans, with all ``None`` values removed.
    """
    def custom_serializer(o: Any, depth: int = 0, seen: Optional[Set[int]] = None) -> Any:
        # NOTE: annotation fixed from `Set[int] = None` to Optional[Set[int]].
        if seen is None:
            seen = set()

        # Prefer the object's own serialization if it provides one
        # (pydantic models, litellm response objects, ...).
        if hasattr(o, 'model_dump'):
            return o.model_dump()

        # Depth guard: substitute a placeholder rather than recurse forever.
        if depth > max_depth:
            return f"<max_depth_exceeded:{max_depth}>"

        # Cycle guard: track visited containers by object id.
        obj_id = id(o)
        if obj_id in seen:
            return f"<circular_reference:{type(o).__name__}>"

        # Computer objects are opaque; represent them by class name only.
        if hasattr(o, '__class__') and 'computer' in getattr(o, '__class__').__name__.lower():
            return f"<computer:{o.__class__.__name__}>"

        # Handle objects with __dict__
        if hasattr(o, '__dict__'):
            seen.add(obj_id)
            try:
                result = {}
                for k, v in o.__dict__.items():
                    if v is not None:
                        # Each child gets its own copy of the visited set so
                        # shared (non-cyclic) references are still serialized.
                        result[k] = custom_serializer(v, depth + 1, seen.copy())
                return result
            finally:
                seen.discard(obj_id)

        # Handle common container types that might hold nested objects.
        elif isinstance(o, dict):
            seen.add(obj_id)
            try:
                return {
                    k: custom_serializer(v, depth + 1, seen.copy())
                    for k, v in o.items()
                    if v is not None
                }
            finally:
                seen.discard(obj_id)

        elif isinstance(o, (list, tuple, set)):
            seen.add(obj_id)
            try:
                return [
                    custom_serializer(item, depth + 1, seen.copy())
                    for item in o
                    if item is not None
                ]
            finally:
                seen.discard(obj_id)

        # JSON-native scalars pass through unchanged.
        elif isinstance(o, (str, int, float, bool)) or o is None:
            return o

        # Anything else falls back to its string representation.
        else:
            return str(o)

    def remove_nones(obj: Any) -> Any:
        """Strip any ``None`` values that survived the JSON round-trip."""
        if isinstance(obj, dict):
            return {k: remove_nones(v) for k, v in obj.items() if v is not None}
        elif isinstance(obj, list):
            return [remove_nones(item) for item in obj if item is not None]
        return obj

    # Serialize with circular reference and depth protection.
    serialized = custom_serializer(obj)

    # Round-trip through json to guarantee JSON compatibility.
    parsed = json.loads(json.dumps(serialized))

    # Final cleanup of any remaining None values.
    return remove_nones(parsed)
109
+
110
def sanitize_message(msg: Any) -> Any:
    """Return a copy of the message with image_url omitted for computer_call_output messages."""
    if msg.get("type") != "computer_call_output":
        return msg
    output = msg.get("output", {})
    if not isinstance(output, dict):
        return msg
    redacted = dict(msg)
    redacted["output"] = {**output, "image_url": "[omitted]"}
    return redacted
119
+
120
class ComputerAgent:
    """
    Main agent class that automatically selects the appropriate agent loop
    based on the model and executes tool calls.
    """

    def __init__(
        self,
        model: str,
        tools: Optional[List[Any]] = None,
        custom_loop: Optional[Callable] = None,
        only_n_most_recent_images: Optional[int] = None,
        callbacks: Optional[List[Any]] = None,
        verbosity: Optional[int] = None,
        trajectory_dir: Optional[str] = None,
        max_retries: Optional[int] = 3,
        screenshot_delay: Optional[float | int] = 0.5,
        use_prompt_caching: Optional[bool] = False,
        max_trajectory_budget: Optional[float | dict] = None,
        telemetry_enabled: Optional[bool | dict] = True,
        **kwargs
    ):
        """
        Initialize ComputerAgent.

        Args:
            model: Model name (e.g., "claude-3-5-sonnet-20241022", "computer-use-preview", "omni+vertex_ai/gemini-pro")
            tools: List of tools (computer objects, decorated functions, etc.)
            custom_loop: Custom agent loop function to use instead of auto-selection
            only_n_most_recent_images: If set, only keep the N most recent images in message history. Adds ImageRetentionCallback automatically.
            callbacks: List of AsyncCallbackHandler instances for preprocessing/postprocessing
            verbosity: Logging level (logging.DEBUG, logging.INFO, etc.). If set, adds LoggingCallback automatically
            trajectory_dir: If set, saves trajectory data (screenshots, responses) to this directory. Adds TrajectorySaverCallback automatically.
            max_retries: Maximum number of retries for failed API calls
            screenshot_delay: Delay before screenshots in seconds
            use_prompt_caching: If set, use prompt caching to avoid reprocessing the same prompt. Intended for use with anthropic providers.
            max_trajectory_budget: If set (float budget, or a dict of BudgetManagerCallback options), adds BudgetManagerCallback to track usage costs and stop when budget is exceeded
            telemetry_enabled: If set (True, or a dict of TelemetryCallback options), adds TelemetryCallback to track anonymized usage data. Enabled by default.
            **kwargs: Additional arguments passed to the agent loop
        """
        self.model = model
        self.tools = tools or []
        self.custom_loop = custom_loop
        self.only_n_most_recent_images = only_n_most_recent_images
        # Copy the caller's list: the built-in callbacks appended below must
        # not mutate the list object the caller passed in.
        self.callbacks = list(callbacks) if callbacks else []
        self.verbosity = verbosity
        self.trajectory_dir = trajectory_dir
        self.max_retries = max_retries
        self.screenshot_delay = screenshot_delay
        self.use_prompt_caching = use_prompt_caching
        self.telemetry_enabled = telemetry_enabled
        self.kwargs = kwargs

        # == Add built-in callbacks ==

        # Add telemetry callback if telemetry_enabled is set.
        # A dict value is treated as keyword options for TelemetryCallback.
        if self.telemetry_enabled:
            if isinstance(self.telemetry_enabled, bool):
                self.callbacks.append(TelemetryCallback(self))
            else:
                self.callbacks.append(TelemetryCallback(self, **self.telemetry_enabled))

        # Add logging callback if verbosity is set
        if self.verbosity is not None:
            self.callbacks.append(LoggingCallback(level=self.verbosity))

        # Add image retention callback if only_n_most_recent_images is set
        if self.only_n_most_recent_images:
            self.callbacks.append(ImageRetentionCallback(self.only_n_most_recent_images))

        # Add trajectory saver callback if trajectory_dir is set
        if self.trajectory_dir:
            self.callbacks.append(TrajectorySaverCallback(self.trajectory_dir))

        # Add budget manager if max_trajectory_budget is set
        if max_trajectory_budget:
            if isinstance(max_trajectory_budget, dict):
                self.callbacks.append(BudgetManagerCallback(**max_trajectory_budget))
            else:
                self.callbacks.append(BudgetManagerCallback(max_trajectory_budget))

        # == Enable local model providers w/ LiteLLM ==

        # Register local model providers.
        # NOTE(review): this assigns litellm's *global* custom_provider_map,
        # so constructing a second agent replaces the first registration --
        # confirm this is intended.
        hf_adapter = HuggingFaceLocalAdapter(
            device="auto"
        )
        litellm.custom_provider_map = [
            {"provider": "huggingface-local", "custom_handler": hf_adapter}
        ]

        # == Initialize computer agent ==

        # Find the appropriate agent loop (explicit custom loop wins).
        if custom_loop:
            self.agent_loop = custom_loop
            self.agent_loop_info = None
        else:
            loop_info = find_agent_loop(model)
            if not loop_info:
                raise ValueError(f"No agent loop found for model: {model}")
            self.agent_loop = loop_info.func
            self.agent_loop_info = loop_info

        # Populated lazily by _initialize_computers().
        self.tool_schemas = []
        self.computer_handler = None

    async def _initialize_computers(self):
        """Start computer tools and build tool schemas (idempotent)."""
        if not self.tool_schemas:
            # Start any computer tools that are not yet initialized.
            for tool in self.tools:
                if hasattr(tool, '_initialized') and not tool._initialized:
                    await tool.run()

            # Process tools and create tool schemas
            self.tool_schemas = self._process_tools()

            # Find the first computer tool and wrap its interface.
            computer_handler = None
            for schema in self.tool_schemas:
                if schema["type"] == "computer":
                    computer_handler = OpenAIComputerHandler(schema["computer"].interface)
                    break
            self.computer_handler = computer_handler

    def _process_input(self, input: Messages) -> List[Dict[str, Any]]:
        """Normalize input: a plain string becomes a single user message;
        a message list is serialized to JSON-compatible dicts."""
        if isinstance(input, str):
            return [{"role": "user", "content": input}]
        return [get_json(msg) for msg in input]

    def _process_tools(self) -> List[Dict[str, Any]]:
        """Process tools and create schemas for the agent loop.

        Computer objects (anything with an ``interface`` attribute) become
        ``{"type": "computer"}`` schemas; callables become function schemas
        extracted from their docstrings; anything else is skipped with a
        warning.
        """
        schemas = []

        for tool in self.tools:
            if hasattr(tool, 'interface'):
                # Computer tool - handled by the agent loop itself.
                schemas.append({
                    "type": "computer",
                    "computer": tool
                })
            elif callable(tool):
                # Use litellm.utils.function_to_dict to extract schema from docstring
                try:
                    function_schema = litellm.utils.function_to_dict(tool)
                    schemas.append({
                        "type": "function",
                        "function": function_schema
                    })
                except Exception as e:
                    print(f"Warning: Could not process tool {tool}: {e}")
            else:
                print(f"Warning: Unknown tool type: {tool}")

        return schemas

    def _get_tool(self, name: str) -> Optional[Callable]:
        """Look up a tool by function name (direct or via a .func wrapper)."""
        for tool in self.tools:
            if hasattr(tool, '__name__') and tool.__name__ == name:
                return tool
            elif hasattr(tool, 'func') and tool.func.__name__ == name:
                return tool
        return None

    # ============================================================================
    # AGENT RUN LOOP LIFECYCLE HOOKS
    # ============================================================================

    async def _dispatch(self, hook_name: str, *args: Any) -> None:
        """Invoke *hook_name* on every callback that implements it, in order."""
        for callback in self.callbacks:
            hook = getattr(callback, hook_name, None)
            if hook is not None:
                await hook(*args)

    async def _on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
        """Initialize run tracking by calling callbacks."""
        await self._dispatch('on_run_start', kwargs, old_items)

    async def _on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
        """Finalize run tracking by calling callbacks."""
        await self._dispatch('on_run_end', kwargs, old_items, new_items)

    async def _on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool:
        """Check if run should continue; the first callback returning falsy stops the run."""
        for callback in self.callbacks:
            if hasattr(callback, 'on_run_continue'):
                if not await callback.on_run_continue(kwargs, old_items, new_items):
                    return False
        return True

    async def _on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Prepare messages for the LLM call by piping them through each callback."""
        result = messages
        for callback in self.callbacks:
            if hasattr(callback, 'on_llm_start'):
                result = await callback.on_llm_start(result)
        return result

    async def _on_llm_end(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Postprocess messages after the LLM call by piping them through each callback."""
        result = messages
        for callback in self.callbacks:
            if hasattr(callback, 'on_llm_end'):
                result = await callback.on_llm_end(result)
        return result

    async def _on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
        """Called when responses are received."""
        # Serialize once and share across callbacks (hoisted out of the loop).
        await self._dispatch('on_responses', get_json(kwargs), get_json(responses))

    async def _on_computer_call_start(self, item: Dict[str, Any]) -> None:
        """Called when a computer call is about to start."""
        await self._dispatch('on_computer_call_start', get_json(item))

    async def _on_computer_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None:
        """Called when a computer call has completed."""
        await self._dispatch('on_computer_call_end', get_json(item), get_json(result))

    async def _on_function_call_start(self, item: Dict[str, Any]) -> None:
        """Called when a function call is about to start."""
        await self._dispatch('on_function_call_start', get_json(item))

    async def _on_function_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None:
        """Called when a function call has completed."""
        await self._dispatch('on_function_call_end', get_json(item), get_json(result))

    async def _on_text(self, item: Dict[str, Any]) -> None:
        """Called when a text message is encountered."""
        await self._dispatch('on_text', get_json(item))

    async def _on_api_start(self, kwargs: Dict[str, Any]) -> None:
        """Called when an LLM API call is about to start."""
        await self._dispatch('on_api_start', get_json(kwargs))

    async def _on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
        """Called when an LLM API call has completed."""
        await self._dispatch('on_api_end', get_json(kwargs), get_json(result))

    async def _on_usage(self, usage: Dict[str, Any]) -> None:
        """Called when usage information is received."""
        await self._dispatch('on_usage', get_json(usage))

    async def _on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None:
        """Called when a screenshot is taken. Raw bytes/str are passed through."""
        await self._dispatch('on_screenshot', screenshot, name)

    # ============================================================================
    # AGENT OUTPUT PROCESSING
    # ============================================================================

    async def _handle_item(self, item: Any, computer: Optional[Computer] = None) -> List[Dict[str, Any]]:
        """Handle one output item; may execute a computer action or tool call.

        Returns a list of follow-up items (call outputs) to append to the
        conversation, or an empty list for items that need no response.

        Raises:
            ValueError: if a computer call arrives without a computer handler,
                a safety check is rejected, or a named function is not found.
        """
        item_type = item.get("type", None)

        if item_type == "message":
            await self._on_text(item)
            return []

        if item_type == "computer_call":
            await self._on_computer_call_start(item)
            if not computer:
                raise ValueError("Computer handler is required for computer calls")

            # Perform computer actions
            action = item.get("action")
            action_type = action.get("type")

            # Extract action arguments (all fields except 'type')
            action_args = {k: v for k, v in action.items() if k != "type"}

            # Execute the computer action
            computer_method = getattr(computer, action_type, None)
            if computer_method:
                await computer_method(**action_args)
            else:
                # Unknown action: skip it. No call output is produced and
                # on_computer_call_end is intentionally not fired.
                print(f"Unknown computer action: {action_type}")
                return []

            # Take screenshot after action
            if self.screenshot_delay and self.screenshot_delay > 0:
                await asyncio.sleep(self.screenshot_delay)
            screenshot_base64 = await computer.screenshot()
            await self._on_screenshot(screenshot_base64, "screenshot_after")

            # Handle safety checks: every pending check must be acknowledged.
            pending_checks = item.get("pending_safety_checks", [])
            acknowledged_checks = []
            for check in pending_checks:
                check_message = check.get("message", str(check))
                if acknowledge_safety_check_callback(check_message):
                    acknowledged_checks.append(check)
                else:
                    raise ValueError(f"Safety check failed: {check_message}")

            # Create call output
            call_output = {
                "type": "computer_call_output",
                "call_id": item.get("call_id"),
                "acknowledged_safety_checks": acknowledged_checks,
                "output": {
                    "type": "input_image",
                    "image_url": f"data:image/png;base64,{screenshot_base64}",
                },
            }

            # Additional URL safety checks for browser environments
            if await computer.get_environment() == "browser":
                current_url = await computer.get_current_url()
                call_output["output"]["current_url"] = current_url
                check_blocklisted_url(current_url)

            result = [call_output]
            await self._on_computer_call_end(item, result)
            return result

        if item_type == "function_call":
            await self._on_function_call_start(item)
            # Look up the decorated tool by name.
            function = self._get_tool(item.get("name"))
            if not function:
                # Single quotes inside the f-string: nested double quotes are
                # a syntax error before Python 3.12.
                raise ValueError(f"Function {item.get('name')} not found")

            args = json.loads(item.get("arguments"))

            # Execute function - use asyncio.to_thread for non-async functions
            if inspect.iscoroutinefunction(function):
                output = await function(**args)
            else:
                output = await asyncio.to_thread(function, **args)

            # Create function call output
            result = [{
                "type": "function_call_output",
                "call_id": item.get("call_id"),
                "output": str(output),
            }]
            await self._on_function_call_end(item, result)
            return result

        # Unhandled item types produce no output.
        return []

    # ============================================================================
    # MAIN AGENT LOOP
    # ============================================================================

    async def run(
        self,
        messages: Messages,
        stream: bool = False,
        **kwargs
    ) -> AsyncGenerator[Dict[str, Any], None]:
        """
        Run the agent with the given messages using Computer protocol handler pattern.

        Args:
            messages: List of message dictionaries (or a plain string)
            stream: Whether to stream the response
            **kwargs: Additional arguments forwarded to the agent loop

        Returns:
            AsyncGenerator that yields response chunks
        """
        await self._initialize_computers()

        # Merge constructor kwargs with per-run kwargs (per-run wins).
        merged_kwargs = {**self.kwargs, **kwargs}

        old_items = self._process_input(messages)
        new_items: List[Dict[str, Any]] = []

        # Initialize run tracking
        run_kwargs = {
            "messages": messages,
            "stream": stream,
            "model": self.model,
            "agent_loop": self.agent_loop.__name__,
            **merged_kwargs
        }
        await self._on_run_start(run_kwargs, old_items)

        # Loop until the model produces a final assistant message.
        while not new_items or new_items[-1].get("role") != "assistant":
            # Lifecycle hook: check if we should continue (e.g. budget manager).
            if not await self._on_run_continue(run_kwargs, old_items, new_items):
                break

            # Lifecycle hook: prepare messages for the LLM call.
            # Use cases: PII anonymization, image retention policy.
            preprocessed_messages = await self._on_llm_start(old_items + new_items)

            loop_kwargs = {
                "messages": preprocessed_messages,
                "model": self.model,
                "tools": self.tool_schemas,
                # NOTE(review): the loop is always invoked non-streaming, even
                # when stream=True is passed to run() -- confirm intent.
                "stream": False,
                "computer_handler": self.computer_handler,
                "max_retries": self.max_retries,
                "use_prompt_caching": self.use_prompt_caching,
                **merged_kwargs
            }

            # Run one agent loop iteration.
            result = await self.agent_loop(
                **loop_kwargs,
                _on_api_start=self._on_api_start,
                _on_api_end=self._on_api_end,
                _on_usage=self._on_usage,
                _on_screenshot=self._on_screenshot,
            )
            result = get_json(result)

            # Lifecycle hook: postprocess messages after the LLM call.
            # Use cases: PII deanonymization (if you want tool calls to see PII).
            result["output"] = await self._on_llm_end(result.get("output", []))
            await self._on_responses(loop_kwargs, result)

            # Yield agent response
            yield result

            # Guard against a missing/None "output" key from the loop.
            output_items = result.get("output") or []
            new_items += output_items

            # Handle computer actions and function calls.
            for item in output_items:
                partial_items = await self._handle_item(item, self.computer_handler)
                new_items += partial_items

                # Yield partial response with a zeroed usage record.
                yield {
                    "output": partial_items,
                    "usage": Usage(
                        prompt_tokens=0,
                        completion_tokens=0,
                        total_tokens=0,
                    )
                }

        # Use run_kwargs here: loop_kwargs is unbound if _on_run_continue
        # stops the run before the first iteration completes (the original
        # referenced loop_kwargs and could raise NameError).
        await self._on_run_end(run_kwargs, old_items, new_items)
@@ -0,0 +1,19 @@
1
+ """
2
+ Callback system for ComputerAgent preprocessing and postprocessing hooks.
3
+ """
4
+
5
+ from .base import AsyncCallbackHandler
6
+ from .image_retention import ImageRetentionCallback
7
+ from .logging import LoggingCallback
8
+ from .trajectory_saver import TrajectorySaverCallback
9
+ from .budget_manager import BudgetManagerCallback
10
+ from .telemetry import TelemetryCallback
11
+
12
+ __all__ = [
13
+ "AsyncCallbackHandler",
14
+ "ImageRetentionCallback",
15
+ "LoggingCallback",
16
+ "TrajectorySaverCallback",
17
+ "BudgetManagerCallback",
18
+ "TelemetryCallback",
19
+ ]