cua-agent 0.3.1__tar.gz → 0.4.0b1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (115) hide show
  1. cua_agent-0.4.0b1/PKG-INFO +424 -0
  2. cua_agent-0.4.0b1/README.md +381 -0
  3. cua_agent-0.4.0b1/agent/__init__.py +19 -0
  4. cua_agent-0.4.0b1/agent/__main__.py +21 -0
  5. cua_agent-0.4.0b1/agent/adapters/__init__.py +9 -0
  6. cua_agent-0.4.0b1/agent/adapters/huggingfacelocal_adapter.py +216 -0
  7. cua_agent-0.4.0b1/agent/agent.py +577 -0
  8. cua_agent-0.4.0b1/agent/callbacks/__init__.py +17 -0
  9. cua_agent-0.4.0b1/agent/callbacks/base.py +153 -0
  10. cua_agent-0.4.0b1/agent/callbacks/budget_manager.py +44 -0
  11. cua_agent-0.4.0b1/agent/callbacks/image_retention.py +139 -0
  12. cua_agent-0.4.0b1/agent/callbacks/logging.py +247 -0
  13. cua_agent-0.4.0b1/agent/callbacks/pii_anonymization.py +259 -0
  14. cua_agent-0.4.0b1/agent/callbacks/trajectory_saver.py +305 -0
  15. cua_agent-0.4.0b1/agent/cli.py +290 -0
  16. cua_agent-0.4.0b1/agent/computer_handler.py +107 -0
  17. cua_agent-0.4.0b1/agent/decorators.py +90 -0
  18. cua_agent-0.4.0b1/agent/loops/__init__.py +11 -0
  19. cua_agent-0.4.0b1/agent/loops/anthropic.py +728 -0
  20. cua_agent-0.4.0b1/agent/loops/omniparser.py +339 -0
  21. cua_agent-0.4.0b1/agent/loops/openai.py +95 -0
  22. cua_agent-0.4.0b1/agent/loops/uitars.py +688 -0
  23. cua_agent-0.4.0b1/agent/responses.py +207 -0
  24. cua_agent-0.4.0b1/agent/types.py +79 -0
  25. cua_agent-0.4.0b1/agent/ui/__init__.py +7 -0
  26. cua_agent-0.4.0b1/agent/ui/gradio/__init__.py +8 -0
  27. cua_agent-0.4.0b1/agent/ui/gradio/app.py +248 -0
  28. cua_agent-0.4.0b1/agent/ui/gradio/ui_components.py +703 -0
  29. cua_agent-0.4.0b1/pyproject.toml +74 -0
  30. cua_agent-0.3.1/PKG-INFO +0 -295
  31. cua_agent-0.3.1/README.md +0 -222
  32. cua_agent-0.3.1/agent/__init__.py +0 -55
  33. cua_agent-0.3.1/agent/core/__init__.py +0 -27
  34. cua_agent-0.3.1/agent/core/agent.py +0 -210
  35. cua_agent-0.3.1/agent/core/base.py +0 -217
  36. cua_agent-0.3.1/agent/core/callbacks.py +0 -200
  37. cua_agent-0.3.1/agent/core/experiment.py +0 -249
  38. cua_agent-0.3.1/agent/core/factory.py +0 -122
  39. cua_agent-0.3.1/agent/core/messages.py +0 -332
  40. cua_agent-0.3.1/agent/core/provider_config.py +0 -21
  41. cua_agent-0.3.1/agent/core/telemetry.py +0 -142
  42. cua_agent-0.3.1/agent/core/tools/__init__.py +0 -21
  43. cua_agent-0.3.1/agent/core/tools/base.py +0 -74
  44. cua_agent-0.3.1/agent/core/tools/bash.py +0 -52
  45. cua_agent-0.3.1/agent/core/tools/collection.py +0 -46
  46. cua_agent-0.3.1/agent/core/tools/computer.py +0 -113
  47. cua_agent-0.3.1/agent/core/tools/edit.py +0 -67
  48. cua_agent-0.3.1/agent/core/tools/manager.py +0 -56
  49. cua_agent-0.3.1/agent/core/tools.py +0 -32
  50. cua_agent-0.3.1/agent/core/types.py +0 -88
  51. cua_agent-0.3.1/agent/core/visualization.py +0 -197
  52. cua_agent-0.3.1/agent/providers/__init__.py +0 -4
  53. cua_agent-0.3.1/agent/providers/anthropic/__init__.py +0 -6
  54. cua_agent-0.3.1/agent/providers/anthropic/api/client.py +0 -360
  55. cua_agent-0.3.1/agent/providers/anthropic/api/logging.py +0 -150
  56. cua_agent-0.3.1/agent/providers/anthropic/api_handler.py +0 -140
  57. cua_agent-0.3.1/agent/providers/anthropic/callbacks/__init__.py +0 -5
  58. cua_agent-0.3.1/agent/providers/anthropic/callbacks/manager.py +0 -65
  59. cua_agent-0.3.1/agent/providers/anthropic/loop.py +0 -568
  60. cua_agent-0.3.1/agent/providers/anthropic/prompts.py +0 -23
  61. cua_agent-0.3.1/agent/providers/anthropic/response_handler.py +0 -226
  62. cua_agent-0.3.1/agent/providers/anthropic/tools/__init__.py +0 -33
  63. cua_agent-0.3.1/agent/providers/anthropic/tools/base.py +0 -88
  64. cua_agent-0.3.1/agent/providers/anthropic/tools/bash.py +0 -66
  65. cua_agent-0.3.1/agent/providers/anthropic/tools/collection.py +0 -34
  66. cua_agent-0.3.1/agent/providers/anthropic/tools/computer.py +0 -396
  67. cua_agent-0.3.1/agent/providers/anthropic/tools/edit.py +0 -326
  68. cua_agent-0.3.1/agent/providers/anthropic/tools/manager.py +0 -54
  69. cua_agent-0.3.1/agent/providers/anthropic/tools/run.py +0 -42
  70. cua_agent-0.3.1/agent/providers/anthropic/types.py +0 -16
  71. cua_agent-0.3.1/agent/providers/anthropic/utils.py +0 -367
  72. cua_agent-0.3.1/agent/providers/omni/__init__.py +0 -8
  73. cua_agent-0.3.1/agent/providers/omni/api_handler.py +0 -42
  74. cua_agent-0.3.1/agent/providers/omni/clients/anthropic.py +0 -103
  75. cua_agent-0.3.1/agent/providers/omni/clients/base.py +0 -35
  76. cua_agent-0.3.1/agent/providers/omni/clients/oaicompat.py +0 -195
  77. cua_agent-0.3.1/agent/providers/omni/clients/ollama.py +0 -122
  78. cua_agent-0.3.1/agent/providers/omni/clients/openai.py +0 -155
  79. cua_agent-0.3.1/agent/providers/omni/clients/utils.py +0 -25
  80. cua_agent-0.3.1/agent/providers/omni/image_utils.py +0 -34
  81. cua_agent-0.3.1/agent/providers/omni/loop.py +0 -990
  82. cua_agent-0.3.1/agent/providers/omni/parser.py +0 -307
  83. cua_agent-0.3.1/agent/providers/omni/prompts.py +0 -64
  84. cua_agent-0.3.1/agent/providers/omni/tools/__init__.py +0 -30
  85. cua_agent-0.3.1/agent/providers/omni/tools/base.py +0 -29
  86. cua_agent-0.3.1/agent/providers/omni/tools/bash.py +0 -74
  87. cua_agent-0.3.1/agent/providers/omni/tools/computer.py +0 -179
  88. cua_agent-0.3.1/agent/providers/omni/tools/manager.py +0 -61
  89. cua_agent-0.3.1/agent/providers/omni/utils.py +0 -236
  90. cua_agent-0.3.1/agent/providers/openai/__init__.py +0 -6
  91. cua_agent-0.3.1/agent/providers/openai/api_handler.py +0 -456
  92. cua_agent-0.3.1/agent/providers/openai/loop.py +0 -472
  93. cua_agent-0.3.1/agent/providers/openai/response_handler.py +0 -205
  94. cua_agent-0.3.1/agent/providers/openai/tools/__init__.py +0 -15
  95. cua_agent-0.3.1/agent/providers/openai/tools/base.py +0 -79
  96. cua_agent-0.3.1/agent/providers/openai/tools/computer.py +0 -326
  97. cua_agent-0.3.1/agent/providers/openai/tools/manager.py +0 -106
  98. cua_agent-0.3.1/agent/providers/openai/types.py +0 -36
  99. cua_agent-0.3.1/agent/providers/openai/utils.py +0 -98
  100. cua_agent-0.3.1/agent/providers/uitars/__init__.py +0 -1
  101. cua_agent-0.3.1/agent/providers/uitars/clients/base.py +0 -35
  102. cua_agent-0.3.1/agent/providers/uitars/clients/mlxvlm.py +0 -263
  103. cua_agent-0.3.1/agent/providers/uitars/clients/oaicompat.py +0 -214
  104. cua_agent-0.3.1/agent/providers/uitars/loop.py +0 -660
  105. cua_agent-0.3.1/agent/providers/uitars/prompts.py +0 -63
  106. cua_agent-0.3.1/agent/providers/uitars/tools/__init__.py +0 -1
  107. cua_agent-0.3.1/agent/providers/uitars/tools/computer.py +0 -283
  108. cua_agent-0.3.1/agent/providers/uitars/tools/manager.py +0 -60
  109. cua_agent-0.3.1/agent/providers/uitars/utils.py +0 -264
  110. cua_agent-0.3.1/agent/telemetry.py +0 -21
  111. cua_agent-0.3.1/agent/ui/__init__.py +0 -1
  112. cua_agent-0.3.1/agent/ui/__main__.py +0 -15
  113. cua_agent-0.3.1/agent/ui/gradio/__init__.py +0 -21
  114. cua_agent-0.3.1/agent/ui/gradio/app.py +0 -1467
  115. cua_agent-0.3.1/pyproject.toml +0 -151
@@ -0,0 +1,424 @@
1
+ Metadata-Version: 2.1
2
+ Name: cua-agent
3
+ Version: 0.4.0b1
4
+ Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
+ Author-Email: TryCua <gh@trycua.com>
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: httpx>=0.27.0
8
+ Requires-Dist: aiohttp>=3.9.3
9
+ Requires-Dist: asyncio
10
+ Requires-Dist: anyio>=4.4.1
11
+ Requires-Dist: typing-extensions>=4.12.2
12
+ Requires-Dist: pydantic>=2.6.4
13
+ Requires-Dist: rich>=13.7.1
14
+ Requires-Dist: python-dotenv>=1.0.1
15
+ Requires-Dist: cua-computer<0.5.0,>=0.3.0
16
+ Requires-Dist: cua-core<0.2.0,>=0.1.0
17
+ Requires-Dist: certifi>=2024.2.2
18
+ Requires-Dist: litellm>=1.74.8
19
+ Provides-Extra: openai
20
+ Provides-Extra: anthropic
21
+ Provides-Extra: omni
22
+ Requires-Dist: ultralytics>=8.0.0; extra == "omni"
23
+ Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "omni"
24
+ Provides-Extra: uitars
25
+ Provides-Extra: uitars-mlx
26
+ Requires-Dist: mlx-vlm>=0.1.27; sys_platform == "darwin" and extra == "uitars-mlx"
27
+ Provides-Extra: uitars-hf
28
+ Requires-Dist: transformers>=4.54.0; extra == "uitars-hf"
29
+ Provides-Extra: ui
30
+ Requires-Dist: gradio>=5.23.3; extra == "ui"
31
+ Requires-Dist: python-dotenv>=1.0.1; extra == "ui"
32
+ Provides-Extra: cli
33
+ Requires-Dist: yaspin>=3.1.0; extra == "cli"
34
+ Provides-Extra: all
35
+ Requires-Dist: ultralytics>=8.0.0; extra == "all"
36
+ Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "all"
37
+ Requires-Dist: mlx-vlm>=0.1.27; sys_platform == "darwin" and extra == "all"
38
+ Requires-Dist: transformers>=4.54.0; extra == "all"
39
+ Requires-Dist: gradio>=5.23.3; extra == "all"
40
+ Requires-Dist: python-dotenv>=1.0.1; extra == "all"
41
+ Requires-Dist: yaspin>=3.1.0; extra == "all"
42
+ Description-Content-Type: text/markdown
43
+
44
+ <div align="center">
45
+ <h1>
46
+ <div class="image-wrapper" style="display: inline-block;">
47
+ <picture>
48
+ <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../../img/logo_white.png" style="display: block; margin: auto;">
49
+ <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../../img/logo_black.png" style="display: block; margin: auto;">
50
+ <img alt="Cua logo" height="150" src="../../../img/logo_black.png" style="display: block; margin: auto;">
51
+ </picture>
52
+ </div>
53
+
54
+ [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
55
+ [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
56
+ [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
57
+ [![PyPI](https://img.shields.io/pypi/v/cua-agent?color=333333)](https://pypi.org/project/cua-agent/)
58
+ </h1>
59
+ </div>
60
+
61
+ **cua-agent** is a general Computer-Use framework with liteLLM integration for running agentic workflows on macOS, Windows, and Linux sandboxes. It provides a unified interface for computer-use agents across multiple LLM providers, with an advanced callback system for extensibility.
62
+
63
+ ## Features
64
+
65
+ - **Safe Computer-Use/Tool-Use**: Using Computer SDK for sandboxed desktops
66
+ - **Multi-Agent Support**: Anthropic Claude, OpenAI computer-use-preview, UI-TARS, Omniparser + any LLM
67
+ - **Multi-API Support**: Take advantage of liteLLM supporting 100+ LLMs / model APIs, including local models (`huggingface-local/`, `ollama_chat/`, `mlx/`)
68
+ - **Cross-Platform**: Works on Windows, macOS, and Linux with cloud and local computer instances
69
+ - **Extensible Callbacks**: Built-in support for image retention, cache control, PII anonymization, budget limits, and trajectory tracking
70
+
71
+ ## Install
72
+
73
+ ```bash
74
+ pip install "cua-agent[all]"
75
+
76
+ # or install specific providers
77
+ pip install "cua-agent[openai]" # OpenAI computer-use-preview support
78
+ pip install "cua-agent[anthropic]" # Anthropic Claude support
79
+ pip install "cua-agent[omni]" # Omniparser + any LLM support
80
+ pip install "cua-agent[uitars]" # UI-TARS
81
+ pip install "cua-agent[uitars-mlx]" # UI-TARS + MLX support
82
+ pip install "cua-agent[uitars-hf]" # UI-TARS + Hugging Face support
83
+ pip install "cua-agent[ui]" # Gradio UI support
84
+ ```
85
+
86
+ ## Quick Start
87
+
88
+ ```python
89
+ import asyncio
90
+ import os
91
+ from agent import ComputerAgent
92
+ from computer import Computer
93
+
94
+ async def main():
95
+ # Set up computer instance
96
+ async with Computer(
97
+ os_type="linux",
98
+ provider_type="cloud",
99
+ name=os.getenv("CUA_CONTAINER_NAME"),
100
+ api_key=os.getenv("CUA_API_KEY")
101
+ ) as computer:
102
+
103
+ # Create agent
104
+ agent = ComputerAgent(
105
+ model="anthropic/claude-3-5-sonnet-20241022",
106
+ tools=[computer],
107
+ only_n_most_recent_images=3,
108
+ trajectory_dir="trajectories",
109
+ max_trajectory_budget=5.0 # $5 budget limit
110
+ )
111
+
112
+ # Run agent
113
+ messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}]
114
+
115
+ async for result in agent.run(messages):
116
+ for item in result["output"]:
117
+ if item["type"] == "message":
118
+ print(item["content"][0]["text"])
119
+
120
+ if __name__ == "__main__":
121
+ asyncio.run(main())
122
+ ```
123
+
124
+ ## Supported Models
125
+
126
+ ### Anthropic Claude (Computer Use API)
127
+ ```python
128
+ model="anthropic/claude-3-5-sonnet-20241022"
129
+ model="anthropic/claude-3-5-sonnet-20240620"
130
+ model="anthropic/claude-opus-4-20250514"
131
+ model="anthropic/claude-sonnet-4-20250514"
132
+ ```
133
+
134
+ ### OpenAI Computer Use Preview
135
+ ```python
136
+ model="openai/computer-use-preview"
137
+ ```
138
+
139
+ ### UI-TARS (Local or Hugging Face Inference)
140
+ ```python
141
+ model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"
142
+ model="ollama_chat/0000/ui-tars-1.5-7b"
143
+ ```
144
+
145
+ ### Omniparser + Any LLM
146
+ ```python
147
+ model="omniparser+ollama_chat/mistral-small3.2"
148
+ model="omniparser+vertex_ai/gemini-pro"
149
+ model="omniparser+anthropic/claude-3-5-sonnet-20241022"
150
+ model="omniparser+openai/gpt-4o"
151
+ ```
152
+
153
+ ## Custom Tools
154
+
155
+ Define custom tools using decorated functions:
156
+
157
+ ```python
158
+ from computer.helpers import sandboxed
159
+
160
+ @sandboxed()
161
+ def read_file(location: str) -> str:
162
+ """Read contents of a file
163
+
164
+ Parameters
165
+ ----------
166
+ location : str
167
+ Path to the file to read
168
+
169
+ Returns
170
+ -------
171
+ str
172
+ Contents of the file or error message
173
+ """
174
+ try:
175
+ with open(location, 'r') as f:
176
+ return f.read()
177
+ except Exception as e:
178
+ return f"Error reading file: {str(e)}"
179
+
180
+ def calculate(a: int, b: int) -> int:
181
+ """Calculate the sum of two integers"""
182
+ return a + b
183
+
184
+ # Use with agent
185
+ agent = ComputerAgent(
186
+ model="anthropic/claude-3-5-sonnet-20241022",
187
+ tools=[computer, read_file, calculate]
188
+ )
189
+ ```
190
+
191
+ ## Callbacks System
192
+
193
+ The agent provides a comprehensive callback system for extending functionality:
194
+
195
+ ### Built-in Callbacks
196
+
197
+ ```python
198
+ from agent.callbacks import (
199
+ ImageRetentionCallback,
200
+ TrajectorySaverCallback,
201
+ BudgetManagerCallback,
202
+ LoggingCallback
203
+ )
204
+
205
+ agent = ComputerAgent(
206
+ model="anthropic/claude-3-5-sonnet-20241022",
207
+ tools=[computer],
208
+ callbacks=[
209
+ ImageRetentionCallback(only_n_most_recent_images=3),
210
+ TrajectorySaverCallback(trajectory_dir="trajectories"),
211
+ BudgetManagerCallback(max_budget=10.0, raise_error=True),
212
+ LoggingCallback(level=logging.INFO)
213
+ ]
214
+ )
215
+ ```
216
+
217
+ ### Custom Callbacks
218
+
219
+ ```python
220
+ from agent.callbacks.base import AsyncCallbackHandler
221
+
222
+ class CustomCallback(AsyncCallbackHandler):
223
+ async def on_llm_start(self, messages):
224
+ """Preprocess messages before LLM call"""
225
+ # Add custom preprocessing logic
226
+ return messages
227
+
228
+ async def on_llm_end(self, messages):
229
+ """Postprocess messages after LLM call"""
230
+ # Add custom postprocessing logic
231
+ return messages
232
+
233
+ async def on_usage(self, usage):
234
+ """Track usage information"""
235
+ print(f"Tokens used: {usage.total_tokens}")
236
+ ```
237
+
238
+ ## Budget Management
239
+
240
+ Control costs with built-in budget management:
241
+
242
+ ```python
243
+ # Simple budget limit
244
+ agent = ComputerAgent(
245
+ model="anthropic/claude-3-5-sonnet-20241022",
246
+ max_trajectory_budget=5.0 # $5 limit
247
+ )
248
+
249
+ # Advanced budget configuration
250
+ agent = ComputerAgent(
251
+ model="anthropic/claude-3-5-sonnet-20241022",
252
+ max_trajectory_budget={
253
+ "max_budget": 10.0,
254
+ "raise_error": True, # Raise error when exceeded
255
+ "reset_after_each_run": False # Persistent across runs
256
+ }
257
+ )
258
+ ```
259
+
260
+ ## Trajectory Management
261
+
262
+ Save and replay agent conversations:
263
+
264
+ ```python
265
+ agent = ComputerAgent(
266
+ model="anthropic/claude-3-5-sonnet-20241022",
267
+ trajectory_dir="trajectories", # Auto-save trajectories
268
+ tools=[computer]
269
+ )
270
+
271
+ # Trajectories are saved with:
272
+ # - Complete conversation history
273
+ # - Usage statistics and costs
274
+ # - Timestamps and metadata
275
+ # - Screenshots and computer actions
276
+ ```
277
+
278
+ ## Configuration Options
279
+
280
+ ### ComputerAgent Parameters
281
+
282
+ - `model`: Model identifier (required)
283
+ - `tools`: List of computer objects and decorated functions
284
+ - `callbacks`: List of callback handlers for extensibility
285
+ - `only_n_most_recent_images`: Keep only the N most recent images to prevent context overflow
286
+ - `verbosity`: Logging level (logging.INFO, logging.DEBUG, etc.)
287
+ - `trajectory_dir`: Directory to save conversation trajectories
288
+ - `max_retries`: Maximum API call retries (default: 3)
289
+ - `screenshot_delay`: Delay between actions and screenshots (default: 0.5s)
290
+ - `use_prompt_caching`: Enable prompt caching for supported models
291
+ - `max_trajectory_budget`: Budget limit configuration
292
+
293
+ ### Environment Variables
294
+
295
+ ```bash
296
+ # Computer instance (cloud)
297
+ export CUA_CONTAINER_NAME="your-container-name"
298
+ export CUA_API_KEY="your-cua-api-key"
299
+
300
+ # LLM API keys
301
+ export ANTHROPIC_API_KEY="your-anthropic-key"
302
+ export OPENAI_API_KEY="your-openai-key"
303
+ ```
304
+
305
+ ## Advanced Usage
306
+
307
+ ### Streaming Responses
308
+
309
+ ```python
310
+ async for result in agent.run(messages, stream=True):
311
+ # Process streaming chunks
312
+ for item in result["output"]:
313
+ if item["type"] == "message":
314
+ print(item["content"][0]["text"], end="", flush=True)
315
+ elif item["type"] == "computer_call":
316
+ action = item["action"]
317
+ print(f"\n[Action: {action['type']}]")
318
+ ```
319
+
320
+ ### Interactive Chat Loop
321
+
322
+ ```python
323
+ history = []
324
+ while True:
325
+ user_input = input("> ")
326
+ if user_input.lower() in ['quit', 'exit']:
327
+ break
328
+
329
+ history.append({"role": "user", "content": user_input})
330
+
331
+ async for result in agent.run(history):
332
+ history += result["output"]
333
+
334
+ # Display assistant responses
335
+ for item in result["output"]:
336
+ if item["type"] == "message":
337
+ print(item["content"][0]["text"])
338
+ ```
339
+
340
+ ### Error Handling
341
+
342
+ ```python
343
+ try:
344
+ async for result in agent.run(messages):
345
+ # Process results
346
+ pass
347
+ except BudgetExceededException:
348
+ print("Budget limit exceeded")
349
+ except Exception as e:
350
+ print(f"Agent error: {e}")
351
+ ```
352
+
353
+ ## API Reference
354
+
355
+ ### ComputerAgent.run()
356
+
357
+ ```python
358
+ async def run(
359
+ self,
360
+ messages: Messages,
361
+ stream: bool = False,
362
+ **kwargs
363
+ ) -> AsyncGenerator[Dict[str, Any], None]:
364
+ """
365
+ Run the agent with the given messages.
366
+
367
+ Args:
368
+ messages: List of message dictionaries
369
+ stream: Whether to stream the response
370
+ **kwargs: Additional arguments
371
+
372
+ Returns:
373
+ AsyncGenerator that yields response chunks
374
+ """
375
+ ```
376
+
377
+ ### Message Format
378
+
379
+ ```python
380
+ messages = [
381
+ {
382
+ "role": "user",
383
+ "content": "Take a screenshot and describe what you see"
384
+ },
385
+ {
386
+ "role": "assistant",
387
+ "content": "I'll take a screenshot for you."
388
+ }
389
+ ]
390
+ ```
391
+
392
+ ### Response Format
393
+
394
+ ```python
395
+ {
396
+ "output": [
397
+ {
398
+ "type": "message",
399
+ "role": "assistant",
400
+ "content": [{"type": "output_text", "text": "I can see..."}]
401
+ },
402
+ {
403
+ "type": "computer_call",
404
+ "action": {"type": "screenshot"},
405
+ "call_id": "call_123"
406
+ },
407
+ {
408
+ "type": "computer_call_output",
409
+ "call_id": "call_123",
410
+ "output": {"image_url": "data:image/png;base64,..."}
411
+ }
412
+ ],
413
+ "usage": {
414
+ "prompt_tokens": 150,
415
+ "completion_tokens": 75,
416
+ "total_tokens": 225,
417
+ "response_cost": 0.01,
418
+ }
419
+ }
420
+ ```
421
+
422
+ ## License
423
+
424
+ MIT License - see LICENSE file for details.