cua-agent 0.3.2__py3-none-any.whl → 0.4.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic.
- agent/__init__.py +15 -51
- agent/__main__.py +21 -0
- agent/adapters/__init__.py +9 -0
- agent/adapters/huggingfacelocal_adapter.py +229 -0
- agent/agent.py +577 -0
- agent/callbacks/__init__.py +17 -0
- agent/callbacks/base.py +153 -0
- agent/callbacks/budget_manager.py +44 -0
- agent/callbacks/image_retention.py +139 -0
- agent/callbacks/logging.py +247 -0
- agent/callbacks/pii_anonymization.py +259 -0
- agent/callbacks/trajectory_saver.py +305 -0
- agent/cli.py +290 -0
- agent/computer_handler.py +107 -0
- agent/decorators.py +90 -0
- agent/loops/__init__.py +11 -0
- agent/loops/anthropic.py +728 -0
- agent/loops/omniparser.py +339 -0
- agent/loops/openai.py +95 -0
- agent/loops/uitars.py +688 -0
- agent/responses.py +207 -0
- agent/types.py +79 -0
- agent/ui/__init__.py +7 -1
- agent/ui/gradio/__init__.py +6 -19
- agent/ui/gradio/app.py +80 -1299
- agent/ui/gradio/ui_components.py +703 -0
- cua_agent-0.4.0b2.dist-info/METADATA +424 -0
- cua_agent-0.4.0b2.dist-info/RECORD +30 -0
- agent/core/__init__.py +0 -27
- agent/core/agent.py +0 -210
- agent/core/base.py +0 -217
- agent/core/callbacks.py +0 -200
- agent/core/experiment.py +0 -249
- agent/core/factory.py +0 -122
- agent/core/messages.py +0 -332
- agent/core/provider_config.py +0 -21
- agent/core/telemetry.py +0 -142
- agent/core/tools/__init__.py +0 -21
- agent/core/tools/base.py +0 -74
- agent/core/tools/bash.py +0 -52
- agent/core/tools/collection.py +0 -46
- agent/core/tools/computer.py +0 -113
- agent/core/tools/edit.py +0 -67
- agent/core/tools/manager.py +0 -56
- agent/core/tools.py +0 -32
- agent/core/types.py +0 -88
- agent/core/visualization.py +0 -197
- agent/providers/__init__.py +0 -4
- agent/providers/anthropic/__init__.py +0 -6
- agent/providers/anthropic/api/client.py +0 -360
- agent/providers/anthropic/api/logging.py +0 -150
- agent/providers/anthropic/api_handler.py +0 -140
- agent/providers/anthropic/callbacks/__init__.py +0 -5
- agent/providers/anthropic/callbacks/manager.py +0 -65
- agent/providers/anthropic/loop.py +0 -568
- agent/providers/anthropic/prompts.py +0 -23
- agent/providers/anthropic/response_handler.py +0 -226
- agent/providers/anthropic/tools/__init__.py +0 -33
- agent/providers/anthropic/tools/base.py +0 -88
- agent/providers/anthropic/tools/bash.py +0 -66
- agent/providers/anthropic/tools/collection.py +0 -34
- agent/providers/anthropic/tools/computer.py +0 -396
- agent/providers/anthropic/tools/edit.py +0 -326
- agent/providers/anthropic/tools/manager.py +0 -54
- agent/providers/anthropic/tools/run.py +0 -42
- agent/providers/anthropic/types.py +0 -16
- agent/providers/anthropic/utils.py +0 -381
- agent/providers/omni/__init__.py +0 -8
- agent/providers/omni/api_handler.py +0 -42
- agent/providers/omni/clients/anthropic.py +0 -103
- agent/providers/omni/clients/base.py +0 -35
- agent/providers/omni/clients/oaicompat.py +0 -195
- agent/providers/omni/clients/ollama.py +0 -122
- agent/providers/omni/clients/openai.py +0 -155
- agent/providers/omni/clients/utils.py +0 -25
- agent/providers/omni/image_utils.py +0 -34
- agent/providers/omni/loop.py +0 -990
- agent/providers/omni/parser.py +0 -307
- agent/providers/omni/prompts.py +0 -64
- agent/providers/omni/tools/__init__.py +0 -30
- agent/providers/omni/tools/base.py +0 -29
- agent/providers/omni/tools/bash.py +0 -74
- agent/providers/omni/tools/computer.py +0 -179
- agent/providers/omni/tools/manager.py +0 -61
- agent/providers/omni/utils.py +0 -236
- agent/providers/openai/__init__.py +0 -6
- agent/providers/openai/api_handler.py +0 -456
- agent/providers/openai/loop.py +0 -472
- agent/providers/openai/response_handler.py +0 -205
- agent/providers/openai/tools/__init__.py +0 -15
- agent/providers/openai/tools/base.py +0 -79
- agent/providers/openai/tools/computer.py +0 -326
- agent/providers/openai/tools/manager.py +0 -106
- agent/providers/openai/types.py +0 -36
- agent/providers/openai/utils.py +0 -98
- agent/providers/uitars/__init__.py +0 -1
- agent/providers/uitars/clients/base.py +0 -35
- agent/providers/uitars/clients/mlxvlm.py +0 -263
- agent/providers/uitars/clients/oaicompat.py +0 -214
- agent/providers/uitars/loop.py +0 -660
- agent/providers/uitars/prompts.py +0 -63
- agent/providers/uitars/tools/__init__.py +0 -1
- agent/providers/uitars/tools/computer.py +0 -283
- agent/providers/uitars/tools/manager.py +0 -60
- agent/providers/uitars/utils.py +0 -264
- agent/telemetry.py +0 -21
- agent/ui/__main__.py +0 -15
- cua_agent-0.3.2.dist-info/METADATA +0 -295
- cua_agent-0.3.2.dist-info/RECORD +0 -87
- {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b2.dist-info}/WHEEL +0 -0
- {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b2.dist-info}/entry_points.txt +0 -0
cua_agent-0.4.0b2.dist-info/METADATA
ADDED
@@ -0,0 +1,424 @@
+Metadata-Version: 2.1
+Name: cua-agent
+Version: 0.4.0b2
+Summary: CUA (Computer Use) Agent for AI-driven computer interaction
+Author-Email: TryCua <gh@trycua.com>
+Requires-Python: >=3.11
+Requires-Dist: httpx>=0.27.0
+Requires-Dist: aiohttp>=3.9.3
+Requires-Dist: asyncio
+Requires-Dist: anyio>=4.4.1
+Requires-Dist: typing-extensions>=4.12.2
+Requires-Dist: pydantic>=2.6.4
+Requires-Dist: rich>=13.7.1
+Requires-Dist: python-dotenv>=1.0.1
+Requires-Dist: cua-computer<0.5.0,>=0.3.0
+Requires-Dist: cua-core<0.2.0,>=0.1.0
+Requires-Dist: certifi>=2024.2.2
+Requires-Dist: litellm>=1.74.8
+Provides-Extra: openai
+Provides-Extra: anthropic
+Provides-Extra: omni
+Requires-Dist: ultralytics>=8.0.0; extra == "omni"
+Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "omni"
+Provides-Extra: uitars
+Provides-Extra: uitars-mlx
+Requires-Dist: mlx-vlm>=0.1.27; sys_platform == "darwin" and extra == "uitars-mlx"
+Provides-Extra: uitars-hf
+Requires-Dist: transformers>=4.54.0; extra == "uitars-hf"
+Provides-Extra: ui
+Requires-Dist: gradio>=5.23.3; extra == "ui"
+Requires-Dist: python-dotenv>=1.0.1; extra == "ui"
+Provides-Extra: cli
+Requires-Dist: yaspin>=3.1.0; extra == "cli"
+Provides-Extra: all
+Requires-Dist: ultralytics>=8.0.0; extra == "all"
+Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "all"
+Requires-Dist: mlx-vlm>=0.1.27; sys_platform == "darwin" and extra == "all"
+Requires-Dist: transformers>=4.54.0; extra == "all"
+Requires-Dist: gradio>=5.23.3; extra == "all"
+Requires-Dist: python-dotenv>=1.0.1; extra == "all"
+Requires-Dist: yaspin>=3.1.0; extra == "all"
+Description-Content-Type: text/markdown
+
+<div align="center">
+<h1>
+  <div class="image-wrapper" style="display: inline-block;">
+    <picture>
+      <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../../img/logo_white.png" style="display: block; margin: auto;">
+      <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../../img/logo_black.png" style="display: block; margin: auto;">
+      <img alt="Shows my svg">
+    </picture>
+  </div>
+
+  [](#)
+  [](#)
+  [](https://discord.com/invite/mVnXXpdE85)
+  [](https://pypi.org/project/cua-computer/)
+</h1>
+</div>
+
+**cua-agent** is a general Computer-Use framework with liteLLM integration for running agentic workflows on macOS, Windows, and Linux sandboxes. It provides a unified interface for computer-use agents across multiple LLM providers with advanced callback system for extensibility.
+
+## Features
+
+- **Safe Computer-Use/Tool-Use**: Using Computer SDK for sandboxed desktops
+- **Multi-Agent Support**: Anthropic Claude, OpenAI computer-use-preview, UI-TARS, Omniparser + any LLM
+- **Multi-API Support**: Take advantage of liteLLM supporting 100+ LLMs / model APIs, including local models (`huggingface-local/`, `ollama_chat/`, `mlx/`)
+- **Cross-Platform**: Works on Windows, macOS, and Linux with cloud and local computer instances
+- **Extensible Callbacks**: Built-in support for image retention, cache control, PII anonymization, budget limits, and trajectory tracking
+
+## Install
+
+```bash
+pip install "cua-agent[all]"
+
+# or install specific providers
+pip install "cua-agent[openai]" # OpenAI computer-use-preview support
+pip install "cua-agent[anthropic]" # Anthropic Claude support
+pip install "cua-agent[omni]" # Omniparser + any LLM support
+pip install "cua-agent[uitars]" # UI-TARS
+pip install "cua-agent[uitars-mlx]" # UI-TARS + MLX support
+pip install "cua-agent[uitars-hf]" # UI-TARS + Huggingface support
+pip install "cua-agent[ui]" # Gradio UI support
+```
+
+## Quick Start
+
+```python
+import asyncio
+import os
+from agent import ComputerAgent
+from computer import Computer
+
+async def main():
+    # Set up computer instance
+    async with Computer(
+        os_type="linux",
+        provider_type="cloud",
+        name=os.getenv("CUA_CONTAINER_NAME"),
+        api_key=os.getenv("CUA_API_KEY")
+    ) as computer:
+
+        # Create agent
+        agent = ComputerAgent(
+            model="anthropic/claude-3-5-sonnet-20241022",
+            tools=[computer],
+            only_n_most_recent_images=3,
+            trajectory_dir="trajectories",
+            max_trajectory_budget=5.0 # $5 budget limit
+        )
+
+        # Run agent
+        messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}]
+
+        async for result in agent.run(messages):
+            for item in result["output"]:
+                if item["type"] == "message":
+                    print(item["content"][0]["text"])
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+## Supported Models
+
+### Anthropic Claude (Computer Use API)
+```python
+model="anthropic/claude-3-5-sonnet-20241022"
+model="anthropic/claude-3-5-sonnet-20240620"
+model="anthropic/claude-opus-4-20250514"
+model="anthropic/claude-sonnet-4-20250514"
+```
+
+### OpenAI Computer Use Preview
+```python
+model="openai/computer-use-preview"
+```
+
+### UI-TARS (Local or Huggingface Inference)
+```python
+model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"
+model="ollama_chat/0000/ui-tars-1.5-7b"
+```
+
+### Omniparser + Any LLM
+```python
+model="omniparser+ollama_chat/mistral-small3.2"
+model="omniparser+vertex_ai/gemini-pro"
+model="omniparser+anthropic/claude-3-5-sonnet-20241022"
+model="omniparser+openai/gpt-4o"
+```
+
+## Custom Tools
+
+Define custom tools using decorated functions:
+
+```python
+from computer.helpers import sandboxed
+
+@sandboxed()
+def read_file(location: str) -> str:
+    """Read contents of a file
+
+    Parameters
+    ----------
+    location : str
+        Path to the file to read
+
+    Returns
+    -------
+    str
+        Contents of the file or error message
+    """
+    try:
+        with open(location, 'r') as f:
+            return f.read()
+    except Exception as e:
+        return f"Error reading file: {str(e)}"
+
+def calculate(a: int, b: int) -> int:
+    """Calculate the sum of two integers"""
+    return a + b
+
+# Use with agent
+agent = ComputerAgent(
+    model="anthropic/claude-3-5-sonnet-20241022",
+    tools=[computer, read_file, calculate]
+)
+```
+
+## Callbacks System
+
+agent provides a comprehensive callback system for extending functionality:
+
+### Built-in Callbacks
+
+```python
+from agent.callbacks import (
+    ImageRetentionCallback,
+    TrajectorySaverCallback,
+    BudgetManagerCallback,
+    LoggingCallback
+)
+
+agent = ComputerAgent(
+    model="anthropic/claude-3-5-sonnet-20241022",
+    tools=[computer],
+    callbacks=[
+        ImageRetentionCallback(only_n_most_recent_images=3),
+        TrajectorySaverCallback(trajectory_dir="trajectories"),
+        BudgetManagerCallback(max_budget=10.0, raise_error=True),
+        LoggingCallback(level=logging.INFO)
+    ]
+)
+```
+
+### Custom Callbacks
+
+```python
+from agent.callbacks.base import AsyncCallbackHandler
+
+class CustomCallback(AsyncCallbackHandler):
+    async def on_llm_start(self, messages):
+        """Preprocess messages before LLM call"""
+        # Add custom preprocessing logic
+        return messages
+
+    async def on_llm_end(self, messages):
+        """Postprocess messages after LLM call"""
+        # Add custom postprocessing logic
+        return messages
+
+    async def on_usage(self, usage):
+        """Track usage information"""
+        print(f"Tokens used: {usage.total_tokens}")
+```
+
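A handler like this is registered the same way as the built-in callbacks above; a minimal usage sketch, reusing the `computer` instance from the Quick Start (illustrative values only):

```python
from agent import ComputerAgent

# Illustrative: CustomCallback is the class defined above; `computer` is the
# Computer instance created in the Quick Start example.
agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[computer],
    callbacks=[CustomCallback()],
)
```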
+## Budget Management
+
+Control costs with built-in budget management:
+
+```python
+# Simple budget limit
+agent = ComputerAgent(
+    model="anthropic/claude-3-5-sonnet-20241022",
+    max_trajectory_budget=5.0 # $5 limit
+)
+
+# Advanced budget configuration
+agent = ComputerAgent(
+    model="anthropic/claude-3-5-sonnet-20241022",
+    max_trajectory_budget={
+        "max_budget": 10.0,
+        "raise_error": True, # Raise error when exceeded
+        "reset_after_each_run": False # Persistent across runs
+    }
+)
+```
+
+## Trajectory Management
+
+Save and replay agent conversations:
+
+```python
+agent = ComputerAgent(
+    model="anthropic/claude-3-5-sonnet-20241022",
+    trajectory_dir="trajectories", # Auto-save trajectories
+    tools=[computer]
+)
+
+# Trajectories are saved with:
+# - Complete conversation history
+# - Usage statistics and costs
+# - Timestamps and metadata
+# - Screenshots and computer actions
+```
+
+## Configuration Options
+
+### ComputerAgent Parameters
+
+- `model`: Model identifier (required)
+- `tools`: List of computer objects and decorated functions
+- `callbacks`: List of callback handlers for extensibility
+- `only_n_most_recent_images`: Limit recent images to prevent context overflow
+- `verbosity`: Logging level (logging.INFO, logging.DEBUG, etc.)
+- `trajectory_dir`: Directory to save conversation trajectories
+- `max_retries`: Maximum API call retries (default: 3)
+- `screenshot_delay`: Delay between actions and screenshots (default: 0.5s)
+- `use_prompt_caching`: Enable prompt caching for supported models
+- `max_trajectory_budget`: Budget limit configuration
+
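For reference, a constructor call that exercises most of the options listed above could look like the following sketch; the values are illustrative and `computer` is assumed from the Quick Start example:

```python
import logging

from agent import ComputerAgent

# Illustrative values only; see the parameter list above for what each controls.
agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",  # required model identifier
    tools=[computer],                    # computer objects and decorated functions
    callbacks=[],                        # optional extra callback handlers
    only_n_most_recent_images=3,         # cap screenshots kept in context
    verbosity=logging.INFO,              # logging level
    trajectory_dir="trajectories",       # where trajectories are saved
    max_retries=3,                       # API call retries (default: 3)
    screenshot_delay=0.5,                # seconds between action and screenshot
    use_prompt_caching=True,             # prompt caching on supported models
    max_trajectory_budget=5.0,           # budget limit in dollars
)
```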
+### Environment Variables
+
+```bash
+# Computer instance (cloud)
+export CUA_CONTAINER_NAME="your-container-name"
+export CUA_API_KEY="your-cua-api-key"
+
+# LLM API keys
+export ANTHROPIC_API_KEY="your-anthropic-key"
+export OPENAI_API_KEY="your-openai-key"
+```
+
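Because `python-dotenv` is a declared dependency, these variables can also live in a local `.env` file and be loaded at startup; a small sketch (the `.env` workflow itself is an assumption, not something the README prescribes):

```python
import os

from dotenv import load_dotenv  # python-dotenv is listed in Requires-Dist

load_dotenv()  # read variables from a local .env file, if one exists

container_name = os.getenv("CUA_CONTAINER_NAME")
cua_api_key = os.getenv("CUA_API_KEY")
# Provider keys such as ANTHROPIC_API_KEY / OPENAI_API_KEY are read from the
# environment by the underlying model clients.
```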
+## Advanced Usage
+
+### Streaming Responses
+
+```python
+async for result in agent.run(messages, stream=True):
+    # Process streaming chunks
+    for item in result["output"]:
+        if item["type"] == "message":
+            print(item["content"][0]["text"], end="", flush=True)
+        elif item["type"] == "computer_call":
+            action = item["action"]
+            print(f"\n[Action: {action['type']}]")
+```
+
+### Interactive Chat Loop
+
+```python
+history = []
+while True:
+    user_input = input("> ")
+    if user_input.lower() in ['quit', 'exit']:
+        break
+
+    history.append({"role": "user", "content": user_input})
+
+    async for result in agent.run(history):
+        history += result["output"]
+
+        # Display assistant responses
+        for item in result["output"]:
+            if item["type"] == "message":
+                print(item["content"][0]["text"])
+```
+
+### Error Handling
+
+```python
+try:
+    async for result in agent.run(messages):
+        # Process results
+        pass
+except BudgetExceededException:
+    print("Budget limit exceeded")
+except Exception as e:
+    print(f"Agent error: {e}")
+```
+
+## API Reference
+
+### ComputerAgent.run()
+
+```python
+async def run(
+    self,
+    messages: Messages,
+    stream: bool = False,
+    **kwargs
+) -> AsyncGenerator[Dict[str, Any], None]:
+    """
+    Run the agent with the given messages.
+
+    Args:
+        messages: List of message dictionaries
+        stream: Whether to stream the response
+        **kwargs: Additional arguments
+
+    Returns:
+        AsyncGenerator that yields response chunks
+    """
+```
+
+### Message Format
+
+```python
+messages = [
+    {
+        "role": "user",
+        "content": "Take a screenshot and describe what you see"
+    },
+    {
+        "role": "assistant",
+        "content": "I'll take a screenshot for you."
+    }
+]
+```
+
+### Response Format
+
+```python
+{
+    "output": [
+        {
+            "type": "message",
+            "role": "assistant",
+            "content": [{"type": "output_text", "text": "I can see..."}]
+        },
+        {
+            "type": "computer_call",
+            "action": {"type": "screenshot"},
+            "call_id": "call_123"
+        },
+        {
+            "type": "computer_call_output",
+            "call_id": "call_123",
+            "output": {"image_url": "data:image/png;base64,..."}
+        }
+    ],
+    "usage": {
+        "prompt_tokens": 150,
+        "completion_tokens": 75,
+        "total_tokens": 225,
+        "response_cost": 0.01,
+    }
+}
+```
+
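A consumer of this response shape can dispatch on each output item's `type` and tally usage; a sketch based only on the keys shown above (any other item types are ignored):

```python
def summarize_result(result: dict) -> None:
    """Print assistant text, note computer actions, and report usage/cost."""
    for item in result.get("output", []):
        if item["type"] == "message":
            # Assistant messages carry a list of content parts.
            for part in item.get("content", []):
                if part.get("type") == "output_text":
                    print(part["text"])
        elif item["type"] == "computer_call":
            print(f"[action: {item['action']['type']}]")
        elif item["type"] == "computer_call_output":
            print(f"[received output for {item['call_id']}]")

    usage = result.get("usage", {})
    print(f"tokens={usage.get('total_tokens', 0)} cost=${usage.get('response_cost', 0.0)}")
```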
+## License
+
+MIT License - see LICENSE file for details.
cua_agent-0.4.0b2.dist-info/RECORD
ADDED
@@ -0,0 +1,30 @@
+agent/__init__.py,sha256=rZk-enq3xJXRrl6LrpsQhJWfBtahjUP-0yY9raO72O0,356
+agent/__main__.py,sha256=lBUe8Niqa5XoCjwFfXyX7GtnUwjjZXC1-j4V9mvUYSc,538
+agent/adapters/__init__.py,sha256=szM2HMten2WkcqXeRnan__-sXjpyS4eyvIW0LXSfj4U,178
+agent/adapters/huggingfacelocal_adapter.py,sha256=dnzzxYCvFiuDdNzsb_1uM-boWv1eS__dWMve_fAnlUc,8038
+agent/agent.py,sha256=tXVnqzwC721UUN57OWEatfoZXFAzaDfrZ0G-EuVK3Ug,24022
+agent/callbacks/__init__.py,sha256=SO9NKTrmk4sZ7ZwvWFhTtK9co1FgwubUe3bwkIXYwn0,472
+agent/callbacks/base.py,sha256=UnnnYlh6XCm6HKZZsAPaT_Eyo9LUYLyjyNwF-QRm6Ns,4691
+agent/callbacks/budget_manager.py,sha256=XNrL1z3pxAZkywy6f8kfbRjDWq2ZvKcpnP2ElC8LBJs,1875
+agent/callbacks/image_retention.py,sha256=tiuRT5ke9xXTb2eP8Gz-2ITyAMY29LURUH6AbjX3RP8,6165
+agent/callbacks/logging.py,sha256=OOxU97EzrxlnUAtiEnvy9FB7SwCUK90-rdpDFA2Ae4E,10921
+agent/callbacks/pii_anonymization.py,sha256=UKAqNacHG3z92_6uocVzOIl8gJoqyofldCoCmB4UVIE,10268
+agent/callbacks/trajectory_saver.py,sha256=POE8aPT-MBzfW873wr6C7iiVUHtp483KwvLPxC1S3EY,11626
+agent/cli.py,sha256=QqIgDYjBkXf_0-bxugafxQr-Otp4avoA1SI5ZbtFhBc,9768
+agent/computer_handler.py,sha256=2gfFBeDk9Vd54x9mOqnswMo8BdjUduLo5I0RbBPLovY,3964
+agent/decorators.py,sha256=bCmcCjP31WEjWg1D91OE2jo7AZTfGa9cNgCnYUvjiyw,2832
+agent/loops/__init__.py,sha256=_qpP_--3ePdFkTZP8qmUEFlBsy6m4h8fj0gGLDKA7zw,217
+agent/loops/anthropic.py,sha256=w5s_zvkXdcHt0DgBMYjDQGDMBXK4bPu-SyeIMhA1Rrs,32243
+agent/loops/omniparser.py,sha256=qfQk9GsuSd4MAJL7mRSWv_fZBOn2dfp3HMhDuMGHKxk,11436
+agent/loops/openai.py,sha256=ArTqadeJY8F9N8ZLKfswlzgHV_54HbWJgLd4l6ele9w,3010
+agent/loops/uitars.py,sha256=L0NYxKoIiMfIHbyomnaiK3ZGLmLv3QMx9nX57GruAk0,26323
+agent/responses.py,sha256=8vnCqi21x3gzo59yDmcsA_E_-CyYxIuCwgfQF0PIomg,6867
+agent/types.py,sha256=GiLxIcF7s1XIh_WaY7tjdQPFpdTXb5MWVe_ZUPA0gkY,2364
+agent/ui/__init__.py,sha256=o7NIpZGFiCTUhgj6eB7gWRRpVhrUAsrOdWnPgbfDgVM,124
+agent/ui/gradio/__init__.py,sha256=j3dc1i14daVIDEo-3K1w_v5EyDsHeB3TdgKkuteDivM,143
+agent/ui/gradio/app.py,sha256=9i4YzLZRkvPrYKnrl6eDx1mR2QaSSsWgG-YQVlhedX8,8788
+agent/ui/gradio/ui_components.py,sha256=IZsYOwn3dwQN8E5-3cFVlLhGlu77I35YhdPY8QxB0Uk,33513
+cua_agent-0.4.0b2.dist-info/METADATA,sha256=1DV8S2V-FjsRcyszafS1cQDBuNy_M1R-MVZBoo14-aY,12062
+cua_agent-0.4.0b2.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
+cua_agent-0.4.0b2.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+cua_agent-0.4.0b2.dist-info/RECORD,,
agent/core/__init__.py
DELETED
@@ -1,27 +0,0 @@
-"""Core agent components."""
-
-from .factory import BaseLoop
-from .messages import (
-    StandardMessageManager,
-    ImageRetentionConfig,
-)
-from .callbacks import (
-    CallbackManager,
-    CallbackHandler,
-    BaseCallbackManager,
-    ContentCallback,
-    ToolCallback,
-    APICallback,
-)
-
-__all__ = [
-    "BaseLoop",
-    "CallbackManager",
-    "CallbackHandler",
-    "StandardMessageManager",
-    "ImageRetentionConfig",
-    "BaseCallbackManager",
-    "ContentCallback",
-    "ToolCallback",
-    "APICallback",
-]
agent/core/agent.py
DELETED
@@ -1,210 +0,0 @@
-"""Main entry point for computer agents."""
-
-import asyncio
-import logging
-import os
-from typing import AsyncGenerator, Optional
-
-from computer import Computer
-from .types import LLM, AgentLoop
-from .types import AgentResponse
-from .factory import LoopFactory
-from .provider_config import DEFAULT_MODELS, ENV_VARS
-
-logger = logging.getLogger(__name__)
-
-class ComputerAgent:
-    """A computer agent that can perform automated tasks using natural language instructions."""
-
-    def __init__(
-        self,
-        computer: Computer,
-        model: LLM,
-        loop: AgentLoop,
-        max_retries: int = 3,
-        screenshot_dir: Optional[str] = None,
-        log_dir: Optional[str] = None,
-        api_key: Optional[str] = None,
-        save_trajectory: bool = True,
-        trajectory_dir: str = "trajectories",
-        only_n_most_recent_images: Optional[int] = None,
-        verbosity: int = logging.INFO,
-    ):
-        """Initialize the ComputerAgent.
-
-        Args:
-            computer: Computer instance. If not provided, one will be created with default settings.
-            max_retries: Maximum number of retry attempts.
-            screenshot_dir: Directory to save screenshots.
-            log_dir: Directory to save logs (set to None to disable logging to files).
-            model: LLM object containing provider and model name. Takes precedence over provider/model_name.
-            provider: The AI provider to use (e.g., LLMProvider.ANTHROPIC). Only used if model is None.
-            api_key: The API key for the provider. If not provided, will look for environment variable.
-            model_name: The model name to use. Only used if model is None.
-            save_trajectory: Whether to save the trajectory.
-            trajectory_dir: Directory to save the trajectory.
-            only_n_most_recent_images: Maximum number of recent screenshots to include in API requests.
-            verbosity: Logging level.
-        """
-        # Basic agent configuration
-        self.max_retries = max_retries
-        self.computer = computer
-        self.queue = asyncio.Queue()
-        self.screenshot_dir = screenshot_dir
-        self.log_dir = log_dir
-        self._retry_count = 0
-        self._initialized = False
-        self._in_context = False
-
-        # Set logging level
-        logger.setLevel(verbosity)
-
-        # Setup logging
-        if self.log_dir:
-            os.makedirs(self.log_dir, exist_ok=True)
-            logger.info(f"Created logs directory: {self.log_dir}")
-
-        # Setup screenshots directory
-        if self.screenshot_dir:
-            os.makedirs(self.screenshot_dir, exist_ok=True)
-            logger.info(f"Created screenshots directory: {self.screenshot_dir}")
-
-        # Use the provided LLM object
-        self.provider = model.provider
-        actual_model_name = model.name or DEFAULT_MODELS.get(self.provider, "")
-        self.provider_base_url = getattr(model, "provider_base_url", None)
-
-        # Ensure we have a valid model name
-        if not actual_model_name:
-            actual_model_name = DEFAULT_MODELS.get(self.provider, "")
-            if not actual_model_name:
-                raise ValueError(
-                    f"No model specified for provider {self.provider} and no default found"
-                )
-
-        # Get API key from environment if not provided
-        actual_api_key = api_key or os.environ.get(ENV_VARS[self.provider], "")
-        # Ollama and OpenAI-compatible APIs typically don't require an API key
-        if (
-            not actual_api_key
-            and str(self.provider) not in ["ollama", "oaicompat"]
-            and ENV_VARS[self.provider] != "none"
-        ):
-            raise ValueError(f"No API key provided for {self.provider}")
-
-        # Create the appropriate loop using the factory
-        try:
-            # Let the factory create the appropriate loop with needed components
-            self._loop = LoopFactory.create_loop(
-                loop_type=loop,
-                provider=self.provider,
-                computer=self.computer,
-                model_name=actual_model_name,
-                api_key=actual_api_key,
-                save_trajectory=save_trajectory,
-                trajectory_dir=trajectory_dir,
-                only_n_most_recent_images=only_n_most_recent_images,
-                provider_base_url=self.provider_base_url,
-            )
-        except ValueError as e:
-            logger.error(f"Failed to create loop: {str(e)}")
-            raise
-
-        # Initialize the message manager from the loop
-        self.message_manager = self._loop.message_manager
-
-        logger.info(
-            f"ComputerAgent initialized with provider: {self.provider}, model: {actual_model_name}"
-        )
-
-    async def __aenter__(self):
-        """Initialize the agent when used as a context manager."""
-        logger.info("Entering ComputerAgent context")
-        self._in_context = True
-
-        # In case the computer wasn't initialized
-        try:
-            # Initialize the computer only if not already initialized
-            logger.info("Checking if computer is already initialized...")
-            if not self.computer._initialized:
-                logger.info("Initializing computer in __aenter__...")
-                # Use the computer's __aenter__ directly instead of calling run()
-                await self.computer.__aenter__()
-                logger.info("Computer initialized in __aenter__")
-            else:
-                logger.info("Computer already initialized, skipping initialization")
-
-        except Exception as e:
-            logger.error(f"Error initializing computer in __aenter__: {str(e)}")
-            raise
-
-        await self.initialize()
-        return self
-
-    async def __aexit__(self, exc_type, exc_val, exc_tb):
-        """Cleanup agent resources if needed."""
-        logger.info("Cleaning up agent resources")
-        self._in_context = False
-
-        # Do any necessary cleanup
-        # We're not shutting down the computer here as it might be shared
-        # Just log that we're exiting
-        if exc_type:
-            logger.error(f"Exiting agent context with error: {exc_type.__name__}: {exc_val}")
-        else:
-            logger.info("Exiting agent context normally")
-
-        # If we have a queue, make sure to signal it's done
-        if hasattr(self, "queue") and self.queue:
-            await self.queue.put(None)  # Signal that we're done
-
-    async def initialize(self) -> None:
-        """Initialize the agent and its components."""
-        if not self._initialized:
-            # Always initialize the computer if available
-            if self.computer and not self.computer._initialized:
-                await self.computer.run()
-            self._initialized = True
-
-    async def run(self, task: str) -> AsyncGenerator[AgentResponse, None]:
-        """Run a task using the computer agent.
-
-        Args:
-            task: Task description
-
-        Yields:
-            Agent response format
-        """
-        try:
-            logger.info(f"Running task: {task}")
-            logger.info(
-                f"Message history before task has {len(self.message_manager.messages)} messages"
-            )
-
-            # Initialize the computer if needed
-            if not self._initialized:
-                await self.initialize()
-
-            # Add task as a user message using the message manager
-            self.message_manager.add_user_message([{"type": "text", "text": task}])
-            logger.info(
-                f"Added task message. Message history now has {len(self.message_manager.messages)} messages"
-            )
-
-            # Pass properly formatted messages to the loop
-            if self._loop is None:
-                logger.error("Loop not initialized properly")
-                yield {"error": "Loop not initialized properly"}
-                return
-
-            # Execute the task and yield results
-            async for result in self._loop.run(self.message_manager.messages):
-                yield result
-
-        except Exception as e:
-            logger.error(f"Error in agent run method: {str(e)}")
-            yield {
-                "role": "assistant",
-                "content": f"Error: {str(e)}",
-                "metadata": {"title": "❌ Error"},
-            }