cua-agent 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

This version of cua-agent has been flagged as potentially problematic.

Files changed (112)
  1. agent/__init__.py +21 -12
  2. agent/__main__.py +21 -0
  3. agent/adapters/__init__.py +9 -0
  4. agent/adapters/huggingfacelocal_adapter.py +229 -0
  5. agent/agent.py +594 -0
  6. agent/callbacks/__init__.py +19 -0
  7. agent/callbacks/base.py +153 -0
  8. agent/callbacks/budget_manager.py +44 -0
  9. agent/callbacks/image_retention.py +139 -0
  10. agent/callbacks/logging.py +247 -0
  11. agent/callbacks/pii_anonymization.py +259 -0
  12. agent/callbacks/telemetry.py +210 -0
  13. agent/callbacks/trajectory_saver.py +305 -0
  14. agent/cli.py +297 -0
  15. agent/computer_handler.py +107 -0
  16. agent/decorators.py +90 -0
  17. agent/loops/__init__.py +11 -0
  18. agent/loops/anthropic.py +728 -0
  19. agent/loops/omniparser.py +339 -0
  20. agent/loops/openai.py +95 -0
  21. agent/loops/uitars.py +688 -0
  22. agent/responses.py +207 -0
  23. agent/telemetry.py +135 -14
  24. agent/types.py +79 -0
  25. agent/ui/__init__.py +7 -1
  26. agent/ui/__main__.py +2 -13
  27. agent/ui/gradio/__init__.py +6 -19
  28. agent/ui/gradio/app.py +94 -1313
  29. agent/ui/gradio/ui_components.py +721 -0
  30. cua_agent-0.4.0.dist-info/METADATA +424 -0
  31. cua_agent-0.4.0.dist-info/RECORD +33 -0
  32. agent/core/__init__.py +0 -27
  33. agent/core/agent.py +0 -210
  34. agent/core/base.py +0 -217
  35. agent/core/callbacks.py +0 -200
  36. agent/core/experiment.py +0 -249
  37. agent/core/factory.py +0 -122
  38. agent/core/messages.py +0 -332
  39. agent/core/provider_config.py +0 -21
  40. agent/core/telemetry.py +0 -142
  41. agent/core/tools/__init__.py +0 -21
  42. agent/core/tools/base.py +0 -74
  43. agent/core/tools/bash.py +0 -52
  44. agent/core/tools/collection.py +0 -46
  45. agent/core/tools/computer.py +0 -113
  46. agent/core/tools/edit.py +0 -67
  47. agent/core/tools/manager.py +0 -56
  48. agent/core/tools.py +0 -32
  49. agent/core/types.py +0 -88
  50. agent/core/visualization.py +0 -197
  51. agent/providers/__init__.py +0 -4
  52. agent/providers/anthropic/__init__.py +0 -6
  53. agent/providers/anthropic/api/client.py +0 -360
  54. agent/providers/anthropic/api/logging.py +0 -150
  55. agent/providers/anthropic/api_handler.py +0 -140
  56. agent/providers/anthropic/callbacks/__init__.py +0 -5
  57. agent/providers/anthropic/callbacks/manager.py +0 -65
  58. agent/providers/anthropic/loop.py +0 -568
  59. agent/providers/anthropic/prompts.py +0 -23
  60. agent/providers/anthropic/response_handler.py +0 -226
  61. agent/providers/anthropic/tools/__init__.py +0 -33
  62. agent/providers/anthropic/tools/base.py +0 -88
  63. agent/providers/anthropic/tools/bash.py +0 -66
  64. agent/providers/anthropic/tools/collection.py +0 -34
  65. agent/providers/anthropic/tools/computer.py +0 -396
  66. agent/providers/anthropic/tools/edit.py +0 -326
  67. agent/providers/anthropic/tools/manager.py +0 -54
  68. agent/providers/anthropic/tools/run.py +0 -42
  69. agent/providers/anthropic/types.py +0 -16
  70. agent/providers/anthropic/utils.py +0 -381
  71. agent/providers/omni/__init__.py +0 -8
  72. agent/providers/omni/api_handler.py +0 -42
  73. agent/providers/omni/clients/anthropic.py +0 -103
  74. agent/providers/omni/clients/base.py +0 -35
  75. agent/providers/omni/clients/oaicompat.py +0 -195
  76. agent/providers/omni/clients/ollama.py +0 -122
  77. agent/providers/omni/clients/openai.py +0 -155
  78. agent/providers/omni/clients/utils.py +0 -25
  79. agent/providers/omni/image_utils.py +0 -34
  80. agent/providers/omni/loop.py +0 -990
  81. agent/providers/omni/parser.py +0 -307
  82. agent/providers/omni/prompts.py +0 -64
  83. agent/providers/omni/tools/__init__.py +0 -30
  84. agent/providers/omni/tools/base.py +0 -29
  85. agent/providers/omni/tools/bash.py +0 -74
  86. agent/providers/omni/tools/computer.py +0 -179
  87. agent/providers/omni/tools/manager.py +0 -61
  88. agent/providers/omni/utils.py +0 -236
  89. agent/providers/openai/__init__.py +0 -6
  90. agent/providers/openai/api_handler.py +0 -456
  91. agent/providers/openai/loop.py +0 -472
  92. agent/providers/openai/response_handler.py +0 -205
  93. agent/providers/openai/tools/__init__.py +0 -15
  94. agent/providers/openai/tools/base.py +0 -79
  95. agent/providers/openai/tools/computer.py +0 -326
  96. agent/providers/openai/tools/manager.py +0 -106
  97. agent/providers/openai/types.py +0 -36
  98. agent/providers/openai/utils.py +0 -98
  99. agent/providers/uitars/__init__.py +0 -1
  100. agent/providers/uitars/clients/base.py +0 -35
  101. agent/providers/uitars/clients/mlxvlm.py +0 -263
  102. agent/providers/uitars/clients/oaicompat.py +0 -214
  103. agent/providers/uitars/loop.py +0 -660
  104. agent/providers/uitars/prompts.py +0 -63
  105. agent/providers/uitars/tools/__init__.py +0 -1
  106. agent/providers/uitars/tools/computer.py +0 -283
  107. agent/providers/uitars/tools/manager.py +0 -60
  108. agent/providers/uitars/utils.py +0 -264
  109. cua_agent-0.3.2.dist-info/METADATA +0 -295
  110. cua_agent-0.3.2.dist-info/RECORD +0 -87
  111. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0.dist-info}/WHEEL +0 -0
  112. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0.dist-info}/entry_points.txt +0 -0
cua_agent-0.4.0.dist-info/METADATA ADDED
@@ -0,0 +1,424 @@
1
+ Metadata-Version: 2.1
2
+ Name: cua-agent
3
+ Version: 0.4.0
4
+ Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
+ Author-Email: TryCua <gh@trycua.com>
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: httpx>=0.27.0
8
+ Requires-Dist: aiohttp>=3.9.3
9
+ Requires-Dist: asyncio
10
+ Requires-Dist: anyio>=4.4.1
11
+ Requires-Dist: typing-extensions>=4.12.2
12
+ Requires-Dist: pydantic>=2.6.4
13
+ Requires-Dist: rich>=13.7.1
14
+ Requires-Dist: python-dotenv>=1.0.1
15
+ Requires-Dist: cua-computer<0.5.0,>=0.3.0
16
+ Requires-Dist: cua-core<0.2.0,>=0.1.0
17
+ Requires-Dist: certifi>=2024.2.2
18
+ Requires-Dist: litellm>=1.74.8
19
+ Provides-Extra: openai
20
+ Provides-Extra: anthropic
21
+ Provides-Extra: omni
22
+ Requires-Dist: ultralytics>=8.0.0; extra == "omni"
23
+ Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "omni"
24
+ Provides-Extra: uitars
25
+ Provides-Extra: uitars-mlx
26
+ Requires-Dist: mlx-vlm>=0.1.27; sys_platform == "darwin" and extra == "uitars-mlx"
27
+ Provides-Extra: uitars-hf
28
+ Requires-Dist: transformers>=4.54.0; extra == "uitars-hf"
29
+ Provides-Extra: ui
30
+ Requires-Dist: gradio>=5.23.3; extra == "ui"
31
+ Requires-Dist: python-dotenv>=1.0.1; extra == "ui"
32
+ Provides-Extra: cli
33
+ Requires-Dist: yaspin>=3.1.0; extra == "cli"
34
+ Provides-Extra: all
35
+ Requires-Dist: ultralytics>=8.0.0; extra == "all"
36
+ Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "all"
37
+ Requires-Dist: mlx-vlm>=0.1.27; sys_platform == "darwin" and extra == "all"
38
+ Requires-Dist: transformers>=4.54.0; extra == "all"
39
+ Requires-Dist: gradio>=5.23.3; extra == "all"
40
+ Requires-Dist: python-dotenv>=1.0.1; extra == "all"
41
+ Requires-Dist: yaspin>=3.1.0; extra == "all"
42
+ Description-Content-Type: text/markdown
43
+
44
+ <div align="center">
45
+ <h1>
46
+ <div class="image-wrapper" style="display: inline-block;">
47
+ <picture>
48
+ <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../../img/logo_white.png" style="display: block; margin: auto;">
49
+ <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../../img/logo_black.png" style="display: block; margin: auto;">
50
+ <img alt="Shows my svg">
51
+ </picture>
52
+ </div>
53
+
54
+ [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
55
+ [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
56
+ [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
57
+ [![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/)
58
+ </h1>
59
+ </div>
60
+
61
+ **cua-agent** is a general Computer-Use framework with liteLLM integration for running agentic workflows on macOS, Windows, and Linux sandboxes. It provides a unified interface for computer-use agents across multiple LLM providers, with an advanced callback system for extensibility.
62
+
63
+ ## Features
64
+
65
+ - **Safe Computer-Use/Tool-Use**: Uses the Computer SDK for sandboxed desktops
66
+ - **Multi-Agent Support**: Anthropic Claude, OpenAI computer-use-preview, UI-TARS, Omniparser + any LLM
67
+ - **Multi-API Support**: Leverages liteLLM's support for 100+ LLM / model APIs, including local models (`huggingface-local/`, `ollama_chat/`, `mlx/`)
68
+ - **Cross-Platform**: Works on Windows, macOS, and Linux with cloud and local computer instances
69
+ - **Extensible Callbacks**: Built-in support for image retention, cache control, PII anonymization, budget limits, and trajectory tracking
70
+
71
+ ## Install
72
+
73
+ ```bash
74
+ pip install "cua-agent[all]"
75
+
76
+ # or install specific providers
77
+ pip install "cua-agent[openai]" # OpenAI computer-use-preview support
78
+ pip install "cua-agent[anthropic]" # Anthropic Claude support
79
+ pip install "cua-agent[omni]" # Omniparser + any LLM support
80
+ pip install "cua-agent[uitars]" # UI-TARS
81
+ pip install "cua-agent[uitars-mlx]" # UI-TARS + MLX support
82
+ pip install "cua-agent[uitars-hf]" # UI-TARS + Huggingface support
83
+ pip install "cua-agent[ui]" # Gradio UI support
84
+ ```
85
+
86
+ ## Quick Start
87
+
88
+ ```python
89
+ import asyncio
90
+ import os
91
+ from agent import ComputerAgent
92
+ from computer import Computer
93
+
94
+ async def main():
95
+ # Set up computer instance
96
+ async with Computer(
97
+ os_type="linux",
98
+ provider_type="cloud",
99
+ name=os.getenv("CUA_CONTAINER_NAME"),
100
+ api_key=os.getenv("CUA_API_KEY")
101
+ ) as computer:
102
+
103
+ # Create agent
104
+ agent = ComputerAgent(
105
+ model="anthropic/claude-3-5-sonnet-20241022",
106
+ tools=[computer],
107
+ only_n_most_recent_images=3,
108
+ trajectory_dir="trajectories",
109
+ max_trajectory_budget=5.0 # $5 budget limit
110
+ )
111
+
112
+ # Run agent
113
+ messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}]
114
+
115
+ async for result in agent.run(messages):
116
+ for item in result["output"]:
117
+ if item["type"] == "message":
118
+ print(item["content"][0]["text"])
119
+
120
+ if __name__ == "__main__":
121
+ asyncio.run(main())
122
+ ```
123
+
124
+ ## Supported Models
125
+
126
+ ### Anthropic Claude (Computer Use API)
127
+ ```python
128
+ model="anthropic/claude-3-5-sonnet-20241022"
129
+ model="anthropic/claude-3-5-sonnet-20240620"
130
+ model="anthropic/claude-opus-4-20250514"
131
+ model="anthropic/claude-sonnet-4-20250514"
132
+ ```
133
+
134
+ ### OpenAI Computer Use Preview
135
+ ```python
136
+ model="openai/computer-use-preview"
137
+ ```
138
+
139
+ ### UI-TARS (Local or Huggingface Inference)
140
+ ```python
141
+ model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"
142
+ model="ollama_chat/0000/ui-tars-1.5-7b"
143
+ ```
144
+
145
+ ### Omniparser + Any LLM
146
+ ```python
147
+ model="omniparser+ollama_chat/mistral-small3.2"
148
+ model="omniparser+vertex_ai/gemini-pro"
149
+ model="omniparser+anthropic/claude-3-5-sonnet-20241022"
150
+ model="omniparser+openai/gpt-4o"
151
+ ```
152
+
153
+ ## Custom Tools
154
+
155
+ Define custom tools using decorated functions:
156
+
157
+ ```python
158
+ from computer.helpers import sandboxed
159
+
160
+ @sandboxed()
161
+ def read_file(location: str) -> str:
162
+ """Read contents of a file
163
+
164
+ Parameters
165
+ ----------
166
+ location : str
167
+ Path to the file to read
168
+
169
+ Returns
170
+ -------
171
+ str
172
+ Contents of the file or error message
173
+ """
174
+ try:
175
+ with open(location, 'r') as f:
176
+ return f.read()
177
+ except Exception as e:
178
+ return f"Error reading file: {str(e)}"
179
+
180
+ def calculate(a: int, b: int) -> int:
181
+ """Calculate the sum of two integers"""
182
+ return a + b
183
+
184
+ # Use with agent
185
+ agent = ComputerAgent(
186
+ model="anthropic/claude-3-5-sonnet-20241022",
187
+ tools=[computer, read_file, calculate]
188
+ )
189
+ ```
190
+
191
+ ## Callbacks System
192
+
193
+ cua-agent provides a comprehensive callback system for extending functionality:
194
+
195
+ ### Built-in Callbacks
196
+
197
+ ```python
198
+ import logging
+
+ from agent.callbacks import (
199
+ ImageRetentionCallback,
200
+ TrajectorySaverCallback,
201
+ BudgetManagerCallback,
202
+ LoggingCallback
203
+ )
204
+
205
+ agent = ComputerAgent(
206
+ model="anthropic/claude-3-5-sonnet-20241022",
207
+ tools=[computer],
208
+ callbacks=[
209
+ ImageRetentionCallback(only_n_most_recent_images=3),
210
+ TrajectorySaverCallback(trajectory_dir="trajectories"),
211
+ BudgetManagerCallback(max_budget=10.0, raise_error=True),
212
+ LoggingCallback(level=logging.INFO)
213
+ ]
214
+ )
215
+ ```
216
+
217
+ ### Custom Callbacks
218
+
219
+ ```python
220
+ from agent.callbacks.base import AsyncCallbackHandler
221
+
222
+ class CustomCallback(AsyncCallbackHandler):
223
+ async def on_llm_start(self, messages):
224
+ """Preprocess messages before LLM call"""
225
+ # Add custom preprocessing logic
226
+ return messages
227
+
228
+ async def on_llm_end(self, messages):
229
+ """Postprocess messages after LLM call"""
230
+ # Add custom postprocessing logic
231
+ return messages
232
+
233
+ async def on_usage(self, usage):
234
+ """Track usage information"""
235
+ print(f"Tokens used: {usage.total_tokens}")
236
+ ```
237
+
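+ A custom handler is registered the same way as the built-in callbacks. A minimal sketch, assuming the `computer` instance from the Quick Start example:
+
+ ```python
+ agent = ComputerAgent(
+     model="anthropic/claude-3-5-sonnet-20241022",
+     tools=[computer],
+     callbacks=[CustomCallback()],  # custom handlers sit alongside built-in ones
+ )
+ ```
+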
238
+ ## Budget Management
239
+
240
+ Control costs with built-in budget management:
241
+
242
+ ```python
243
+ # Simple budget limit
244
+ agent = ComputerAgent(
245
+ model="anthropic/claude-3-5-sonnet-20241022",
246
+ max_trajectory_budget=5.0 # $5 limit
247
+ )
248
+
249
+ # Advanced budget configuration
250
+ agent = ComputerAgent(
251
+ model="anthropic/claude-3-5-sonnet-20241022",
252
+ max_trajectory_budget={
253
+ "max_budget": 10.0,
254
+ "raise_error": True, # Raise error when exceeded
255
+ "reset_after_each_run": False # Persistent across runs
256
+ }
257
+ )
258
+ ```
259
+
260
+ ## Trajectory Management
261
+
262
+ Save and replay agent conversations:
263
+
264
+ ```python
265
+ agent = ComputerAgent(
266
+ model="anthropic/claude-3-5-sonnet-20241022",
267
+ trajectory_dir="trajectories", # Auto-save trajectories
268
+ tools=[computer]
269
+ )
270
+
271
+ # Trajectories are saved with:
272
+ # - Complete conversation history
273
+ # - Usage statistics and costs
274
+ # - Timestamps and metadata
275
+ # - Screenshots and computer actions
276
+ ```
277
+
278
+ ## Configuration Options
279
+
280
+ ### ComputerAgent Parameters
281
+
282
+ - `model`: Model identifier (required)
283
+ - `tools`: List of computer objects and decorated functions
284
+ - `callbacks`: List of callback handlers for extensibility
285
+ - `only_n_most_recent_images`: Limit recent images to prevent context overflow
286
+ - `verbosity`: Logging level (logging.INFO, logging.DEBUG, etc.)
287
+ - `trajectory_dir`: Directory to save conversation trajectories
288
+ - `max_retries`: Maximum API call retries (default: 3)
289
+ - `screenshot_delay`: Delay between actions and screenshots (default: 0.5s)
290
+ - `use_prompt_caching`: Enable prompt caching for supported models
291
+ - `max_trajectory_budget`: Budget limit configuration
292
+
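+ For a quick overview, here is a configuration sketch that combines most of these parameters in a single constructor call. The values are illustrative placeholders (several mirror the stated defaults), and `computer` is assumed to be the instance created in the Quick Start example:
+
+ ```python
+ import logging
+ from agent import ComputerAgent
+
+ agent = ComputerAgent(
+     model="anthropic/claude-3-5-sonnet-20241022",  # required model identifier
+     tools=[computer],                              # computer objects and decorated functions
+     only_n_most_recent_images=3,                   # cap screenshots kept in context
+     verbosity=logging.INFO,                        # logging level
+     trajectory_dir="trajectories",                 # auto-save conversation trajectories
+     max_retries=3,                                 # maximum API call retries (default)
+     screenshot_delay=0.5,                          # delay between actions and screenshots (default)
+     use_prompt_caching=True,                       # prompt caching on supported models
+     max_trajectory_budget={"max_budget": 10.0, "raise_error": True},
+ )
+ ```
+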
293
+ ### Environment Variables
294
+
295
+ ```bash
296
+ # Computer instance (cloud)
297
+ export CUA_CONTAINER_NAME="your-container-name"
298
+ export CUA_API_KEY="your-cua-api-key"
299
+
300
+ # LLM API keys
301
+ export ANTHROPIC_API_KEY="your-anthropic-key"
302
+ export OPENAI_API_KEY="your-openai-key"
303
+ ```
304
+
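+ Because `python-dotenv` is already a dependency, one option is to load these variables from a local `.env` file before creating the computer instance. A minimal sketch, assuming such a file exists:
+
+ ```python
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()  # reads CUA_CONTAINER_NAME, CUA_API_KEY, etc. from .env into the environment
+
+ # The Quick Start example can then pick them up as before:
+ container_name = os.getenv("CUA_CONTAINER_NAME")
+ api_key = os.getenv("CUA_API_KEY")
+ ```
+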
305
+ ## Advanced Usage
306
+
307
+ ### Streaming Responses
308
+
309
+ ```python
310
+ async for result in agent.run(messages, stream=True):
311
+ # Process streaming chunks
312
+ for item in result["output"]:
313
+ if item["type"] == "message":
314
+ print(item["content"][0]["text"], end="", flush=True)
315
+ elif item["type"] == "computer_call":
316
+ action = item["action"]
317
+ print(f"\n[Action: {action['type']}]")
318
+ ```
319
+
320
+ ### Interactive Chat Loop
321
+
322
+ ```python
323
+ history = []
324
+ while True:
325
+ user_input = input("> ")
326
+ if user_input.lower() in ['quit', 'exit']:
327
+ break
328
+
329
+ history.append({"role": "user", "content": user_input})
330
+
331
+ async for result in agent.run(history):
332
+ history += result["output"]
333
+
334
+ # Display assistant responses
335
+ for item in result["output"]:
336
+ if item["type"] == "message":
337
+ print(item["content"][0]["text"])
338
+ ```
339
+
340
+ ### Error Handling
341
+
342
+ ```python
343
+ try:
344
+ async for result in agent.run(messages):
345
+ # Process results
346
+ pass
347
+ except BudgetExceededException:
348
+ print("Budget limit exceeded")
349
+ except Exception as e:
350
+ print(f"Agent error: {e}")
351
+ ```
352
+
353
+ ## API Reference
354
+
355
+ ### ComputerAgent.run()
356
+
357
+ ```python
358
+ async def run(
359
+ self,
360
+ messages: Messages,
361
+ stream: bool = False,
362
+ **kwargs
363
+ ) -> AsyncGenerator[Dict[str, Any], None]:
364
+ """
365
+ Run the agent with the given messages.
366
+
367
+ Args:
368
+ messages: List of message dictionaries
369
+ stream: Whether to stream the response
370
+ **kwargs: Additional arguments
371
+
372
+ Returns:
373
+ AsyncGenerator that yields response chunks
374
+ """
375
+ ```
376
+
377
+ ### Message Format
378
+
379
+ ```python
380
+ messages = [
381
+ {
382
+ "role": "user",
383
+ "content": "Take a screenshot and describe what you see"
384
+ },
385
+ {
386
+ "role": "assistant",
387
+ "content": "I'll take a screenshot for you."
388
+ }
389
+ ]
390
+ ```
391
+
392
+ ### Response Format
393
+
394
+ ```python
395
+ {
396
+ "output": [
397
+ {
398
+ "type": "message",
399
+ "role": "assistant",
400
+ "content": [{"type": "output_text", "text": "I can see..."}]
401
+ },
402
+ {
403
+ "type": "computer_call",
404
+ "action": {"type": "screenshot"},
405
+ "call_id": "call_123"
406
+ },
407
+ {
408
+ "type": "computer_call_output",
409
+ "call_id": "call_123",
410
+ "output": {"image_url": "data:image/png;base64,..."}
411
+ }
412
+ ],
413
+ "usage": {
414
+ "prompt_tokens": 150,
415
+ "completion_tokens": 75,
416
+ "total_tokens": 225,
417
+ "response_cost": 0.01,
418
+ }
419
+ }
420
+ ```
421
+
422
+ ## License
423
+
424
+ MIT License - see LICENSE file for details.
cua_agent-0.4.0.dist-info/RECORD ADDED
@@ -0,0 +1,33 @@
1
+ agent/__init__.py,sha256=PfRgVa_aJQL9fK0D1g2r__Kdg3627EigNS31_M8Ivkk,1539
2
+ agent/__main__.py,sha256=lBUe8Niqa5XoCjwFfXyX7GtnUwjjZXC1-j4V9mvUYSc,538
3
+ agent/adapters/__init__.py,sha256=szM2HMten2WkcqXeRnan__-sXjpyS4eyvIW0LXSfj4U,178
4
+ agent/adapters/huggingfacelocal_adapter.py,sha256=dnzzxYCvFiuDdNzsb_1uM-boWv1eS__dWMve_fAnlUc,8038
5
+ agent/agent.py,sha256=Vn7ygehx19It5FarZJ2NwVwNTOtNYtD21x8LEBhlWcE,24609
6
+ agent/callbacks/__init__.py,sha256=yxxBXUqpXQ-jRi_ixJMtmQPxoNRy5Vz1PUBzNNa1Dwg,538
7
+ agent/callbacks/base.py,sha256=UnnnYlh6XCm6HKZZsAPaT_Eyo9LUYLyjyNwF-QRm6Ns,4691
8
+ agent/callbacks/budget_manager.py,sha256=RyKM-7iXQcDotYvrw3eURzeEHEXvQjID-NobtvQWE7k,1832
9
+ agent/callbacks/image_retention.py,sha256=tiuRT5ke9xXTb2eP8Gz-2ITyAMY29LURUH6AbjX3RP8,6165
10
+ agent/callbacks/logging.py,sha256=OOxU97EzrxlnUAtiEnvy9FB7SwCUK90-rdpDFA2Ae4E,10921
11
+ agent/callbacks/pii_anonymization.py,sha256=UKAqNacHG3z92_6uocVzOIl8gJoqyofldCoCmB4UVIE,10268
12
+ agent/callbacks/telemetry.py,sha256=sYsE_-tnZkt1ydIRbp_GfCETlz7QG9DNbawq6hM4Bqw,7445
13
+ agent/callbacks/trajectory_saver.py,sha256=POE8aPT-MBzfW873wr6C7iiVUHtp483KwvLPxC1S3EY,11626
14
+ agent/cli.py,sha256=WZFyhmTbFnA7QgZmqKO5tGoWsKeO12-GVlBab314o9Q,10002
15
+ agent/computer_handler.py,sha256=2gfFBeDk9Vd54x9mOqnswMo8BdjUduLo5I0RbBPLovY,3964
16
+ agent/decorators.py,sha256=bCmcCjP31WEjWg1D91OE2jo7AZTfGa9cNgCnYUvjiyw,2832
17
+ agent/loops/__init__.py,sha256=_qpP_--3ePdFkTZP8qmUEFlBsy6m4h8fj0gGLDKA7zw,217
18
+ agent/loops/anthropic.py,sha256=w5s_zvkXdcHt0DgBMYjDQGDMBXK4bPu-SyeIMhA1Rrs,32243
19
+ agent/loops/omniparser.py,sha256=m3bDNQ0Igc_HHVoAbjVNj599uRoC9Eap3DCALg6RZ54,11422
20
+ agent/loops/openai.py,sha256=ArTqadeJY8F9N8ZLKfswlzgHV_54HbWJgLd4l6ele9w,3010
21
+ agent/loops/uitars.py,sha256=L0NYxKoIiMfIHbyomnaiK3ZGLmLv3QMx9nX57GruAk0,26323
22
+ agent/responses.py,sha256=ztSMEz8q4ykQAXF21UyQxkSZ6GeoDMydT5HZqKBPAXg,6660
23
+ agent/telemetry.py,sha256=87ZTyBaT0wEPQn4v76II3g0V3GERuIVbypoX-Ug6FKQ,4786
24
+ agent/types.py,sha256=GiLxIcF7s1XIh_WaY7tjdQPFpdTXb5MWVe_ZUPA0gkY,2364
25
+ agent/ui/__init__.py,sha256=DTZpK85QXscXK2nM9HtpAhVBF13yAamUrtwrQSuV-kM,126
26
+ agent/ui/__main__.py,sha256=vudWXYvGM0aNT5aZ94HPtGW8YXOZ4cLXepHyhUM_k1g,73
27
+ agent/ui/gradio/__init__.py,sha256=yv4Mrfo-Sj2U5sVn_UJHAuwYCezo-5O4ItR2C9jzNko,145
28
+ agent/ui/gradio/app.py,sha256=X7he4jzyFqWJDP1y_M8yfZvfdy6GHNuclLn4k9iIwAw,8824
29
+ agent/ui/gradio/ui_components.py,sha256=WxFE-4wvdEgj7FPLNXUrs118sXJ9vN3kLkZxtto-weo,34474
30
+ cua_agent-0.4.0.dist-info/METADATA,sha256=b_qbWdEF5DVWLgsnSIc0wm1-cdfPlimlxntkaAJNzNU,12060
31
+ cua_agent-0.4.0.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
32
+ cua_agent-0.4.0.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
33
+ cua_agent-0.4.0.dist-info/RECORD,,
agent/core/__init__.py DELETED
@@ -1,27 +0,0 @@
1
- """Core agent components."""
2
-
3
- from .factory import BaseLoop
4
- from .messages import (
5
- StandardMessageManager,
6
- ImageRetentionConfig,
7
- )
8
- from .callbacks import (
9
- CallbackManager,
10
- CallbackHandler,
11
- BaseCallbackManager,
12
- ContentCallback,
13
- ToolCallback,
14
- APICallback,
15
- )
16
-
17
- __all__ = [
18
- "BaseLoop",
19
- "CallbackManager",
20
- "CallbackHandler",
21
- "StandardMessageManager",
22
- "ImageRetentionConfig",
23
- "BaseCallbackManager",
24
- "ContentCallback",
25
- "ToolCallback",
26
- "APICallback",
27
- ]
agent/core/agent.py DELETED
@@ -1,210 +0,0 @@
1
- """Main entry point for computer agents."""
2
-
3
- import asyncio
4
- import logging
5
- import os
6
- from typing import AsyncGenerator, Optional
7
-
8
- from computer import Computer
9
- from .types import LLM, AgentLoop
10
- from .types import AgentResponse
11
- from .factory import LoopFactory
12
- from .provider_config import DEFAULT_MODELS, ENV_VARS
13
-
14
- logger = logging.getLogger(__name__)
15
-
16
- class ComputerAgent:
17
- """A computer agent that can perform automated tasks using natural language instructions."""
18
-
19
- def __init__(
20
- self,
21
- computer: Computer,
22
- model: LLM,
23
- loop: AgentLoop,
24
- max_retries: int = 3,
25
- screenshot_dir: Optional[str] = None,
26
- log_dir: Optional[str] = None,
27
- api_key: Optional[str] = None,
28
- save_trajectory: bool = True,
29
- trajectory_dir: str = "trajectories",
30
- only_n_most_recent_images: Optional[int] = None,
31
- verbosity: int = logging.INFO,
32
- ):
33
- """Initialize the ComputerAgent.
34
-
35
- Args:
36
- computer: Computer instance. If not provided, one will be created with default settings.
37
- max_retries: Maximum number of retry attempts.
38
- screenshot_dir: Directory to save screenshots.
39
- log_dir: Directory to save logs (set to None to disable logging to files).
40
- model: LLM object containing provider and model name. Takes precedence over provider/model_name.
41
- provider: The AI provider to use (e.g., LLMProvider.ANTHROPIC). Only used if model is None.
42
- api_key: The API key for the provider. If not provided, will look for environment variable.
43
- model_name: The model name to use. Only used if model is None.
44
- save_trajectory: Whether to save the trajectory.
45
- trajectory_dir: Directory to save the trajectory.
46
- only_n_most_recent_images: Maximum number of recent screenshots to include in API requests.
47
- verbosity: Logging level.
48
- """
49
- # Basic agent configuration
50
- self.max_retries = max_retries
51
- self.computer = computer
52
- self.queue = asyncio.Queue()
53
- self.screenshot_dir = screenshot_dir
54
- self.log_dir = log_dir
55
- self._retry_count = 0
56
- self._initialized = False
57
- self._in_context = False
58
-
59
- # Set logging level
60
- logger.setLevel(verbosity)
61
-
62
- # Setup logging
63
- if self.log_dir:
64
- os.makedirs(self.log_dir, exist_ok=True)
65
- logger.info(f"Created logs directory: {self.log_dir}")
66
-
67
- # Setup screenshots directory
68
- if self.screenshot_dir:
69
- os.makedirs(self.screenshot_dir, exist_ok=True)
70
- logger.info(f"Created screenshots directory: {self.screenshot_dir}")
71
-
72
- # Use the provided LLM object
73
- self.provider = model.provider
74
- actual_model_name = model.name or DEFAULT_MODELS.get(self.provider, "")
75
- self.provider_base_url = getattr(model, "provider_base_url", None)
76
-
77
- # Ensure we have a valid model name
78
- if not actual_model_name:
79
- actual_model_name = DEFAULT_MODELS.get(self.provider, "")
80
- if not actual_model_name:
81
- raise ValueError(
82
- f"No model specified for provider {self.provider} and no default found"
83
- )
84
-
85
- # Get API key from environment if not provided
86
- actual_api_key = api_key or os.environ.get(ENV_VARS[self.provider], "")
87
- # Ollama and OpenAI-compatible APIs typically don't require an API key
88
- if (
89
- not actual_api_key
90
- and str(self.provider) not in ["ollama", "oaicompat"]
91
- and ENV_VARS[self.provider] != "none"
92
- ):
93
- raise ValueError(f"No API key provided for {self.provider}")
94
-
95
- # Create the appropriate loop using the factory
96
- try:
97
- # Let the factory create the appropriate loop with needed components
98
- self._loop = LoopFactory.create_loop(
99
- loop_type=loop,
100
- provider=self.provider,
101
- computer=self.computer,
102
- model_name=actual_model_name,
103
- api_key=actual_api_key,
104
- save_trajectory=save_trajectory,
105
- trajectory_dir=trajectory_dir,
106
- only_n_most_recent_images=only_n_most_recent_images,
107
- provider_base_url=self.provider_base_url,
108
- )
109
- except ValueError as e:
110
- logger.error(f"Failed to create loop: {str(e)}")
111
- raise
112
-
113
- # Initialize the message manager from the loop
114
- self.message_manager = self._loop.message_manager
115
-
116
- logger.info(
117
- f"ComputerAgent initialized with provider: {self.provider}, model: {actual_model_name}"
118
- )
119
-
120
- async def __aenter__(self):
121
- """Initialize the agent when used as a context manager."""
122
- logger.info("Entering ComputerAgent context")
123
- self._in_context = True
124
-
125
- # In case the computer wasn't initialized
126
- try:
127
- # Initialize the computer only if not already initialized
128
- logger.info("Checking if computer is already initialized...")
129
- if not self.computer._initialized:
130
- logger.info("Initializing computer in __aenter__...")
131
- # Use the computer's __aenter__ directly instead of calling run()
132
- await self.computer.__aenter__()
133
- logger.info("Computer initialized in __aenter__")
134
- else:
135
- logger.info("Computer already initialized, skipping initialization")
136
-
137
- except Exception as e:
138
- logger.error(f"Error initializing computer in __aenter__: {str(e)}")
139
- raise
140
-
141
- await self.initialize()
142
- return self
143
-
144
- async def __aexit__(self, exc_type, exc_val, exc_tb):
145
- """Cleanup agent resources if needed."""
146
- logger.info("Cleaning up agent resources")
147
- self._in_context = False
148
-
149
- # Do any necessary cleanup
150
- # We're not shutting down the computer here as it might be shared
151
- # Just log that we're exiting
152
- if exc_type:
153
- logger.error(f"Exiting agent context with error: {exc_type.__name__}: {exc_val}")
154
- else:
155
- logger.info("Exiting agent context normally")
156
-
157
- # If we have a queue, make sure to signal it's done
158
- if hasattr(self, "queue") and self.queue:
159
- await self.queue.put(None) # Signal that we're done
160
-
161
- async def initialize(self) -> None:
162
- """Initialize the agent and its components."""
163
- if not self._initialized:
164
- # Always initialize the computer if available
165
- if self.computer and not self.computer._initialized:
166
- await self.computer.run()
167
- self._initialized = True
168
-
169
- async def run(self, task: str) -> AsyncGenerator[AgentResponse, None]:
170
- """Run a task using the computer agent.
171
-
172
- Args:
173
- task: Task description
174
-
175
- Yields:
176
- Agent response format
177
- """
178
- try:
179
- logger.info(f"Running task: {task}")
180
- logger.info(
181
- f"Message history before task has {len(self.message_manager.messages)} messages"
182
- )
183
-
184
- # Initialize the computer if needed
185
- if not self._initialized:
186
- await self.initialize()
187
-
188
- # Add task as a user message using the message manager
189
- self.message_manager.add_user_message([{"type": "text", "text": task}])
190
- logger.info(
191
- f"Added task message. Message history now has {len(self.message_manager.messages)} messages"
192
- )
193
-
194
- # Pass properly formatted messages to the loop
195
- if self._loop is None:
196
- logger.error("Loop not initialized properly")
197
- yield {"error": "Loop not initialized properly"}
198
- return
199
-
200
- # Execute the task and yield results
201
- async for result in self._loop.run(self.message_manager.messages):
202
- yield result
203
-
204
- except Exception as e:
205
- logger.error(f"Error in agent run method: {str(e)}")
206
- yield {
207
- "role": "assistant",
208
- "content": f"Error: {str(e)}",
209
- "metadata": {"title": "❌ Error"},
210
- }