cua-agent 0.1.21__py3-none-any.whl → 0.1.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/__init__.py +1 -1
- agent/core/agent.py +9 -3
- agent/core/factory.py +3 -5
- agent/core/provider_config.py +5 -1
- agent/core/types.py +59 -1
- agent/providers/omni/__init__.py +1 -1
- agent/providers/omni/clients/base.py +8 -17
- agent/providers/omni/clients/oaicompat.py +177 -0
- agent/providers/omni/clients/ollama.py +122 -0
- agent/providers/omni/clients/openai.py +0 -4
- agent/providers/omni/loop.py +43 -1
- agent/providers/omni/tools/manager.py +1 -1
- agent/ui/__init__.py +1 -0
- agent/ui/gradio/__init__.py +21 -0
- agent/ui/gradio/app.py +872 -0
- {cua_agent-0.1.21.dist-info → cua_agent-0.1.23.dist-info}/METADATA +67 -3
- {cua_agent-0.1.21.dist-info → cua_agent-0.1.23.dist-info}/RECORD +19 -16
- agent/core/README.md +0 -101
- agent/providers/omni/types.py +0 -44
- {cua_agent-0.1.21.dist-info → cua_agent-0.1.23.dist-info}/WHEEL +0 -0
- {cua_agent-0.1.21.dist-info → cua_agent-0.1.23.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cua-agent
|
|
3
|
-
Version: 0.1.21
|
|
3
|
+
Version: 0.1.23
|
|
4
4
|
Summary: CUA (Computer Use) Agent for AI-driven computer interaction
|
|
5
5
|
Author-Email: TryCua <gh@trycua.com>
|
|
6
6
|
Requires-Python: <3.13,>=3.10
|
|
@@ -21,6 +21,9 @@ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "anthropic"
|
|
|
21
21
|
Provides-Extra: openai
|
|
22
22
|
Requires-Dist: openai<2.0.0,>=1.14.0; extra == "openai"
|
|
23
23
|
Requires-Dist: httpx<0.29.0,>=0.27.0; extra == "openai"
|
|
24
|
+
Provides-Extra: ui
|
|
25
|
+
Requires-Dist: gradio<6.0.0,>=5.23.3; extra == "ui"
|
|
26
|
+
Requires-Dist: python-dotenv<2.0.0,>=1.0.1; extra == "ui"
|
|
24
27
|
Provides-Extra: som
|
|
25
28
|
Requires-Dist: torch>=2.2.1; extra == "som"
|
|
26
29
|
Requires-Dist: torchvision>=0.17.1; extra == "som"
|
|
@@ -33,6 +36,19 @@ Requires-Dist: openai<2.0.0,>=1.14.0; extra == "som"
|
|
|
33
36
|
Requires-Dist: groq<0.5.0,>=0.4.0; extra == "som"
|
|
34
37
|
Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "som"
|
|
35
38
|
Requires-Dist: requests<3.0.0,>=2.31.0; extra == "som"
|
|
39
|
+
Provides-Extra: omni
|
|
40
|
+
Requires-Dist: torch>=2.2.1; extra == "omni"
|
|
41
|
+
Requires-Dist: torchvision>=0.17.1; extra == "omni"
|
|
42
|
+
Requires-Dist: ultralytics>=8.0.0; extra == "omni"
|
|
43
|
+
Requires-Dist: transformers>=4.38.2; extra == "omni"
|
|
44
|
+
Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "omni"
|
|
45
|
+
Requires-Dist: anthropic<0.47.0,>=0.46.0; extra == "omni"
|
|
46
|
+
Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "omni"
|
|
47
|
+
Requires-Dist: openai<2.0.0,>=1.14.0; extra == "omni"
|
|
48
|
+
Requires-Dist: groq<0.5.0,>=0.4.0; extra == "omni"
|
|
49
|
+
Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "omni"
|
|
50
|
+
Requires-Dist: requests<3.0.0,>=2.31.0; extra == "omni"
|
|
51
|
+
Requires-Dist: ollama<0.5.0,>=0.4.7; extra == "omni"
|
|
36
52
|
Provides-Extra: all
|
|
37
53
|
Requires-Dist: torch>=2.2.1; extra == "all"
|
|
38
54
|
Requires-Dist: torchvision>=0.17.1; extra == "all"
|
|
@@ -45,6 +61,9 @@ Requires-Dist: openai<2.0.0,>=1.14.0; extra == "all"
|
|
|
45
61
|
Requires-Dist: groq<0.5.0,>=0.4.0; extra == "all"
|
|
46
62
|
Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "all"
|
|
47
63
|
Requires-Dist: requests<3.0.0,>=2.31.0; extra == "all"
|
|
64
|
+
Requires-Dist: ollama<0.5.0,>=0.4.7; extra == "all"
|
|
65
|
+
Requires-Dist: gradio<6.0.0,>=5.23.3; extra == "all"
|
|
66
|
+
Requires-Dist: python-dotenv<2.0.0,>=1.0.1; extra == "all"
|
|
48
67
|
Description-Content-Type: text/markdown
|
|
49
68
|
|
|
50
69
|
<div align="center">
|
|
@@ -80,7 +99,8 @@ pip install "cua-agent[all]"
|
|
|
80
99
|
# or install specific loop providers
|
|
81
100
|
pip install "cua-agent[openai]" # OpenAI Cua Loop
|
|
82
101
|
pip install "cua-agent[anthropic]" # Anthropic Cua Loop
|
|
83
|
-
pip install "cua-agent[omni]" # Cua Loop based on OmniParser
|
|
102
|
+
pip install "cua-agent[omni]" # Cua Loop based on OmniParser (includes Ollama for local models)
|
|
103
|
+
pip install "cua-agent[ui]" # Gradio UI for the agent
|
|
84
104
|
```
|
|
85
105
|
|
|
86
106
|
## Run
|
|
@@ -92,6 +112,12 @@ async with Computer() as macos_computer:
|
|
|
92
112
|
computer=macos_computer,
|
|
93
113
|
loop=AgentLoop.OPENAI,
|
|
94
114
|
model=LLM(provider=LLMProvider.OPENAI)
|
|
115
|
+
# or
|
|
116
|
+
# loop=AgentLoop.ANTHROPIC,
|
|
117
|
+
# model=LLM(provider=LLMProvider.ANTHROPIC)
|
|
118
|
+
# or
|
|
119
|
+
# loop=AgentLoop.OMNI,
|
|
120
|
+
# model=LLM(provider=LLMProvider.OLLAMA, model="gemma3")
|
|
95
121
|
)
|
|
96
122
|
|
|
97
123
|
tasks = [
|
|
@@ -115,6 +141,44 @@ Refer to these notebooks for step-by-step guides on how to use the Computer-Use
|
|
|
115
141
|
|
|
116
142
|
- [Agent Notebook](../../notebooks/agent_nb.ipynb) - Complete examples and workflows
|
|
117
143
|
|
|
144
|
+
## Using the Gradio UI
|
|
145
|
+
|
|
146
|
+
The agent includes a Gradio-based user interface for easy interaction. To use it:
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
# Install with Gradio support
|
|
150
|
+
pip install "cua-agent[ui]"
|
|
151
|
+
|
|
152
|
+
# Create a simple launcher script
|
|
153
|
+
```python
|
|
154
|
+
from agent.ui.gradio.app import create_gradio_ui
|
|
155
|
+
|
|
156
|
+
app = create_gradio_ui()
|
|
157
|
+
app.launch(share=False)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
The Gradio UI provides:
|
|
161
|
+
- Selection of different agent loops (OpenAI, Anthropic, OMNI)
|
|
162
|
+
- Model selection for each provider
|
|
163
|
+
- Configuration of agent parameters
|
|
164
|
+
- Chat interface for interacting with the agent
|
|
165
|
+
|
|
166
|
+
You can also embed the Gradio UI in your own application:
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
# Import directly in your application
|
|
170
|
+
from agent.ui.gradio.app import create_gradio_ui
|
|
171
|
+
|
|
172
|
+
# Create the UI with advanced features
|
|
173
|
+
demo = create_gradio_ui()
|
|
174
|
+
demo.launch()
|
|
175
|
+
|
|
176
|
+
# Or for a simpler interface
|
|
177
|
+
from agent.ui.gradio import registry
|
|
178
|
+
demo = registry(name='cua:gpt-4o')
|
|
179
|
+
demo.launch()
|
|
180
|
+
```
|
|
181
|
+
|
|
118
182
|
## Agent Loops
|
|
119
183
|
|
|
120
184
|
The `cua-agent` package provides three agent loop variations, based on different CUA model providers and techniques:
|
|
@@ -123,7 +187,7 @@ The `cua-agent` package provides three agent loops variations, based on differen
|
|
|
123
187
|
|:-----------|:-----------------|:------------|:-------------|
|
|
124
188
|
| `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required |
|
|
125
189
|
| `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required |
|
|
126
|
-
| `AgentLoop.OMNI`
|
|
190
|
+
| `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |
|
|
127
191
|
|
|
128
192
|
## AgentResponse
|
|
129
193
|
The `AgentResponse` class represents the structured output returned after each agent turn. It contains the agent's response, reasoning, tool usage, and other metadata. The response format aligns with the new [OpenAI Agent SDK specification](https://platform.openai.com/docs/api-reference/responses) for better consistency across different agent loops.
|
|
@@ -1,13 +1,12 @@
|
|
|
1
|
-
agent/__init__.py,sha256=
|
|
2
|
-
agent/core/README.md,sha256=VOXNVbR0ugxf9gCXYmZtUU2kngZhfi29haT_oSxK0Lk,3559
|
|
1
|
+
agent/__init__.py,sha256=guFGtorDBF6R5hVep0Bvci3_sUJfBlcsq9ss5Kwrej8,1484
|
|
3
2
|
agent/core/__init__.py,sha256=7DhJ_6KKooM6uTmDIlumCnd7OFcU67BYIIR1dpIYUB0,506
|
|
4
|
-
agent/core/agent.py,sha256=
|
|
3
|
+
agent/core/agent.py,sha256=HUfBe7Uam3TObAmf6KH0GDKuNCNunNmmMcuxS7aZg0Q,8332
|
|
5
4
|
agent/core/base.py,sha256=EoutyMJ2kSJ72Di8KVRiUXc0ZJ1OkA0e7Ej14Y3F87w,7124
|
|
6
5
|
agent/core/callbacks.py,sha256=VbGIf5QkHh3Q0KsLM6wv7hRdIA5WExTVYLm64bckyUA,4306
|
|
7
6
|
agent/core/experiment.py,sha256=Ywj6q3JZFDKicfPuQsDl0vSN55HS7-Cnk3u3EcUCKe8,8866
|
|
8
|
-
agent/core/factory.py,sha256=
|
|
7
|
+
agent/core/factory.py,sha256=rGlSQDjcm61hNLxe9jLZvmMwPypYatNq25yf_SqUghU,3820
|
|
9
8
|
agent/core/messages.py,sha256=-OVMDqcxK5MUHPEkHliK29XFJYMRAc1keFvzrUyrOmM,16231
|
|
10
|
-
agent/core/provider_config.py,sha256=
|
|
9
|
+
agent/core/provider_config.py,sha256=Hr9kDFSXdPeqC6hbid3OTykNF0-XVi0wzZyd44a7kww,627
|
|
11
10
|
agent/core/telemetry.py,sha256=HElPd32k_w2SJ6t-Cc3j_2-AKdLbFwh2YlM8QViDgRw,4790
|
|
12
11
|
agent/core/tools.py,sha256=Jes2CFCFqC727WWHbO-sG7V03rBHnQe5X7Oi9ZkuScI,877
|
|
13
12
|
agent/core/tools/__init__.py,sha256=xZen-PqUp2dUaMEHJowXCQm33_5Sxhsx9PSoD0rq6tI,489
|
|
@@ -17,7 +16,7 @@ agent/core/tools/collection.py,sha256=NuwTn6dXSyznxWodfmFDQwUlxxaGb4oBPym4AEJABS
|
|
|
17
16
|
agent/core/tools/computer.py,sha256=lT_aW3huoYpcM8kffuokELupSz_WZG_qkaW1gITRC58,3892
|
|
18
17
|
agent/core/tools/edit.py,sha256=kv4jTKCM0VXrnoNErf7mT-xlr81-7T8v49_VA9y_L4Y,2005
|
|
19
18
|
agent/core/tools/manager.py,sha256=IRsCXjGc076nncQuyIjODoafnHTDhrf9sP5B4q5Pcdo,1742
|
|
20
|
-
agent/core/types.py,sha256=
|
|
19
|
+
agent/core/types.py,sha256=4XnjuCkZAeyOidqixHp3pWVVf3pxc2l-0hNoYlB3Mrk,2914
|
|
21
20
|
agent/core/visualization.py,sha256=1DuFF5sSeSf5BRSevBMDxml9-ajl7BQLFm5KBUwMbI8,6573
|
|
22
21
|
agent/providers/__init__.py,sha256=b4tIBAaIB1V7p8V0BWipHVnMhfHH_OuVgP4OWGSHdD8,194
|
|
23
22
|
agent/providers/anthropic/__init__.py,sha256=Mj11IZnVshZ2iHkvg4Z5-jrQIaD1WvzDz2Zk_pMwqIA,149
|
|
@@ -39,22 +38,23 @@ agent/providers/anthropic/tools/manager.py,sha256=yNvgTkfEqnOz5isDF0RxvmBMZB0uh2
|
|
|
39
38
|
agent/providers/anthropic/tools/run.py,sha256=xhXdnBK1di9muaO44CEirL9hpGy3NmKbjfMpyeVmn8Y,1595
|
|
40
39
|
agent/providers/anthropic/types.py,sha256=SF00kOMC1ui8j9Ah56KaeiR2cL394qCHjFIsBpXxt5w,421
|
|
41
40
|
agent/providers/anthropic/utils.py,sha256=qDp0bFGQhK1dG9U461iaeCiyoVUsksXmD43g9cedRW8,14367
|
|
42
|
-
agent/providers/omni/__init__.py,sha256=
|
|
41
|
+
agent/providers/omni/__init__.py,sha256=5ix67iJdtQNGuGJEjEOF65PwFWO7vdo1QlXD28bRbW4,179
|
|
43
42
|
agent/providers/omni/api_handler.py,sha256=7CpD43lYAqTyNKWfrD8XcM9ekbajqKCTH9p0TWtEQyg,1163
|
|
44
43
|
agent/providers/omni/clients/anthropic.py,sha256=nC_lj3UwrLqx9TIew58yxLqKwrH1_LwJD6EqVSEfp3g,3670
|
|
45
|
-
agent/providers/omni/clients/base.py,sha256=
|
|
46
|
-
agent/providers/omni/clients/
|
|
44
|
+
agent/providers/omni/clients/base.py,sha256=6lN86XKZT3cgBT9EQdz2akKoqbIvc-NXXIOkYKwXObE,946
|
|
45
|
+
agent/providers/omni/clients/oaicompat.py,sha256=KHFyOBttNUlxJdOEqrR7sS0S-S0LjMz7EVbTIkXyD_Y,7241
|
|
46
|
+
agent/providers/omni/clients/ollama.py,sha256=PmR5EhU9Mi43_o5mZN36XcpiGKp5HbQwlXpiRF9gO3I,4174
|
|
47
|
+
agent/providers/omni/clients/openai.py,sha256=iTSYWEJEM8INFPGJMiUVs8rFn0781XF_ofRkd7NT3gk,5920
|
|
47
48
|
agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
|
|
48
49
|
agent/providers/omni/image_utils.py,sha256=wejhWb36yqedsPnLFTFwk2wth8a6txfVWSg4EaNrRdA,908
|
|
49
|
-
agent/providers/omni/loop.py,sha256=
|
|
50
|
+
agent/providers/omni/loop.py,sha256=h9c-Ie4MA84H3XKYiAKA6J4Tec3_ACYxmU--eRuiS8A,39591
|
|
50
51
|
agent/providers/omni/parser.py,sha256=REpQwlwvY1z_N8wbMj6GhOeTiiWVWHhVja_LOxgzbks,11734
|
|
51
52
|
agent/providers/omni/prompts.py,sha256=Mupjy0bUwBjcAeLXpE1r1jisYPSlhwsp-IXJKEKrEtw,3779
|
|
52
53
|
agent/providers/omni/tools/__init__.py,sha256=IC1cMEDoR2ljGcNNthzBRF_VtnDbRL5qvHJWErtNp98,774
|
|
53
54
|
agent/providers/omni/tools/base.py,sha256=HiQ8dp9NbFGlGopbE1wxo0ZbujA7bzCGjCg4tl2lnPE,824
|
|
54
55
|
agent/providers/omni/tools/bash.py,sha256=wocYvWwoaVjHba19CVqc3bvwj8_1qwqYjNaPBbMRlWA,2241
|
|
55
56
|
agent/providers/omni/tools/computer.py,sha256=cB5PrhPmk6acKSENIvzw4rdpjeWx4HQHfSxBLGHzGRE,6964
|
|
56
|
-
agent/providers/omni/tools/manager.py,sha256=
|
|
57
|
-
agent/providers/omni/types.py,sha256=EoEiqtW98R2ZlhZb1-II4t1Ctf7qCOG6rUn_uQUHNdM,1021
|
|
57
|
+
agent/providers/omni/tools/manager.py,sha256=UhtasaxGcmkxtz-bP1UJ1a4xdYnD3Cv8PbtB0n2QCDg,2101
|
|
58
58
|
agent/providers/omni/utils.py,sha256=Ikp6ONL1HO637o3KDtv5yv6q-4uIWAzMSQDvGetWXC8,8724
|
|
59
59
|
agent/providers/openai/__init__.py,sha256=8DS6YNZp42NLCacwXsfRaghyczaOCVovX8TgzXUZf_o,165
|
|
60
60
|
agent/providers/openai/api_handler.py,sha256=L1K56dR1j4JsX1sX4OFYeKoCUMM25Fwj2y9nqv8oOhw,17736
|
|
@@ -67,7 +67,10 @@ agent/providers/openai/tools/manager.py,sha256=-wM641dLf8vcv6QF9x_ViGJeDl2YTuUV9
|
|
|
67
67
|
agent/providers/openai/types.py,sha256=0mFUxeFy23fJhMwc6lAFVXKngg2fJIXkPS5oV284V1M,898
|
|
68
68
|
agent/providers/openai/utils.py,sha256=YeCZWIqOFSeugWoqAS0rhxOKAfL-9uN9nrYSBGBgPdc,3175
|
|
69
69
|
agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
cua_agent-0.1.21.dist-info/RECORD,,
|
|
70
|
+
agent/ui/__init__.py,sha256=ohhxJLBin6k1hl5sKcmBST8mgh23WXgAXz3pN4f470E,45
|
|
71
|
+
agent/ui/gradio/__init__.py,sha256=ANKZhv1HqsLheWbLVBlyRQ7Q5qGeXuPi5jDs8vu-ZMo,579
|
|
72
|
+
agent/ui/gradio/app.py,sha256=6n0c_3HBb6ZeN213izyurL8oML1peet1cI8fx82DLZg,33980
|
|
73
|
+
cua_agent-0.1.23.dist-info/METADATA,sha256=88aLbVo6etPVlHUPYmxmOpCTRfmeIJ1axKfsrznGG10,9238
|
|
74
|
+
cua_agent-0.1.23.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
|
|
75
|
+
cua_agent-0.1.23.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
76
|
+
cua_agent-0.1.23.dist-info/RECORD,,
|
agent/core/README.md
DELETED
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
# Unified ComputerAgent
|
|
2
|
-
|
|
3
|
-
The `ComputerAgent` class provides a unified implementation that consolidates the previously separate agent implementations (AnthropicComputerAgent and OmniComputerAgent) into a single, configurable class.
|
|
4
|
-
|
|
5
|
-
## Features
|
|
6
|
-
|
|
7
|
-
- **Multiple Loop Types**: Switch between different agentic loop implementations using the `loop_type` parameter (Anthropic or Omni).
|
|
8
|
-
- **Provider Support**: Use different AI providers (OpenAI, Anthropic, etc.) with the appropriate loop.
|
|
9
|
-
- **Trajectory Saving**: Control whether to save screenshots and logs with the `save_trajectory` parameter.
|
|
10
|
-
- **Consistent Interface**: Maintains a consistent interface regardless of the underlying loop implementation.
|
|
11
|
-
|
|
12
|
-
## API Key Requirements
|
|
13
|
-
|
|
14
|
-
To use the ComputerAgent, you'll need API keys for the providers you want to use:
|
|
15
|
-
|
|
16
|
-
- For **OpenAI**: Set the `OPENAI_API_KEY` environment variable or pass it directly as `api_key`.
|
|
17
|
-
- For **Anthropic**: Set the `ANTHROPIC_API_KEY` environment variable or pass it directly as `api_key`.
|
|
18
|
-
- For **Groq**: Set the `GROQ_API_KEY` environment variable or pass it directly as `api_key`.
|
|
19
|
-
|
|
20
|
-
You can set environment variables in several ways:
|
|
21
|
-
|
|
22
|
-
```bash
|
|
23
|
-
# In your terminal before running the code
|
|
24
|
-
export OPENAI_API_KEY=your_api_key_here
|
|
25
|
-
|
|
26
|
-
# Or in a .env file
|
|
27
|
-
OPENAI_API_KEY=your_api_key_here
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
## Usage
|
|
31
|
-
|
|
32
|
-
Here's how to use the unified ComputerAgent:
|
|
33
|
-
|
|
34
|
-
```python
|
|
35
|
-
from agent.core.agent import ComputerAgent
|
|
36
|
-
from agent.types.base import AgenticLoop
|
|
37
|
-
from agent.providers.omni.types import LLMProvider
|
|
38
|
-
from computer import Computer
|
|
39
|
-
|
|
40
|
-
# Create a Computer instance
|
|
41
|
-
computer = Computer()
|
|
42
|
-
|
|
43
|
-
# Create an agent with the OMNI loop and OpenAI provider
|
|
44
|
-
agent = ComputerAgent(
|
|
45
|
-
computer=computer,
|
|
46
|
-
loop_type=AgenticLoop.OMNI,
|
|
47
|
-
provider=LLMProvider.OPENAI,
|
|
48
|
-
model="gpt-4o",
|
|
49
|
-
api_key="your_api_key_here", # Can also use OPENAI_API_KEY environment variable
|
|
50
|
-
save_trajectory=True,
|
|
51
|
-
only_n_most_recent_images=5
|
|
52
|
-
)
|
|
53
|
-
|
|
54
|
-
# Create an agent with the ANTHROPIC loop
|
|
55
|
-
agent = ComputerAgent(
|
|
56
|
-
computer=computer,
|
|
57
|
-
loop_type=AgenticLoop.ANTHROPIC,
|
|
58
|
-
model="claude-3-7-sonnet-20250219",
|
|
59
|
-
api_key="your_api_key_here", # Can also use ANTHROPIC_API_KEY environment variable
|
|
60
|
-
save_trajectory=True,
|
|
61
|
-
only_n_most_recent_images=5
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
# Use the agent
|
|
65
|
-
async with agent:
|
|
66
|
-
async for result in agent.run("Your task description here"):
|
|
67
|
-
# Process the result
|
|
68
|
-
title = result["metadata"].get("title", "Screen Analysis")
|
|
69
|
-
content = result["content"]
|
|
70
|
-
print(f"\n{title}")
|
|
71
|
-
print(content)
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
## Parameters
|
|
75
|
-
|
|
76
|
-
- `computer`: Computer instance to control
|
|
77
|
-
- `loop_type`: The type of loop to use (AgenticLoop.ANTHROPIC or AgenticLoop.OMNI)
|
|
78
|
-
- `provider`: AI provider to use (required for Omni loop)
|
|
79
|
-
- `api_key`: Optional API key (will use environment variable if not provided)
|
|
80
|
-
- `model`: Optional model name (will use provider default if not specified)
|
|
81
|
-
- `save_trajectory`: Whether to save screenshots and logs
|
|
82
|
-
- `only_n_most_recent_images`: Only keep N most recent images
|
|
83
|
-
- `max_retries`: Maximum number of retry attempts
|
|
84
|
-
|
|
85
|
-
## Directory Structure
|
|
86
|
-
|
|
87
|
-
When `save_trajectory` is enabled, the agent will create the following directory structure:
|
|
88
|
-
|
|
89
|
-
```
|
|
90
|
-
experiments/
|
|
91
|
-
├── screenshots/ # Screenshots captured during agent execution
|
|
92
|
-
└── logs/ # API call logs and other logging information
|
|
93
|
-
```
|
|
94
|
-
|
|
95
|
-
## Extending with New Loop Types
|
|
96
|
-
|
|
97
|
-
To add a new loop type:
|
|
98
|
-
|
|
99
|
-
1. Implement a new loop class
|
|
100
|
-
2. Add a new value to the `AgenticLoop` enum
|
|
101
|
-
3. Update the `_initialize_loop` method in `ComputerAgent` to handle the new loop type
|
agent/providers/omni/types.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
"""Type definitions for the Omni provider."""
|
|
2
|
-
|
|
3
|
-
from enum import StrEnum
|
|
4
|
-
from typing import Dict, Optional
|
|
5
|
-
from dataclasses import dataclass
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class LLMProvider(StrEnum):
|
|
9
|
-
"""Supported LLM providers."""
|
|
10
|
-
|
|
11
|
-
ANTHROPIC = "anthropic"
|
|
12
|
-
OMNI = "omni"
|
|
13
|
-
OPENAI = "openai"
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
@dataclass
|
|
17
|
-
class LLM:
|
|
18
|
-
"""Configuration for LLM model and provider."""
|
|
19
|
-
|
|
20
|
-
provider: LLMProvider
|
|
21
|
-
name: Optional[str] = None
|
|
22
|
-
|
|
23
|
-
def __post_init__(self):
|
|
24
|
-
"""Set default model name if not provided."""
|
|
25
|
-
if self.name is None:
|
|
26
|
-
self.name = PROVIDER_TO_DEFAULT_MODEL.get(self.provider)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
# For backward compatibility
|
|
30
|
-
LLMModel = LLM
|
|
31
|
-
Model = LLM
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
# Default models for each provider
|
|
35
|
-
PROVIDER_TO_DEFAULT_MODEL: Dict[LLMProvider, str] = {
|
|
36
|
-
LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
|
|
37
|
-
LLMProvider.OPENAI: "gpt-4o",
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
# Environment variable names for each provider
|
|
41
|
-
PROVIDER_TO_ENV_VAR: Dict[LLMProvider, str] = {
|
|
42
|
-
LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
|
|
43
|
-
LLMProvider.OPENAI: "OPENAI_API_KEY",
|
|
44
|
-
}
|
|
File without changes
|
|
File without changes
|