cua-agent 0.1.22__py3-none-any.whl → 0.1.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/__init__.py +1 -1
- agent/core/agent.py +9 -4
- agent/core/factory.py +3 -5
- agent/core/provider_config.py +4 -2
- agent/core/types.py +41 -1
- agent/providers/omni/__init__.py +1 -1
- agent/providers/omni/clients/oaicompat.py +177 -0
- agent/providers/omni/loop.py +25 -1
- agent/providers/omni/tools/manager.py +1 -1
- agent/ui/__init__.py +1 -0
- agent/ui/gradio/__init__.py +21 -0
- agent/ui/gradio/app.py +872 -0
- {cua_agent-0.1.22.dist-info → cua_agent-0.1.24.dist-info}/METADATA +74 -2
- {cua_agent-0.1.22.dist-info → cua_agent-0.1.24.dist-info}/RECORD +16 -14
- agent/core/README.md +0 -101
- agent/providers/omni/types.py +0 -47
- {cua_agent-0.1.22.dist-info → cua_agent-0.1.24.dist-info}/WHEEL +0 -0
- {cua_agent-0.1.22.dist-info → cua_agent-0.1.24.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cua-agent
|
|
3
|
-
Version: 0.1.22
|
|
3
|
+
Version: 0.1.24
|
|
4
4
|
Summary: CUA (Computer Use) Agent for AI-driven computer interaction
|
|
5
5
|
Author-Email: TryCua <gh@trycua.com>
|
|
6
6
|
Requires-Python: <3.13,>=3.10
|
|
@@ -21,6 +21,9 @@ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "anthropic"
|
|
|
21
21
|
Provides-Extra: openai
|
|
22
22
|
Requires-Dist: openai<2.0.0,>=1.14.0; extra == "openai"
|
|
23
23
|
Requires-Dist: httpx<0.29.0,>=0.27.0; extra == "openai"
|
|
24
|
+
Provides-Extra: ui
|
|
25
|
+
Requires-Dist: gradio<6.0.0,>=5.23.3; extra == "ui"
|
|
26
|
+
Requires-Dist: python-dotenv<2.0.0,>=1.0.1; extra == "ui"
|
|
24
27
|
Provides-Extra: som
|
|
25
28
|
Requires-Dist: torch>=2.2.1; extra == "som"
|
|
26
29
|
Requires-Dist: torchvision>=0.17.1; extra == "som"
|
|
@@ -59,6 +62,8 @@ Requires-Dist: groq<0.5.0,>=0.4.0; extra == "all"
|
|
|
59
62
|
Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "all"
|
|
60
63
|
Requires-Dist: requests<3.0.0,>=2.31.0; extra == "all"
|
|
61
64
|
Requires-Dist: ollama<0.5.0,>=0.4.7; extra == "all"
|
|
65
|
+
Requires-Dist: gradio<6.0.0,>=5.23.3; extra == "all"
|
|
66
|
+
Requires-Dist: python-dotenv<2.0.0,>=1.0.1; extra == "all"
|
|
62
67
|
Description-Content-Type: text/markdown
|
|
63
68
|
|
|
64
69
|
<div align="center">
|
|
@@ -95,6 +100,7 @@ pip install "cua-agent[all]"
|
|
|
95
100
|
pip install "cua-agent[openai]" # OpenAI Cua Loop
|
|
96
101
|
pip install "cua-agent[anthropic]" # Anthropic Cua Loop
|
|
97
102
|
pip install "cua-agent[omni]" # Cua Loop based on OmniParser (includes Ollama for local models)
|
|
103
|
+
pip install "cua-agent[ui]" # Gradio UI for the agent
|
|
98
104
|
```
|
|
99
105
|
|
|
100
106
|
## Run
|
|
@@ -106,6 +112,12 @@ async with Computer() as macos_computer:
|
|
|
106
112
|
computer=macos_computer,
|
|
107
113
|
loop=AgentLoop.OPENAI,
|
|
108
114
|
model=LLM(provider=LLMProvider.OPENAI)
|
|
115
|
+
# or
|
|
116
|
+
# loop=AgentLoop.ANTHROPIC,
|
|
117
|
+
# model=LLM(provider=LLMProvider.ANTHROPIC)
|
|
118
|
+
# or
|
|
119
|
+
# loop=AgentLoop.OMNI,
|
|
120
|
+
# model=LLM(provider=LLMProvider.OLLAMA, model="gemma3")
|
|
109
121
|
)
|
|
110
122
|
|
|
111
123
|
tasks = [
|
|
@@ -129,6 +141,66 @@ Refer to these notebooks for step-by-step guides on how to use the Computer-Use
|
|
|
129
141
|
|
|
130
142
|
- [Agent Notebook](../../notebooks/agent_nb.ipynb) - Complete examples and workflows
|
|
131
143
|
|
|
144
|
+
## Using the Gradio UI
|
|
145
|
+
|
|
146
|
+
The agent includes a Gradio-based user interface for easy interaction. To use it:
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
# Install with Gradio support
|
|
150
|
+
pip install "cua-agent[ui]"
|
|
151
|
+
```
|
|
152
|
+
Create a simple launcher script:
|
|
153
|
+
```python
|
|
154
|
+
# launch_ui.py
|
|
155
|
+
from agent.ui.gradio.app import create_gradio_ui
|
|
156
|
+
|
|
157
|
+
app = create_gradio_ui()
|
|
158
|
+
app.launch(share=False)
|
|
159
|
+
```
|
|
160
|
+
```bash
|
|
161
|
+
# Run the launcher
|
|
162
|
+
python launch_ui.py
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Setting up API Keys
|
|
166
|
+
|
|
167
|
+
For the Gradio UI to show available models, you need to set API keys as environment variables:
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
# For OpenAI models
|
|
171
|
+
export OPENAI_API_KEY=your_openai_key_here
|
|
172
|
+
|
|
173
|
+
# For Anthropic models
|
|
174
|
+
export ANTHROPIC_API_KEY=your_anthropic_key_here
|
|
175
|
+
|
|
176
|
+
# Launch with both keys set
|
|
177
|
+
OPENAI_API_KEY=your_key ANTHROPIC_API_KEY=your_key python launch_ui.py
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Without these environment variables, the UI will show "No models available" for the corresponding providers, but you can still use local models with the OMNI loop provider.
|
|
181
|
+
|
|
182
|
+
The Gradio UI provides:
|
|
183
|
+
- Selection of different agent loops (OpenAI, Anthropic, OMNI)
|
|
184
|
+
- Model selection for each provider
|
|
185
|
+
- Configuration of agent parameters
|
|
186
|
+
- Chat interface for interacting with the agent
|
|
187
|
+
|
|
188
|
+
You can also embed the Gradio UI in your own application:
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
# Import directly in your application
|
|
192
|
+
from agent.ui.gradio.app import create_gradio_ui
|
|
193
|
+
|
|
194
|
+
# Create the UI with advanced features
|
|
195
|
+
demo = create_gradio_ui()
|
|
196
|
+
demo.launch()
|
|
197
|
+
|
|
198
|
+
# Or for a simpler interface
|
|
199
|
+
from agent.ui.gradio import registry
|
|
200
|
+
demo = registry(name='cua:gpt-4o')
|
|
201
|
+
demo.launch()
|
|
202
|
+
```
|
|
203
|
+
|
|
132
204
|
## Agent Loops
|
|
133
205
|
|
|
134
206
|
The `cua-agent` package provides three agent loop variations, based on different CUA model providers and techniques:
|
|
@@ -137,7 +209,7 @@ The `cua-agent` package provides three agent loops variations, based on differen
|
|
|
137
209
|
|:-----------|:-----------------|:------------|:-------------|
|
|
138
210
|
| `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required |
|
|
139
211
|
| `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required |
|
|
140
|
-
| `AgentLoop.OMNI`
|
|
212
|
+
| `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |
|
|
141
213
|
|
|
142
214
|
## AgentResponse
|
|
143
215
|
The `AgentResponse` class represents the structured output returned after each agent turn. It contains the agent's response, reasoning, tool usage, and other metadata. The response format aligns with the new [OpenAI Agent SDK specification](https://platform.openai.com/docs/api-reference/responses) for better consistency across different agent loops.
|
|
@@ -1,13 +1,12 @@
|
|
|
1
|
-
agent/__init__.py,sha256=
|
|
2
|
-
agent/core/README.md,sha256=VOXNVbR0ugxf9gCXYmZtUU2kngZhfi29haT_oSxK0Lk,3559
|
|
1
|
+
agent/__init__.py,sha256=guFGtorDBF6R5hVep0Bvci3_sUJfBlcsq9ss5Kwrej8,1484
|
|
3
2
|
agent/core/__init__.py,sha256=7DhJ_6KKooM6uTmDIlumCnd7OFcU67BYIIR1dpIYUB0,506
|
|
4
|
-
agent/core/agent.py,sha256=
|
|
3
|
+
agent/core/agent.py,sha256=HUfBe7Uam3TObAmf6KH0GDKuNCNunNmmMcuxS7aZg0Q,8332
|
|
5
4
|
agent/core/base.py,sha256=EoutyMJ2kSJ72Di8KVRiUXc0ZJ1OkA0e7Ej14Y3F87w,7124
|
|
6
5
|
agent/core/callbacks.py,sha256=VbGIf5QkHh3Q0KsLM6wv7hRdIA5WExTVYLm64bckyUA,4306
|
|
7
6
|
agent/core/experiment.py,sha256=Ywj6q3JZFDKicfPuQsDl0vSN55HS7-Cnk3u3EcUCKe8,8866
|
|
8
|
-
agent/core/factory.py,sha256=
|
|
7
|
+
agent/core/factory.py,sha256=rGlSQDjcm61hNLxe9jLZvmMwPypYatNq25yf_SqUghU,3820
|
|
9
8
|
agent/core/messages.py,sha256=-OVMDqcxK5MUHPEkHliK29XFJYMRAc1keFvzrUyrOmM,16231
|
|
10
|
-
agent/core/provider_config.py,sha256=
|
|
9
|
+
agent/core/provider_config.py,sha256=Hr9kDFSXdPeqC6hbid3OTykNF0-XVi0wzZyd44a7kww,627
|
|
11
10
|
agent/core/telemetry.py,sha256=HElPd32k_w2SJ6t-Cc3j_2-AKdLbFwh2YlM8QViDgRw,4790
|
|
12
11
|
agent/core/tools.py,sha256=Jes2CFCFqC727WWHbO-sG7V03rBHnQe5X7Oi9ZkuScI,877
|
|
13
12
|
agent/core/tools/__init__.py,sha256=xZen-PqUp2dUaMEHJowXCQm33_5Sxhsx9PSoD0rq6tI,489
|
|
@@ -17,7 +16,7 @@ agent/core/tools/collection.py,sha256=NuwTn6dXSyznxWodfmFDQwUlxxaGb4oBPym4AEJABS
|
|
|
17
16
|
agent/core/tools/computer.py,sha256=lT_aW3huoYpcM8kffuokELupSz_WZG_qkaW1gITRC58,3892
|
|
18
17
|
agent/core/tools/edit.py,sha256=kv4jTKCM0VXrnoNErf7mT-xlr81-7T8v49_VA9y_L4Y,2005
|
|
19
18
|
agent/core/tools/manager.py,sha256=IRsCXjGc076nncQuyIjODoafnHTDhrf9sP5B4q5Pcdo,1742
|
|
20
|
-
agent/core/types.py,sha256=
|
|
19
|
+
agent/core/types.py,sha256=2RKDVzBd6O6woeH7A0oisbdpD_nx67B8ITnkMGu-g2E,2375
|
|
21
20
|
agent/core/visualization.py,sha256=1DuFF5sSeSf5BRSevBMDxml9-ajl7BQLFm5KBUwMbI8,6573
|
|
22
21
|
agent/providers/__init__.py,sha256=b4tIBAaIB1V7p8V0BWipHVnMhfHH_OuVgP4OWGSHdD8,194
|
|
23
22
|
agent/providers/anthropic/__init__.py,sha256=Mj11IZnVshZ2iHkvg4Z5-jrQIaD1WvzDz2Zk_pMwqIA,149
|
|
@@ -39,23 +38,23 @@ agent/providers/anthropic/tools/manager.py,sha256=yNvgTkfEqnOz5isDF0RxvmBMZB0uh2
|
|
|
39
38
|
agent/providers/anthropic/tools/run.py,sha256=xhXdnBK1di9muaO44CEirL9hpGy3NmKbjfMpyeVmn8Y,1595
|
|
40
39
|
agent/providers/anthropic/types.py,sha256=SF00kOMC1ui8j9Ah56KaeiR2cL394qCHjFIsBpXxt5w,421
|
|
41
40
|
agent/providers/anthropic/utils.py,sha256=qDp0bFGQhK1dG9U461iaeCiyoVUsksXmD43g9cedRW8,14367
|
|
42
|
-
agent/providers/omni/__init__.py,sha256=
|
|
41
|
+
agent/providers/omni/__init__.py,sha256=5ix67iJdtQNGuGJEjEOF65PwFWO7vdo1QlXD28bRbW4,179
|
|
43
42
|
agent/providers/omni/api_handler.py,sha256=7CpD43lYAqTyNKWfrD8XcM9ekbajqKCTH9p0TWtEQyg,1163
|
|
44
43
|
agent/providers/omni/clients/anthropic.py,sha256=nC_lj3UwrLqx9TIew58yxLqKwrH1_LwJD6EqVSEfp3g,3670
|
|
45
44
|
agent/providers/omni/clients/base.py,sha256=6lN86XKZT3cgBT9EQdz2akKoqbIvc-NXXIOkYKwXObE,946
|
|
45
|
+
agent/providers/omni/clients/oaicompat.py,sha256=KHFyOBttNUlxJdOEqrR7sS0S-S0LjMz7EVbTIkXyD_Y,7241
|
|
46
46
|
agent/providers/omni/clients/ollama.py,sha256=PmR5EhU9Mi43_o5mZN36XcpiGKp5HbQwlXpiRF9gO3I,4174
|
|
47
47
|
agent/providers/omni/clients/openai.py,sha256=iTSYWEJEM8INFPGJMiUVs8rFn0781XF_ofRkd7NT3gk,5920
|
|
48
48
|
agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
|
|
49
49
|
agent/providers/omni/image_utils.py,sha256=wejhWb36yqedsPnLFTFwk2wth8a6txfVWSg4EaNrRdA,908
|
|
50
|
-
agent/providers/omni/loop.py,sha256=
|
|
50
|
+
agent/providers/omni/loop.py,sha256=h9c-Ie4MA84H3XKYiAKA6J4Tec3_ACYxmU--eRuiS8A,39591
|
|
51
51
|
agent/providers/omni/parser.py,sha256=REpQwlwvY1z_N8wbMj6GhOeTiiWVWHhVja_LOxgzbks,11734
|
|
52
52
|
agent/providers/omni/prompts.py,sha256=Mupjy0bUwBjcAeLXpE1r1jisYPSlhwsp-IXJKEKrEtw,3779
|
|
53
53
|
agent/providers/omni/tools/__init__.py,sha256=IC1cMEDoR2ljGcNNthzBRF_VtnDbRL5qvHJWErtNp98,774
|
|
54
54
|
agent/providers/omni/tools/base.py,sha256=HiQ8dp9NbFGlGopbE1wxo0ZbujA7bzCGjCg4tl2lnPE,824
|
|
55
55
|
agent/providers/omni/tools/bash.py,sha256=wocYvWwoaVjHba19CVqc3bvwj8_1qwqYjNaPBbMRlWA,2241
|
|
56
56
|
agent/providers/omni/tools/computer.py,sha256=cB5PrhPmk6acKSENIvzw4rdpjeWx4HQHfSxBLGHzGRE,6964
|
|
57
|
-
agent/providers/omni/tools/manager.py,sha256=
|
|
58
|
-
agent/providers/omni/types.py,sha256=dKKucLXv9m2SjM5DpCYeOMWUA4NUU-6JsoeXb6DMfgU,1122
|
|
57
|
+
agent/providers/omni/tools/manager.py,sha256=UhtasaxGcmkxtz-bP1UJ1a4xdYnD3Cv8PbtB0n2QCDg,2101
|
|
59
58
|
agent/providers/omni/utils.py,sha256=Ikp6ONL1HO637o3KDtv5yv6q-4uIWAzMSQDvGetWXC8,8724
|
|
60
59
|
agent/providers/openai/__init__.py,sha256=8DS6YNZp42NLCacwXsfRaghyczaOCVovX8TgzXUZf_o,165
|
|
61
60
|
agent/providers/openai/api_handler.py,sha256=L1K56dR1j4JsX1sX4OFYeKoCUMM25Fwj2y9nqv8oOhw,17736
|
|
@@ -68,7 +67,10 @@ agent/providers/openai/tools/manager.py,sha256=-wM641dLf8vcv6QF9x_ViGJeDl2YTuUV9
|
|
|
68
67
|
agent/providers/openai/types.py,sha256=0mFUxeFy23fJhMwc6lAFVXKngg2fJIXkPS5oV284V1M,898
|
|
69
68
|
agent/providers/openai/utils.py,sha256=YeCZWIqOFSeugWoqAS0rhxOKAfL-9uN9nrYSBGBgPdc,3175
|
|
70
69
|
agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
cua_agent-0.1.
|
|
70
|
+
agent/ui/__init__.py,sha256=ohhxJLBin6k1hl5sKcmBST8mgh23WXgAXz3pN4f470E,45
|
|
71
|
+
agent/ui/gradio/__init__.py,sha256=ANKZhv1HqsLheWbLVBlyRQ7Q5qGeXuPi5jDs8vu-ZMo,579
|
|
72
|
+
agent/ui/gradio/app.py,sha256=6n0c_3HBb6ZeN213izyurL8oML1peet1cI8fx82DLZg,33980
|
|
73
|
+
cua_agent-0.1.24.dist-info/METADATA,sha256=QMXWyaIbPsTaBvKIbHgknaT2nx9I32zqva7fvGa1BhM,9841
|
|
74
|
+
cua_agent-0.1.24.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
|
|
75
|
+
cua_agent-0.1.24.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
76
|
+
cua_agent-0.1.24.dist-info/RECORD,,
|
agent/core/README.md
DELETED
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
# Unified ComputerAgent
|
|
2
|
-
|
|
3
|
-
The `ComputerAgent` class provides a unified implementation that consolidates the previously separate agent implementations (AnthropicComputerAgent and OmniComputerAgent) into a single, configurable class.
|
|
4
|
-
|
|
5
|
-
## Features
|
|
6
|
-
|
|
7
|
-
- **Multiple Loop Types**: Switch between different agentic loop implementations using the `loop_type` parameter (Anthropic or Omni).
|
|
8
|
-
- **Provider Support**: Use different AI providers (OpenAI, Anthropic, etc.) with the appropriate loop.
|
|
9
|
-
- **Trajectory Saving**: Control whether to save screenshots and logs with the `save_trajectory` parameter.
|
|
10
|
-
- **Consistent Interface**: Maintains a consistent interface regardless of the underlying loop implementation.
|
|
11
|
-
|
|
12
|
-
## API Key Requirements
|
|
13
|
-
|
|
14
|
-
To use the ComputerAgent, you'll need API keys for the providers you want to use:
|
|
15
|
-
|
|
16
|
-
- For **OpenAI**: Set the `OPENAI_API_KEY` environment variable or pass it directly as `api_key`.
|
|
17
|
-
- For **Anthropic**: Set the `ANTHROPIC_API_KEY` environment variable or pass it directly as `api_key`.
|
|
18
|
-
- For **Groq**: Set the `GROQ_API_KEY` environment variable or pass it directly as `api_key`.
|
|
19
|
-
|
|
20
|
-
You can set environment variables in several ways:
|
|
21
|
-
|
|
22
|
-
```bash
|
|
23
|
-
# In your terminal before running the code
|
|
24
|
-
export OPENAI_API_KEY=your_api_key_here
|
|
25
|
-
|
|
26
|
-
# Or in a .env file
|
|
27
|
-
OPENAI_API_KEY=your_api_key_here
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
## Usage
|
|
31
|
-
|
|
32
|
-
Here's how to use the unified ComputerAgent:
|
|
33
|
-
|
|
34
|
-
```python
|
|
35
|
-
from agent.core.agent import ComputerAgent
|
|
36
|
-
from agent.types.base import AgenticLoop
|
|
37
|
-
from agent.providers.omni.types import LLMProvider
|
|
38
|
-
from computer import Computer
|
|
39
|
-
|
|
40
|
-
# Create a Computer instance
|
|
41
|
-
computer = Computer()
|
|
42
|
-
|
|
43
|
-
# Create an agent with the OMNI loop and OpenAI provider
|
|
44
|
-
agent = ComputerAgent(
|
|
45
|
-
computer=computer,
|
|
46
|
-
loop_type=AgenticLoop.OMNI,
|
|
47
|
-
provider=LLMProvider.OPENAI,
|
|
48
|
-
model="gpt-4o",
|
|
49
|
-
api_key="your_api_key_here", # Can also use OPENAI_API_KEY environment variable
|
|
50
|
-
save_trajectory=True,
|
|
51
|
-
only_n_most_recent_images=5
|
|
52
|
-
)
|
|
53
|
-
|
|
54
|
-
# Create an agent with the ANTHROPIC loop
|
|
55
|
-
agent = ComputerAgent(
|
|
56
|
-
computer=computer,
|
|
57
|
-
loop_type=AgenticLoop.ANTHROPIC,
|
|
58
|
-
model="claude-3-7-sonnet-20250219",
|
|
59
|
-
api_key="your_api_key_here", # Can also use ANTHROPIC_API_KEY environment variable
|
|
60
|
-
save_trajectory=True,
|
|
61
|
-
only_n_most_recent_images=5
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
# Use the agent
|
|
65
|
-
async with agent:
|
|
66
|
-
async for result in agent.run("Your task description here"):
|
|
67
|
-
# Process the result
|
|
68
|
-
title = result["metadata"].get("title", "Screen Analysis")
|
|
69
|
-
content = result["content"]
|
|
70
|
-
print(f"\n{title}")
|
|
71
|
-
print(content)
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
## Parameters
|
|
75
|
-
|
|
76
|
-
- `computer`: Computer instance to control
|
|
77
|
-
- `loop_type`: The type of loop to use (AgenticLoop.ANTHROPIC or AgenticLoop.OMNI)
|
|
78
|
-
- `provider`: AI provider to use (required for Omni loop)
|
|
79
|
-
- `api_key`: Optional API key (will use environment variable if not provided)
|
|
80
|
-
- `model`: Optional model name (will use provider default if not specified)
|
|
81
|
-
- `save_trajectory`: Whether to save screenshots and logs
|
|
82
|
-
- `only_n_most_recent_images`: Only keep N most recent images
|
|
83
|
-
- `max_retries`: Maximum number of retry attempts
|
|
84
|
-
|
|
85
|
-
## Directory Structure
|
|
86
|
-
|
|
87
|
-
When `save_trajectory` is enabled, the agent will create the following directory structure:
|
|
88
|
-
|
|
89
|
-
```
|
|
90
|
-
experiments/
|
|
91
|
-
├── screenshots/ # Screenshots captured during agent execution
|
|
92
|
-
└── logs/ # API call logs and other logging information
|
|
93
|
-
```
|
|
94
|
-
|
|
95
|
-
## Extending with New Loop Types
|
|
96
|
-
|
|
97
|
-
To add a new loop type:
|
|
98
|
-
|
|
99
|
-
1. Implement a new loop class
|
|
100
|
-
2. Add a new value to the `AgenticLoop` enum
|
|
101
|
-
3. Update the `_initialize_loop` method in `ComputerAgent` to handle the new loop type
|
agent/providers/omni/types.py
DELETED
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
"""Type definitions for the Omni provider."""
|
|
2
|
-
|
|
3
|
-
from enum import StrEnum
|
|
4
|
-
from typing import Dict, Optional
|
|
5
|
-
from dataclasses import dataclass
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class LLMProvider(StrEnum):
|
|
9
|
-
"""Supported LLM providers."""
|
|
10
|
-
|
|
11
|
-
ANTHROPIC = "anthropic"
|
|
12
|
-
OMNI = "omni"
|
|
13
|
-
OPENAI = "openai"
|
|
14
|
-
OLLAMA = "ollama"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
@dataclass
|
|
18
|
-
class LLM:
|
|
19
|
-
"""Configuration for LLM model and provider."""
|
|
20
|
-
|
|
21
|
-
provider: LLMProvider
|
|
22
|
-
name: Optional[str] = None
|
|
23
|
-
|
|
24
|
-
def __post_init__(self):
|
|
25
|
-
"""Set default model name if not provided."""
|
|
26
|
-
if self.name is None:
|
|
27
|
-
self.name = PROVIDER_TO_DEFAULT_MODEL.get(self.provider)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
# For backward compatibility
|
|
31
|
-
LLMModel = LLM
|
|
32
|
-
Model = LLM
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
# Default models for each provider
|
|
36
|
-
PROVIDER_TO_DEFAULT_MODEL: Dict[LLMProvider, str] = {
|
|
37
|
-
LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
|
|
38
|
-
LLMProvider.OPENAI: "gpt-4o",
|
|
39
|
-
LLMProvider.OLLAMA: "gemma3:4b-it-q4_K_M",
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
# Environment variable names for each provider
|
|
43
|
-
PROVIDER_TO_ENV_VAR: Dict[LLMProvider, str] = {
|
|
44
|
-
LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
|
|
45
|
-
LLMProvider.OPENAI: "OPENAI_API_KEY",
|
|
46
|
-
LLMProvider.OLLAMA: "none",
|
|
47
|
-
}
|
|
File without changes
|
|
File without changes
|