cua-agent 0.1.6__py3-none-any.whl → 0.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cua-agent may warrant closer review before use.
- agent/__init__.py +3 -2
- agent/core/__init__.py +1 -6
- agent/core/{computer_agent.py → agent.py} +31 -76
- agent/core/{loop.py → base.py} +68 -127
- agent/core/factory.py +104 -0
- agent/core/messages.py +279 -125
- agent/core/provider_config.py +15 -0
- agent/core/types.py +45 -0
- agent/core/visualization.py +197 -0
- agent/providers/anthropic/api/client.py +142 -1
- agent/providers/anthropic/api_handler.py +140 -0
- agent/providers/anthropic/callbacks/__init__.py +5 -0
- agent/providers/anthropic/loop.py +207 -221
- agent/providers/anthropic/response_handler.py +226 -0
- agent/providers/anthropic/tools/bash.py +0 -97
- agent/providers/anthropic/utils.py +368 -0
- agent/providers/omni/__init__.py +1 -20
- agent/providers/omni/api_handler.py +42 -0
- agent/providers/omni/clients/anthropic.py +4 -0
- agent/providers/omni/image_utils.py +0 -72
- agent/providers/omni/loop.py +491 -607
- agent/providers/omni/parser.py +58 -4
- agent/providers/omni/tools/__init__.py +25 -7
- agent/providers/omni/tools/base.py +29 -0
- agent/providers/omni/tools/bash.py +43 -38
- agent/providers/omni/tools/computer.py +144 -182
- agent/providers/omni/tools/manager.py +25 -45
- agent/providers/omni/types.py +1 -3
- agent/providers/omni/utils.py +224 -145
- agent/providers/openai/__init__.py +6 -0
- agent/providers/openai/api_handler.py +453 -0
- agent/providers/openai/loop.py +440 -0
- agent/providers/openai/response_handler.py +205 -0
- agent/providers/openai/tools/__init__.py +15 -0
- agent/providers/openai/tools/base.py +79 -0
- agent/providers/openai/tools/computer.py +319 -0
- agent/providers/openai/tools/manager.py +106 -0
- agent/providers/openai/types.py +36 -0
- agent/providers/openai/utils.py +98 -0
- cua_agent-0.1.18.dist-info/METADATA +165 -0
- cua_agent-0.1.18.dist-info/RECORD +73 -0
- agent/README.md +0 -63
- agent/providers/anthropic/messages/manager.py +0 -112
- agent/providers/omni/callbacks.py +0 -78
- agent/providers/omni/clients/groq.py +0 -101
- agent/providers/omni/experiment.py +0 -276
- agent/providers/omni/messages.py +0 -171
- agent/providers/omni/tool_manager.py +0 -91
- agent/providers/omni/visualization.py +0 -130
- agent/types/__init__.py +0 -23
- agent/types/base.py +0 -41
- agent/types/messages.py +0 -36
- cua_agent-0.1.6.dist-info/METADATA +0 -120
- cua_agent-0.1.6.dist-info/RECORD +0 -64
- /agent/{types → core}/tools.py +0 -0
- {cua_agent-0.1.6.dist-info → cua_agent-0.1.18.dist-info}/WHEEL +0 -0
- {cua_agent-0.1.6.dist-info → cua_agent-0.1.18.dist-info}/entry_points.txt +0 -0
cua_agent-0.1.18.dist-info/METADATA
@@ -0,0 +1,165 @@
+Metadata-Version: 2.1
+Name: cua-agent
+Version: 0.1.18
+Summary: CUA (Computer Use) Agent for AI-driven computer interaction
+Author-Email: TryCua <gh@trycua.com>
+Requires-Python: <3.13,>=3.10
+Requires-Dist: httpx<0.29.0,>=0.27.0
+Requires-Dist: aiohttp<4.0.0,>=3.9.3
+Requires-Dist: asyncio
+Requires-Dist: anyio<5.0.0,>=4.4.1
+Requires-Dist: typing-extensions<5.0.0,>=4.12.2
+Requires-Dist: pydantic<3.0.0,>=2.6.4
+Requires-Dist: rich<14.0.0,>=13.7.1
+Requires-Dist: python-dotenv<2.0.0,>=1.0.1
+Requires-Dist: cua-computer<0.2.0,>=0.1.0
+Requires-Dist: cua-core<0.2.0,>=0.1.0
+Requires-Dist: certifi>=2024.2.2
+Provides-Extra: anthropic
+Requires-Dist: anthropic>=0.49.0; extra == "anthropic"
+Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "anthropic"
+Provides-Extra: openai
+Requires-Dist: openai<2.0.0,>=1.14.0; extra == "openai"
+Requires-Dist: httpx<0.29.0,>=0.27.0; extra == "openai"
+Provides-Extra: som
+Requires-Dist: torch>=2.2.1; extra == "som"
+Requires-Dist: torchvision>=0.17.1; extra == "som"
+Requires-Dist: ultralytics>=8.0.0; extra == "som"
+Requires-Dist: transformers>=4.38.2; extra == "som"
+Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "som"
+Requires-Dist: anthropic<0.47.0,>=0.46.0; extra == "som"
+Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "som"
+Requires-Dist: openai<2.0.0,>=1.14.0; extra == "som"
+Requires-Dist: groq<0.5.0,>=0.4.0; extra == "som"
+Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "som"
+Requires-Dist: requests<3.0.0,>=2.31.0; extra == "som"
+Provides-Extra: all
+Requires-Dist: torch>=2.2.1; extra == "all"
+Requires-Dist: torchvision>=0.17.1; extra == "all"
+Requires-Dist: ultralytics>=8.0.0; extra == "all"
+Requires-Dist: transformers>=4.38.2; extra == "all"
+Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "all"
+Requires-Dist: anthropic<0.47.0,>=0.46.0; extra == "all"
+Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "all"
+Requires-Dist: openai<2.0.0,>=1.14.0; extra == "all"
+Requires-Dist: groq<0.5.0,>=0.4.0; extra == "all"
+Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "all"
+Requires-Dist: requests<3.0.0,>=2.31.0; extra == "all"
+Description-Content-Type: text/markdown
+
+<div align="center">
+<h1>
+  <div class="image-wrapper" style="display: inline-block;">
+    <picture>
+      <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
+      <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
+      <img alt="Shows my svg">
+    </picture>
+  </div>
+
+  [](#)
+  [](#)
+  [](https://discord.com/invite/mVnXXpdE85)
+  [](https://pypi.org/project/cua-computer/)
+</h1>
+</div>
+
+**cua-agent** is a general Computer-Use framework for running multi-app agentic workflows targeting macOS and Linux sandbox created with Cua, supporting local (Ollama) and cloud model providers (OpenAI, Anthropic, Groq, DeepSeek, Qwen).
+
+### Get started with Agent
+
+<div align="center">
+  <img src="../../img/agent.png"/>
+</div>
+
+## Install
+
+```bash
+pip install "cua-agent[all]"
+
+# or install specific loop providers
+pip install "cua-agent[openai]"    # OpenAI Cua Loop
+pip install "cua-agent[anthropic]" # Anthropic Cua Loop
+pip install "cua-agent[omni]"      # Cua Loop based on OmniParser
+```
+
+## Run
+
+```bash
+async with Computer() as macos_computer:
+    # Create agent with loop and provider
+    agent = ComputerAgent(
+        computer=macos_computer,
+        loop=AgentLoop.OPENAI,
+        model=LLM(provider=LLMProvider.OPENAI)
+    )
+
+    tasks = [
+        "Look for a repository named trycua/cua on GitHub.",
+        "Check the open issues, open the most recent one and read it.",
+        "Clone the repository in users/lume/projects if it doesn't exist yet.",
+        "Open the repository with an app named Cursor (on the dock, black background and white cube icon).",
+        "From Cursor, open Composer if not already open.",
+        "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
+    ]
+
+    for i, task in enumerate(tasks):
+        print(f"\nExecuting task {i}/{len(tasks)}: {task}")
+        async for result in agent.run(task):
+            print(result)
+
+        print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}")
+```
+
+Refer to these notebooks for step-by-step guides on how to use the Computer-Use Agent (CUA):
+
+- [Agent Notebook](../../notebooks/agent_nb.ipynb) - Complete examples and workflows
+
+## Agent Loops
+
+The `cua-agent` package provides three agent loops variations, based on different CUA models providers and techniques:
+
+| Agent Loop | Supported Models | Description | Set-Of-Marks |
+|:-----------|:-----------------|:------------|:-------------|
+| `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required |
+| `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required |
+| `AgentLoop.OMNI` <br>(preview) | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `gpt-3.5-turbo` | Use OmniParser for element pixel-detection (SoM) and any VLMs | OmniParser |
+
+## AgentResponse
+The `AgentResponse` class represents the structured output returned after each agent turn. It contains the agent's response, reasoning, tool usage, and other metadata. The response format aligns with the new [OpenAI Agent SDK specification](https://platform.openai.com/docs/api-reference/responses) for better consistency across different agent loops.
+
+```python
+async for result in agent.run(task):
+    print("Response ID: ", result.get("id"))
+
+    # Print detailed usage information
+    usage = result.get("usage")
+    if usage:
+        print("\nUsage Details:")
+        print(f"  Input Tokens: {usage.get('input_tokens')}")
+        if "input_tokens_details" in usage:
+            print(f"  Input Tokens Details: {usage.get('input_tokens_details')}")
+        print(f"  Output Tokens: {usage.get('output_tokens')}")
+        if "output_tokens_details" in usage:
+            print(f"  Output Tokens Details: {usage.get('output_tokens_details')}")
+        print(f"  Total Tokens: {usage.get('total_tokens')}")
+
+    print("Response Text: ", result.get("text"))
+
+    # Print tools information
+    tools = result.get("tools")
+    if tools:
+        print("\nTools:")
+        print(tools)
+
+    # Print reasoning and tool call outputs
+    outputs = result.get("output", [])
+    for output in outputs:
+        output_type = output.get("type")
+        if output_type == "reasoning":
+            print("\nReasoning Output:")
+            print(output)
+        elif output_type == "computer_call":
+            print("\nTool Call Output:")
+            print(output)
+```

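The README embedded in the new METADATA shows the Run snippet as a bare fragment (fenced as `bash`). For orientation, here is a minimal self-contained sketch of the same flow; the import paths (`computer`, `agent`) and the exact module layout are assumptions, not something this diff confirms:

```python
# Sketch only: wraps the README's Run fragment in a runnable script.
# Import paths below are assumptions based on the package names in this diff.
import asyncio

from computer import Computer  # assumed: provided by the cua-computer dependency
from agent import ComputerAgent, AgentLoop, LLM, LLMProvider  # assumed: provided by cua-agent


async def main() -> None:
    async with Computer() as macos_computer:
        agent = ComputerAgent(
            computer=macos_computer,
            loop=AgentLoop.OPENAI,
            model=LLM(provider=LLMProvider.OPENAI),
        )

        tasks = ["Look for a repository named trycua/cua on GitHub."]

        for i, task in enumerate(tasks, start=1):
            print(f"Executing task {i}/{len(tasks)}: {task}")
            # agent.run() yields one AgentResponse-style dict per turn (see the README above)
            async for result in agent.run(task):
                print(result.get("id"), result.get("text"))


if __name__ == "__main__":
    asyncio.run(main())
```
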
cua_agent-0.1.18.dist-info/RECORD
@@ -0,0 +1,73 @@
+agent/__init__.py,sha256=ZOK-dcYN3o9CQ9XnUipkDR7YKQNNZ3HL_7MJbqHY_-c,1494
+agent/core/README.md,sha256=VOXNVbR0ugxf9gCXYmZtUU2kngZhfi29haT_oSxK0Lk,3559
+agent/core/__init__.py,sha256=7DhJ_6KKooM6uTmDIlumCnd7OFcU67BYIIR1dpIYUB0,506
+agent/core/agent.py,sha256=WutrveYndS-YBy9BVu1I-Eyj-7AAk1CgACkn5OdWvhU,8010
+agent/core/base.py,sha256=EoutyMJ2kSJ72Di8KVRiUXc0ZJ1OkA0e7Ej14Y3F87w,7124
+agent/core/callbacks.py,sha256=VbGIf5QkHh3Q0KsLM6wv7hRdIA5WExTVYLm64bckyUA,4306
+agent/core/experiment.py,sha256=Ywj6q3JZFDKicfPuQsDl0vSN55HS7-Cnk3u3EcUCKe8,8866
+agent/core/factory.py,sha256=sy7k323ZPDf8T7UEKNSV9wT2OylckliaPr8KyYhMAi0,3829
+agent/core/messages.py,sha256=-OVMDqcxK5MUHPEkHliK29XFJYMRAc1keFvzrUyrOmM,16231
+agent/core/provider_config.py,sha256=GndsvFdFQHNlxOSE6XcxRAZcyOJmLQ_jSOd66QY0uyk,414
+agent/core/telemetry.py,sha256=HElPd32k_w2SJ6t-Cc3j_2-AKdLbFwh2YlM8QViDgRw,4790
+agent/core/tools.py,sha256=Jes2CFCFqC727WWHbO-sG7V03rBHnQe5X7Oi9ZkuScI,877
+agent/core/tools/__init__.py,sha256=xZen-PqUp2dUaMEHJowXCQm33_5Sxhsx9PSoD0rq6tI,489
+agent/core/tools/base.py,sha256=CdzRFNuOjNfzgyTUN4ZoCGkUDR5HI0ECQVpvrUdEij8,2295
+agent/core/tools/bash.py,sha256=jnJKVlHn8np8e0gWd8EO0_qqjMkfQzutSugA_Iol4jE,1585
+agent/core/tools/collection.py,sha256=NuwTn6dXSyznxWodfmFDQwUlxxaGb4oBPym4AEJABSQ,1338
+agent/core/tools/computer.py,sha256=lT_aW3huoYpcM8kffuokELupSz_WZG_qkaW1gITRC58,3892
+agent/core/tools/edit.py,sha256=kv4jTKCM0VXrnoNErf7mT-xlr81-7T8v49_VA9y_L4Y,2005
+agent/core/tools/manager.py,sha256=IRsCXjGc076nncQuyIjODoafnHTDhrf9sP5B4q5Pcdo,1742
+agent/core/types.py,sha256=fSOrbGVjidfrTA1XEy7k5uAcq3Ir1sMNXojs8AS0B1Y,1202
+agent/core/visualization.py,sha256=1DuFF5sSeSf5BRSevBMDxml9-ajl7BQLFm5KBUwMbI8,6573
+agent/providers/__init__.py,sha256=b4tIBAaIB1V7p8V0BWipHVnMhfHH_OuVgP4OWGSHdD8,194
+agent/providers/anthropic/__init__.py,sha256=Mj11IZnVshZ2iHkvg4Z5-jrQIaD1WvzDz2Zk_pMwqIA,149
+agent/providers/anthropic/api/client.py,sha256=IVCntHAlkHFHPb6h4cEpb4wsBESy0wC6IPG8_Yydtqw,13258
+agent/providers/anthropic/api/logging.py,sha256=vHpwkIyOZdkSTVIH4ycbBPd4a_rzhP7Osu1I-Ayouwc,5154
+agent/providers/anthropic/api_handler.py,sha256=pWXcqDs0ruviDhRNRrz5Ac9ZH4yDv6ZlwpeG3a42cDg,5206
+agent/providers/anthropic/callbacks/__init__.py,sha256=PciBb6Z6MKSwfXqDjU3pV_0FS4MOn_Np_A7_skD-6dA,104
+agent/providers/anthropic/callbacks/manager.py,sha256=dRKN7MuBze2dLal0iHDxCKYqMdh_KShSphuwn7zC-c4,1878
+agent/providers/anthropic/loop.py,sha256=jW2PwJ8EBzfFKyD-cy7hKRsz1ZWyw8m-xHLUojsP4qE,20226
+agent/providers/anthropic/prompts.py,sha256=nHFfgPrfvnWrEdVP7EUBGUHAI85D2X9HeZirk9EwncU,1941
+agent/providers/anthropic/response_handler.py,sha256=ZTprV4NTP9Eb9jQ7QgEKZBX0L6rMj5nqBRiE3Zfws8I,8008
+agent/providers/anthropic/tools/__init__.py,sha256=JyZwuVtPUnZwRSZBSCdQv9yxbLCsygm3l8Ywjjt9qTQ,661
+agent/providers/anthropic/tools/base.py,sha256=WnRDbqO25tQzLpS2RU2ZXTLF5wd5IqU7SiyRAglQat4,2752
+agent/providers/anthropic/tools/bash.py,sha256=QODuFjWuHM4GgGTqK2HizSyYqGqQwX70AdwrFiGSp2Q,2218
+agent/providers/anthropic/tools/collection.py,sha256=RBK_6hxfHExR-EOxadiLl0OznmFj07nyIUjFgaYZ6Eo,960
+agent/providers/anthropic/tools/computer.py,sha256=vYni1jDOOgzSSBOJxHcEKxvKUYRp5_nQ-9dmpGdLwm4,25858
+agent/providers/anthropic/tools/edit.py,sha256=EGRP61MDA4Oue1D7Q-_vLpd6LdGbdBA1Z4HSZ66DbmI,13465
+agent/providers/anthropic/tools/manager.py,sha256=yNvgTkfEqnOz5isDF0RxvmBMZB0uh2PipFEH-PUXpoY,2020
+agent/providers/anthropic/tools/run.py,sha256=xhXdnBK1di9muaO44CEirL9hpGy3NmKbjfMpyeVmn8Y,1595
+agent/providers/anthropic/types.py,sha256=SF00kOMC1ui8j9Ah56KaeiR2cL394qCHjFIsBpXxt5w,421
+agent/providers/anthropic/utils.py,sha256=qDp0bFGQhK1dG9U461iaeCiyoVUsksXmD43g9cedRW8,14367
+agent/providers/omni/__init__.py,sha256=59Eqpr3Nc3EE61VirUkfecAnQuGELdg0t44q5tg3SW8,172
+agent/providers/omni/api_handler.py,sha256=7CpD43lYAqTyNKWfrD8XcM9ekbajqKCTH9p0TWtEQyg,1163
+agent/providers/omni/clients/anthropic.py,sha256=nC_lj3UwrLqx9TIew58yxLqKwrH1_LwJD6EqVSEfp3g,3670
+agent/providers/omni/clients/base.py,sha256=zAAgPi0jl3SWPC730R9l79E8bfYPSo39UtCSE-mrK6I,1076
+agent/providers/omni/clients/openai.py,sha256=E4TAXMUFoYTunJETCWCNx5XAc6xutiN4rB6PlVpzC5s,5972
+agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
+agent/providers/omni/image_utils.py,sha256=wejhWb36yqedsPnLFTFwk2wth8a6txfVWSg4EaNrRdA,908
+agent/providers/omni/loop.py,sha256=6521KV1hBdxnovzT0wfUcYiRCe7agx2TCC0hgQA3TRw,37459
+agent/providers/omni/parser.py,sha256=REpQwlwvY1z_N8wbMj6GhOeTiiWVWHhVja_LOxgzbks,11734
+agent/providers/omni/prompts.py,sha256=Mupjy0bUwBjcAeLXpE1r1jisYPSlhwsp-IXJKEKrEtw,3779
+agent/providers/omni/tools/__init__.py,sha256=IC1cMEDoR2ljGcNNthzBRF_VtnDbRL5qvHJWErtNp98,774
+agent/providers/omni/tools/base.py,sha256=HiQ8dp9NbFGlGopbE1wxo0ZbujA7bzCGjCg4tl2lnPE,824
+agent/providers/omni/tools/bash.py,sha256=wocYvWwoaVjHba19CVqc3bvwj8_1qwqYjNaPBbMRlWA,2241
+agent/providers/omni/tools/computer.py,sha256=cB5PrhPmk6acKSENIvzw4rdpjeWx4HQHfSxBLGHzGRE,6964
+agent/providers/omni/tools/manager.py,sha256=GVLudHNpOQnl6aA_IOvqAEMDoKW62ozMZuwst6Z1Hco,2094
+agent/providers/omni/types.py,sha256=EoEiqtW98R2ZlhZb1-II4t1Ctf7qCOG6rUn_uQUHNdM,1021
+agent/providers/omni/utils.py,sha256=Ikp6ONL1HO637o3KDtv5yv6q-4uIWAzMSQDvGetWXC8,8724
+agent/providers/openai/__init__.py,sha256=8DS6YNZp42NLCacwXsfRaghyczaOCVovX8TgzXUZf_o,165
+agent/providers/openai/api_handler.py,sha256=zIb9JtfTT9TI4Gd7iyjz5m_MCc9sP5mNXLZQHAr0Rw0,17647
+agent/providers/openai/loop.py,sha256=3gruChABTE6IMQemQPwiER7xk6cmjuHuFssa0KUipAk,18782
+agent/providers/openai/response_handler.py,sha256=K8v_92uSr9R74Y5INY4naeEZZZm35CLIl4h74MBZhsw,7953
+agent/providers/openai/tools/__init__.py,sha256=-KbHMWcd2OVTk5RYQ3ACBEMygwbH-VW6n_98p0lwM4A,344
+agent/providers/openai/tools/base.py,sha256=Np_BC9Cm6TslK99etE9hVTtsBlcEaGhoNCK3NXdB_Lw,2474
+agent/providers/openai/tools/computer.py,sha256=51uRgCXbeqlnKotlnA0-iHkXHHJkqede2GUZfwtGxx4,12271
+agent/providers/openai/tools/manager.py,sha256=-wM641dLf8vcv6QF9x_ViGJeDl2YTuUV93j6u7GBI18,3903
+agent/providers/openai/types.py,sha256=0mFUxeFy23fJhMwc6lAFVXKngg2fJIXkPS5oV284V1M,898
+agent/providers/openai/utils.py,sha256=YeCZWIqOFSeugWoqAS0rhxOKAfL-9uN9nrYSBGBgPdc,3175
+agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
+cua_agent-0.1.18.dist-info/METADATA,sha256=t09Oyz6kHe2fgkGGLwpb2Cyk27I6TRXiMoCL8qQBgD0,7019
+cua_agent-0.1.18.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
+cua_agent-0.1.18.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+cua_agent-0.1.18.dist-info/RECORD,,

agent/README.md
DELETED
@@ -1,63 +0,0 @@
-# Agent Package Structure
-
-## Overview
-The agent package provides a modular and extensible framework for AI-powered computer agents.
-
-## Directory Structure
-```
-agent/
-├── __init__.py           # Package exports
-├── core/                 # Core functionality
-│   ├── __init__.py
-│   ├── computer_agent.py # Main entry point
-│   └── factory.py        # Provider factory
-├── base/                 # Base implementations
-│   ├── __init__.py
-│   ├── agent.py          # Base agent class
-│   ├── core/             # Core components
-│   │   ├── callbacks.py
-│   │   ├── loop.py
-│   │   └── messages.py
-│   └── tools/            # Tool implementations
-├── providers/            # Provider implementations
-│   ├── __init__.py
-│   ├── anthropic/        # Anthropic provider
-│   │   ├── agent.py
-│   │   ├── loop.py
-│   │   └── tool_manager.py
-│   └── omni/             # Omni provider
-│       ├── agent.py
-│       ├── loop.py
-│       └── tool_manager.py
-└── types/                # Type definitions
-    ├── __init__.py
-    ├── base.py           # Core types
-    ├── messages.py       # Message types
-    ├── tools.py          # Tool types
-    └── providers/        # Provider-specific types
-        ├── anthropic.py
-        └── omni.py
-```
-
-## Key Components
-
-### Core
-- `computer_agent.py`: Main entry point for creating and using agents
-- `factory.py`: Factory for creating provider-specific implementations
-
-### Base
-- `agent.py`: Base agent implementation with shared functionality
-- `core/`: Core components used across providers
-- `tools/`: Shared tool implementations
-
-### Providers
-Each provider follows the same structure:
-- `agent.py`: Provider-specific agent implementation
-- `loop.py`: Provider-specific message loop
-- `tool_manager.py`: Tool management for provider
-
-### Types
-- `base.py`: Core type definitions
-- `messages.py`: Message-related types
-- `tools.py`: Tool-related types
-- `providers/`: Provider-specific type definitions

agent/providers/anthropic/messages/manager.py
DELETED
@@ -1,112 +0,0 @@
-from dataclasses import dataclass
-from typing import cast
-from anthropic.types.beta import (
-    BetaMessageParam,
-    BetaCacheControlEphemeralParam,
-    BetaToolResultBlockParam,
-)
-
-
-@dataclass
-class ImageRetentionConfig:
-    """Configuration for image retention in messages."""
-
-    num_images_to_keep: int | None = None
-    min_removal_threshold: int = 1
-    enable_caching: bool = True
-
-    def should_retain_images(self) -> bool:
-        """Check if image retention is enabled."""
-        return self.num_images_to_keep is not None and self.num_images_to_keep > 0
-
-
-class MessageManager:
-    """Manages message preparation, including image retention and caching."""
-
-    def __init__(self, image_retention_config: ImageRetentionConfig):
-        """Initialize the message manager.
-
-        Args:
-            image_retention_config: Configuration for image retention
-        """
-        if image_retention_config.min_removal_threshold < 1:
-            raise ValueError("min_removal_threshold must be at least 1")
-        self.image_retention_config = image_retention_config
-
-    def prepare_messages(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
-        """Prepare messages by applying image retention and caching as configured."""
-        if self.image_retention_config.should_retain_images():
-            self._filter_images(messages)
-        if self.image_retention_config.enable_caching:
-            self._inject_caching(messages)
-        return messages
-
-    def _filter_images(self, messages: list[BetaMessageParam]) -> None:
-        """Filter messages to retain only the specified number of most recent images."""
-        tool_result_blocks = cast(
-            list[BetaToolResultBlockParam],
-            [
-                item
-                for message in messages
-                for item in (message["content"] if isinstance(message["content"], list) else [])
-                if isinstance(item, dict) and item.get("type") == "tool_result"
-            ],
-        )
-
-        total_images = sum(
-            1
-            for tool_result in tool_result_blocks
-            for content in tool_result.get("content", [])
-            if isinstance(content, dict) and content.get("type") == "image"
-        )
-
-        images_to_remove = total_images - (self.image_retention_config.num_images_to_keep or 0)
-        # Round down to nearest min_removal_threshold for better cache behavior
-        images_to_remove -= images_to_remove % self.image_retention_config.min_removal_threshold
-
-        # Remove oldest images first
-        for tool_result in tool_result_blocks:
-            if isinstance(tool_result.get("content"), list):
-                new_content = []
-                for content in tool_result.get("content", []):
-                    if isinstance(content, dict) and content.get("type") == "image":
-                        if images_to_remove > 0:
-                            images_to_remove -= 1
-                            continue
-                    new_content.append(content)
-                tool_result["content"] = new_content
-
-    def _inject_caching(self, messages: list[BetaMessageParam]) -> None:
-        """Inject caching control for the most recent turns, limited to 3 blocks max to avoid API errors."""
-        # Anthropic API allows a maximum of 4 blocks with cache_control
-        # We use 3 here to be safe, as the system block may also have cache_control
-        blocks_with_cache_control = 0
-        max_cache_control_blocks = 3
-
-        for message in reversed(messages):
-            if message["role"] == "user" and isinstance(content := message["content"], list):
-                # Only add cache control to the latest message in each turn
-                if blocks_with_cache_control < max_cache_control_blocks:
-                    blocks_with_cache_control += 1
-                    # Add cache control to the last content block only
-                    if content and len(content) > 0:
-                        content[-1]["cache_control"] = BetaCacheControlEphemeralParam(
-                            type="ephemeral"
-                        )
-                else:
-                    # Remove any existing cache control
-                    if content and len(content) > 0:
-                        content[-1].pop("cache_control", None)
-
-        # Ensure we're not exceeding the limit by checking the total
-        if blocks_with_cache_control > max_cache_control_blocks:
-            # If we somehow exceeded the limit, remove excess cache controls
-            excess = blocks_with_cache_control - max_cache_control_blocks
-            for message in messages:
-                if excess <= 0:
-                    break
-
-                if message["role"] == "user" and isinstance(content := message["content"], list):
-                    if content and len(content) > 0 and "cache_control" in content[-1]:
-                        content[-1].pop("cache_control", None)
-                        excess -= 1

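For context on what is dropped here, a small sketch of how the removed MessageManager might have been driven; the class and field names come from the deleted module above, but the calling pattern and import path (valid only in 0.1.6 and earlier) are assumptions:

```python
# Sketch only: exercising the MessageManager API removed in this release.
# Import path existed in cua-agent <= 0.1.6; usage pattern is an assumption.
from agent.providers.anthropic.messages.manager import (
    ImageRetentionConfig,
    MessageManager,
)

# Keep at most the two most recent screenshots, drop older ones one at a time,
# and mark recent user turns with ephemeral cache_control blocks.
manager = MessageManager(
    ImageRetentionConfig(num_images_to_keep=2, min_removal_threshold=1, enable_caching=True)
)

messages = [
    {"role": "user", "content": [{"type": "text", "text": "Click the Submit button"}]},
]
prepared = manager.prepare_messages(messages)  # filters images and injects cache_control in place
```
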
agent/providers/omni/callbacks.py
DELETED
@@ -1,78 +0,0 @@
-"""Omni callback manager implementation."""
-
-import logging
-from typing import Any, Dict, Optional, Set
-
-from ...core.callbacks import BaseCallbackManager, ContentCallback, ToolCallback, APICallback
-from ...types.tools import ToolResult
-
-logger = logging.getLogger(__name__)
-
-class OmniCallbackManager(BaseCallbackManager):
-    """Callback manager for multi-provider support."""
-
-    def __init__(
-        self,
-        content_callback: ContentCallback,
-        tool_callback: ToolCallback,
-        api_callback: APICallback,
-    ):
-        """Initialize Omni callback manager.
-
-        Args:
-            content_callback: Callback for content updates
-            tool_callback: Callback for tool execution results
-            api_callback: Callback for API interactions
-        """
-        super().__init__(
-            content_callback=content_callback,
-            tool_callback=tool_callback,
-            api_callback=api_callback
-        )
-        self._active_tools: Set[str] = set()
-
-    def on_content(self, content: Any) -> None:
-        """Handle content updates.
-
-        Args:
-            content: Content update data
-        """
-        logger.debug(f"Content update: {content}")
-        self.content_callback(content)
-
-    def on_tool_result(self, result: ToolResult, tool_id: str) -> None:
-        """Handle tool execution results.
-
-        Args:
-            result: Tool execution result
-            tool_id: ID of the tool
-        """
-        logger.debug(f"Tool result for {tool_id}: {result}")
-        self.tool_callback(result, tool_id)
-
-    def on_api_interaction(
-        self,
-        request: Any,
-        response: Any,
-        error: Optional[Exception] = None
-    ) -> None:
-        """Handle API interactions.
-
-        Args:
-            request: API request data
-            response: API response data
-            error: Optional error that occurred
-        """
-        if error:
-            logger.error(f"API error: {str(error)}")
-        else:
-            logger.debug(f"API interaction - Request: {request}, Response: {response}")
-        self.api_callback(request, response, error)
-
-    def get_active_tools(self) -> Set[str]:
-        """Get currently active tools.
-
-        Returns:
-            Set of active tool names
-        """
-        return self._active_tools.copy()

agent/providers/omni/clients/groq.py
DELETED
@@ -1,101 +0,0 @@
-"""Groq client implementation."""
-
-import os
-import logging
-from typing import Dict, List, Optional, Any, Tuple
-
-from groq import Groq
-import re
-from .utils import is_image_path
-from .base import BaseOmniClient
-
-logger = logging.getLogger(__name__)
-
-
-class GroqClient(BaseOmniClient):
-    """Client for making Groq API calls."""
-
-    def __init__(
-        self,
-        api_key: Optional[str] = None,
-        model: str = "deepseek-r1-distill-llama-70b",
-        max_tokens: int = 4096,
-        temperature: float = 0.6,
-    ):
-        """Initialize Groq client.
-
-        Args:
-            api_key: Groq API key (if not provided, will try to get from env)
-            model: Model name to use
-            max_tokens: Maximum tokens to generate
-            temperature: Temperature for sampling
-        """
-        super().__init__(api_key=api_key, model=model)
-        self.api_key = api_key or os.getenv("GROQ_API_KEY")
-        if not self.api_key:
-            raise ValueError("No Groq API key provided")
-
-        self.max_tokens = max_tokens
-        self.temperature = temperature
-        self.client = Groq(api_key=self.api_key)
-        self.model: str = model  # Add explicit type annotation
-
-    def run_interleaved(
-        self, messages: List[Dict[str, Any]], system: str, max_tokens: Optional[int] = None
-    ) -> tuple[str, int]:
-        """Run interleaved chat completion.
-
-        Args:
-            messages: List of message dicts
-            system: System prompt
-            max_tokens: Optional max tokens override
-
-        Returns:
-            Tuple of (response text, token usage)
-        """
-        # Avoid using system messages for R1
-        final_messages = [{"role": "user", "content": system}]
-
-        # Process messages
-        if isinstance(messages, list):
-            for item in messages:
-                if isinstance(item, dict):
-                    # For dict items, concatenate all text content, ignoring images
-                    text_contents = []
-                    for cnt in item["content"]:
-                        if isinstance(cnt, str):
-                            if not is_image_path(cnt):  # Skip image paths
-                                text_contents.append(cnt)
-                        else:
-                            text_contents.append(str(cnt))
-
-                    if text_contents:  # Only add if there's text content
-                        message = {"role": "user", "content": " ".join(text_contents)}
-                        final_messages.append(message)
-                else:  # str
-                    message = {"role": "user", "content": item}
-                    final_messages.append(message)
-
-        elif isinstance(messages, str):
-            final_messages.append({"role": "user", "content": messages})
-
-        try:
-            completion = self.client.chat.completions.create(  # type: ignore
-                model=self.model,
-                messages=final_messages,  # type: ignore
-                temperature=self.temperature,
-                max_tokens=max_tokens or self.max_tokens,
-                top_p=0.95,
-                stream=False,
-            )
-
-            response = completion.choices[0].message.content
-            final_answer = response.split("</think>\n")[-1] if "</think>" in response else response
-            final_answer = final_answer.replace("<output>", "").replace("</output>", "")
-            token_usage = completion.usage.total_tokens
-
-            return final_answer, token_usage
-
-        except Exception as e:
-            logger.error(f"Error in Groq API call: {e}")
-            raise

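Likewise, the removed GroqClient exposed a single `run_interleaved` entry point. A sketch of how it was invoked, assuming a `GROQ_API_KEY` in the environment; the import path is the pre-0.1.18 module shown above and the calling pattern is an assumption:

```python
# Sketch only: usage of the GroqClient removed in this release.
# Import path existed in cua-agent <= 0.1.6; requires GROQ_API_KEY to be set.
from agent.providers.omni.clients.groq import GroqClient

client = GroqClient(model="deepseek-r1-distill-llama-70b", max_tokens=1024)

# run_interleaved flattens text content into user messages and returns the
# final answer (with <think>/<output> markers stripped) plus total token usage.
answer, tokens = client.run_interleaved(
    messages=[{"role": "user", "content": ["Describe the current screen."]}],
    system="You are a computer-use planning assistant.",
)
print(tokens, answer)
```
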