cua-agent 0.1.30__tar.gz → 0.1.31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- {cua_agent-0.1.30 → cua_agent-0.1.31}/PKG-INFO +9 -2
- {cua_agent-0.1.30 → cua_agent-0.1.31}/README.md +8 -1
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/clients/oaicompat.py +12 -2
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/tools/computer.py +3 -7
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/clients/oaicompat.py +16 -4
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/loop.py +6 -3
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/prompts.py +5 -1
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/tools/computer.py +6 -2
- {cua_agent-0.1.30 → cua_agent-0.1.31}/pyproject.toml +3 -3
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/agent.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/base.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/callbacks.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/experiment.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/factory.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/messages.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/provider_config.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/telemetry.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/base.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/bash.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/collection.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/computer.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/edit.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/manager.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/types.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/visualization.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/api/client.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/api/logging.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/api_handler.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/callbacks/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/callbacks/manager.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/loop.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/prompts.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/response_handler.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/base.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/bash.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/collection.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/computer.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/edit.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/manager.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/run.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/types.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/utils.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/api_handler.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/clients/anthropic.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/clients/base.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/clients/ollama.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/clients/openai.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/clients/utils.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/image_utils.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/loop.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/parser.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/prompts.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/tools/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/tools/base.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/tools/bash.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/tools/computer.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/tools/manager.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/utils.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/api_handler.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/loop.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/response_handler.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/tools/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/tools/base.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/tools/manager.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/types.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/utils.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/clients/base.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/tools/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/tools/manager.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/utils.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/telemetry.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/ui/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/ui/gradio/__init__.py +0 -0
- {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/ui/gradio/app.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cua-agent
|
|
3
|
-
Version: 0.1.30
|
|
3
|
+
Version: 0.1.31
|
|
4
4
|
Summary: CUA (Computer Use) Agent for AI-driven computer interaction
|
|
5
5
|
Author-Email: TryCua <gh@trycua.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -101,6 +101,7 @@ pip install "cua-agent[all]"
|
|
|
101
101
|
# or install specific loop providers
|
|
102
102
|
pip install "cua-agent[openai]" # OpenAI Cua Loop
|
|
103
103
|
pip install "cua-agent[anthropic]" # Anthropic Cua Loop
|
|
104
|
+
pip install "cua-agent[uitars]" # UI-Tars support
|
|
104
105
|
pip install "cua-agent[omni]" # Cua Loop based on OmniParser (includes Ollama for local models)
|
|
105
106
|
pip install "cua-agent[ui]" # Gradio UI for the agent
|
|
106
107
|
```
|
|
@@ -148,7 +149,13 @@ Refer to these notebooks for step-by-step guides on how to use the Computer-Use
|
|
|
148
149
|
|
|
149
150
|
## Using the Gradio UI
|
|
150
151
|
|
|
151
|
-
The agent includes a Gradio-based user interface for
|
|
152
|
+
The agent includes a Gradio-based user interface for easier interaction.
|
|
153
|
+
|
|
154
|
+
<div align="center">
|
|
155
|
+
<img src="../../img/agent_gradio_ui.png"/>
|
|
156
|
+
</div>
|
|
157
|
+
|
|
158
|
+
To use it:
|
|
152
159
|
|
|
153
160
|
```bash
|
|
154
161
|
# Install with Gradio support
|
|
@@ -31,6 +31,7 @@ pip install "cua-agent[all]"
|
|
|
31
31
|
# or install specific loop providers
|
|
32
32
|
pip install "cua-agent[openai]" # OpenAI Cua Loop
|
|
33
33
|
pip install "cua-agent[anthropic]" # Anthropic Cua Loop
|
|
34
|
+
pip install "cua-agent[uitars]" # UI-Tars support
|
|
34
35
|
pip install "cua-agent[omni]" # Cua Loop based on OmniParser (includes Ollama for local models)
|
|
35
36
|
pip install "cua-agent[ui]" # Gradio UI for the agent
|
|
36
37
|
```
|
|
@@ -78,7 +79,13 @@ Refer to these notebooks for step-by-step guides on how to use the Computer-Use
|
|
|
78
79
|
|
|
79
80
|
## Using the Gradio UI
|
|
80
81
|
|
|
81
|
-
The agent includes a Gradio-based user interface for
|
|
82
|
+
The agent includes a Gradio-based user interface for easier interaction.
|
|
83
|
+
|
|
84
|
+
<div align="center">
|
|
85
|
+
<img src="../../img/agent_gradio_ui.png"/>
|
|
86
|
+
</div>
|
|
87
|
+
|
|
88
|
+
To use it:
|
|
82
89
|
|
|
83
90
|
```bash
|
|
84
91
|
# Install with Gradio support
|
|
@@ -93,7 +93,14 @@ class OAICompatClient(BaseOmniClient):
|
|
|
93
93
|
"""
|
|
94
94
|
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
|
|
95
95
|
|
|
96
|
-
final_messages = [
|
|
96
|
+
final_messages = [
|
|
97
|
+
{
|
|
98
|
+
"role": "system",
|
|
99
|
+
"content": [
|
|
100
|
+
{ "type": "text", "text": system }
|
|
101
|
+
]
|
|
102
|
+
}
|
|
103
|
+
]
|
|
97
104
|
|
|
98
105
|
# Process messages
|
|
99
106
|
for item in messages:
|
|
@@ -117,7 +124,10 @@ class OAICompatClient(BaseOmniClient):
|
|
|
117
124
|
else:
|
|
118
125
|
message = {
|
|
119
126
|
"role": item["role"],
|
|
120
|
-
"content": [{
|
|
127
|
+
"content": [{
|
|
128
|
+
"type": "text",
|
|
129
|
+
"text": item["content"]
|
|
130
|
+
}],
|
|
121
131
|
}
|
|
122
132
|
final_messages.append(message)
|
|
123
133
|
else:
|
|
@@ -162,8 +162,8 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
|
|
|
162
162
|
y = kwargs.get("y")
|
|
163
163
|
if x is None or y is None:
|
|
164
164
|
raise ToolError("x and y coordinates are required for scroll action")
|
|
165
|
-
scroll_x = kwargs.get("scroll_x", 0) //
|
|
166
|
-
scroll_y = kwargs.get("scroll_y", 0) //
|
|
165
|
+
scroll_x = kwargs.get("scroll_x", 0) // 50
|
|
166
|
+
scroll_y = kwargs.get("scroll_y", 0) // 50
|
|
167
167
|
return await self.handle_scroll(x, y, scroll_x, scroll_y)
|
|
168
168
|
elif type == "screenshot":
|
|
169
169
|
return await self.screenshot()
|
|
@@ -240,11 +240,7 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
|
|
|
240
240
|
|
|
241
241
|
if len(mapped_keys) > 1:
|
|
242
242
|
# For key combinations (like Ctrl+C)
|
|
243
|
-
|
|
244
|
-
await self.computer.interface.press_key(k)
|
|
245
|
-
await asyncio.sleep(0.1)
|
|
246
|
-
for k in reversed(mapped_keys):
|
|
247
|
-
await self.computer.interface.press_key(k)
|
|
243
|
+
await self.computer.interface.hotkey(*mapped_keys)
|
|
248
244
|
else:
|
|
249
245
|
# Single key press
|
|
250
246
|
await self.computer.interface.press_key(mapped_keys[0])
|
|
@@ -94,8 +94,15 @@ class OAICompatClient(BaseUITarsClient):
|
|
|
94
94
|
"""
|
|
95
95
|
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
|
|
96
96
|
|
|
97
|
-
final_messages = [
|
|
98
|
-
|
|
97
|
+
final_messages = [
|
|
98
|
+
{
|
|
99
|
+
"role": "system",
|
|
100
|
+
"content": [
|
|
101
|
+
{ "type": "text", "text": system }
|
|
102
|
+
]
|
|
103
|
+
}
|
|
104
|
+
]
|
|
105
|
+
|
|
99
106
|
# Process messages
|
|
100
107
|
for item in messages:
|
|
101
108
|
if isinstance(item, dict):
|
|
@@ -138,8 +145,13 @@ class OAICompatClient(BaseUITarsClient):
|
|
|
138
145
|
message = {"role": "user", "content": [{"type": "text", "text": item}]}
|
|
139
146
|
final_messages.append(message)
|
|
140
147
|
|
|
141
|
-
payload = {
|
|
142
|
-
|
|
148
|
+
payload = {
|
|
149
|
+
"model": self.model,
|
|
150
|
+
"messages": final_messages,
|
|
151
|
+
"max_tokens": max_tokens or self.max_tokens,
|
|
152
|
+
"temperature": self.temperature,
|
|
153
|
+
"top_p": 0.7,
|
|
154
|
+
}
|
|
143
155
|
|
|
144
156
|
try:
|
|
145
157
|
async with aiohttp.ClientSession() as session:
|
|
@@ -20,7 +20,7 @@ from computer import Computer
|
|
|
20
20
|
from .utils import add_box_token, parse_actions, parse_action_parameters
|
|
21
21
|
from .tools.manager import ToolManager
|
|
22
22
|
from .tools.computer import ToolResult
|
|
23
|
-
from .prompts import COMPUTER_USE, SYSTEM_PROMPT
|
|
23
|
+
from .prompts import COMPUTER_USE, SYSTEM_PROMPT, MAC_SPECIFIC_NOTES
|
|
24
24
|
|
|
25
25
|
from .clients.oaicompat import OAICompatClient
|
|
26
26
|
|
|
@@ -184,7 +184,7 @@ class UITARSLoop(BaseLoop):
|
|
|
184
184
|
if first_user_idx is not None and instruction:
|
|
185
185
|
# Create the computer use prompt
|
|
186
186
|
user_prompt = COMPUTER_USE.format(
|
|
187
|
-
instruction=instruction,
|
|
187
|
+
instruction='\n'.join([instruction, MAC_SPECIFIC_NOTES]),
|
|
188
188
|
language="English"
|
|
189
189
|
)
|
|
190
190
|
|
|
@@ -232,8 +232,11 @@ class UITARSLoop(BaseLoop):
|
|
|
232
232
|
if self.client is None:
|
|
233
233
|
raise RuntimeError("Failed to initialize client")
|
|
234
234
|
|
|
235
|
-
#
|
|
235
|
+
# Get messages in standard format from the message manager
|
|
236
|
+
self.message_manager.messages = messages.copy()
|
|
236
237
|
prepared_messages = self.message_manager.get_messages()
|
|
238
|
+
|
|
239
|
+
# Convert messages to UI-TARS format
|
|
237
240
|
uitars_messages = self.to_uitars_format(prepared_messages)
|
|
238
241
|
|
|
239
242
|
# Log request
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
"""Prompts for UI-TARS agent."""
|
|
2
2
|
|
|
3
|
+
MAC_SPECIFIC_NOTES = """
|
|
4
|
+
(You are operating on macOS, use 'cmd' instead of 'ctrl' for most shortcuts e.g., hotkey(key='cmd c') for copy, hotkey(key='cmd v') for paste, hotkey(key='cmd t') for new tab).)
|
|
5
|
+
"""
|
|
6
|
+
|
|
3
7
|
SYSTEM_PROMPT = "You are a helpful assistant."
|
|
4
8
|
|
|
5
9
|
COMPUTER_USE = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
|
|
@@ -56,4 +60,4 @@ finished(content='xxx') # Use escape characters \\', \\", and \\n in content par
|
|
|
56
60
|
|
|
57
61
|
## User Instruction
|
|
58
62
|
{instruction}
|
|
59
|
-
"""
|
|
63
|
+
"""
|
|
@@ -173,9 +173,13 @@ class ComputerTool(BaseComputerTool):
|
|
|
173
173
|
elif action == "hotkey":
|
|
174
174
|
if "keys" in kwargs:
|
|
175
175
|
keys = kwargs["keys"]
|
|
176
|
-
for key in keys:
|
|
177
|
-
await self.computer.interface.press_key(key)
|
|
178
176
|
|
|
177
|
+
if len(keys) > 1:
|
|
178
|
+
await self.computer.interface.hotkey(*keys)
|
|
179
|
+
else:
|
|
180
|
+
# Single key press
|
|
181
|
+
await self.computer.interface.press_key(keys[0])
|
|
182
|
+
|
|
179
183
|
# Wait for UI to update
|
|
180
184
|
await asyncio.sleep(0.3)
|
|
181
185
|
|
|
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
|
|
|
6
6
|
|
|
7
7
|
[project]
|
|
8
8
|
name = "cua-agent"
|
|
9
|
-
version = "0.1.30"
|
|
9
|
+
version = "0.1.31"
|
|
10
10
|
description = "CUA (Computer Use) Agent for AI-driven computer interaction"
|
|
11
11
|
readme = "README.md"
|
|
12
12
|
authors = [
|
|
@@ -108,7 +108,7 @@ target-version = [
|
|
|
108
108
|
|
|
109
109
|
[tool.ruff]
|
|
110
110
|
line-length = 100
|
|
111
|
-
target-version = "0.1.30"
|
|
111
|
+
target-version = "0.1.31"
|
|
112
112
|
select = [
|
|
113
113
|
"E",
|
|
114
114
|
"F",
|
|
@@ -122,7 +122,7 @@ docstring-code-format = true
|
|
|
122
122
|
|
|
123
123
|
[tool.mypy]
|
|
124
124
|
strict = true
|
|
125
|
-
python_version = "0.1.30"
|
|
125
|
+
python_version = "0.1.31"
|
|
126
126
|
ignore_missing_imports = true
|
|
127
127
|
disallow_untyped_defs = true
|
|
128
128
|
check_untyped_defs = true
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|