cua-agent 0.4.31__py3-none-any.whl → 0.4.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/adapters/huggingfacelocal_adapter.py +15 -66
- agent/adapters/models/__init__.py +33 -0
- agent/adapters/models/generic.py +75 -0
- agent/adapters/models/internvl.py +254 -0
- agent/adapters/models/opencua.py +100 -0
- agent/adapters/models/qwen2_5_vl.py +75 -0
- agent/agent.py +5 -1
- agent/callbacks/trajectory_saver.py +2 -0
- agent/cli.py +90 -1
- agent/loops/__init__.py +15 -1
- agent/loops/anthropic.py +2 -3
- agent/loops/composed_grounded.py +1 -1
- agent/loops/glm45v.py +3 -2
- agent/loops/gta1.py +1 -1
- agent/loops/holo.py +216 -0
- agent/loops/internvl.py +185 -0
- agent/loops/opencua.py +142 -0
- agent/loops/uitars.py +1 -1
- {cua_agent-0.4.31.dist-info → cua_agent-0.4.32.dist-info}/METADATA +20 -4
- {cua_agent-0.4.31.dist-info → cua_agent-0.4.32.dist-info}/RECORD +22 -14
- {cua_agent-0.4.31.dist-info → cua_agent-0.4.32.dist-info}/WHEEL +0 -0
- {cua_agent-0.4.31.dist-info → cua_agent-0.4.32.dist-info}/entry_points.txt +0 -0
agent/loops/opencua.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OpenCUA agent loop implementation for click prediction using litellm.acompletion
|
|
3
|
+
Based on OpenCUA model for GUI grounding tasks.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import asyncio
|
|
7
|
+
import json
|
|
8
|
+
import re
|
|
9
|
+
import base64
|
|
10
|
+
from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple
|
|
11
|
+
from io import BytesIO
|
|
12
|
+
import uuid
|
|
13
|
+
from PIL import Image
|
|
14
|
+
import litellm
|
|
15
|
+
import math
|
|
16
|
+
|
|
17
|
+
from .composed_grounded import ComposedGroundedConfig
|
|
18
|
+
from ..decorators import register_agent
|
|
19
|
+
from ..types import Messages, AgentResponse, Tools, AgentCapability
|
|
20
|
+
from ..loops.base import AsyncAgentConfig
|
|
21
|
+
|
|
22
|
+
def extract_coordinates_from_pyautogui(text: str) -> Optional[Tuple[int, int]]:
    """Extract click coordinates from a ``pyautogui.click(x=..., y=...)`` call.

    The first occurrence found anywhere in ``text`` wins. Optional whitespace
    is tolerated around the parentheses, the ``=`` signs and the comma, so
    ``pyautogui.click(x=1443, y=343)`` and ``pyautogui.click( x = 1443 ,y=343 )``
    both parse.

    Args:
        text: Model output that may contain a pyautogui click call.

    Returns:
        ``(x, y)`` as integers, or ``None`` when ``text`` is not a string or
        contains no matching click call.
    """
    # Model responses can come back without text content (e.g. None); treat
    # anything that is not a string as "no coordinates" instead of relying on
    # a blanket ``except Exception`` around the regex call.
    if not isinstance(text, str):
        return None

    # Only non-negative integer coordinates are accepted, as before.
    pattern = r"pyautogui\.click\(\s*x\s*=\s*(\d+)\s*,\s*y\s*=\s*(\d+)\s*\)"
    match = re.search(pattern, text)
    if match is None:
        return None

    return (int(match.group(1)), int(match.group(2)))
|
|
34
|
+
|
|
35
|
+
@register_agent(models=r"(?i).*OpenCUA.*")
class OpenCUAConfig(ComposedGroundedConfig):
    """Agent configuration for OpenCUA models, used for click prediction.

    Registered for any model name containing "OpenCUA" (case-insensitive).
    Click prediction sends a screenshot plus instruction through
    ``litellm.acompletion`` and parses a ``pyautogui.click`` call out of the
    reply; full steps are delegated to ``ComposedGroundedConfig``.
    """

    def __init__(self):
        super().__init__()
        # Initialised to None here; nothing in this class reassigns them.
        self.current_model = None
        self.last_screenshot_b64 = None

    async def predict_step(
        self,
        messages: List[Dict[str, Any]],
        model: str,
        tools: Optional[List[Dict[str, Any]]] = None,
        max_retries: Optional[int] = None,
        stream: bool = False,
        computer_handler=None,
        _on_api_start=None,
        _on_api_end=None,
        _on_usage=None,
        _on_screenshot=None,
        **kwargs
    ) -> Dict[str, Any]:
        """Run one step by falling back to a self-composed model.

        The parent implementation is called with the model string
        ``"{model}+{model}"``; every other argument is forwarded unchanged.

        Returns:
            Whatever the parent ``predict_step`` returns.
        """
        # Compose the model with itself before handing off to the parent loop.
        self_composed = f"{model}+{model}"
        return await super().predict_step(
            messages=messages,
            model=self_composed,
            tools=tools,
            max_retries=max_retries,
            stream=stream,
            computer_handler=computer_handler,
            _on_api_start=_on_api_start,
            _on_api_end=_on_api_end,
            _on_usage=_on_usage,
            _on_screenshot=_on_screenshot,
            **kwargs
        )

    async def predict_click(
        self,
        model: str,
        image_b64: str,
        instruction: str,
        **kwargs
    ) -> Optional[Tuple[int, int]]:
        """Ask the OpenCUA model where to click, via ``litellm.acompletion``.

        Args:
            model: The OpenCUA model name.
            image_b64: Base64-encoded screenshot, sent as a PNG data URL.
            instruction: Description of the element to click.
            **kwargs: Extra completion arguments; these override the defaults
                set below when keys collide.

        Returns:
            ``(x, y)`` parsed from the model reply, or ``None`` when no
            ``pyautogui.click(x=..., y=...)`` call is found in it.
        """
        system_text = (
            "You are a GUI agent. You are given a task and a screenshot of the screen. "
            "You need to perform a series of pyautogui actions to complete the task."
        )

        # The user turn carries the screenshot first, then the instruction.
        screenshot_part = {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{image_b64}"},
        }
        instruction_part = {"type": "text", "text": f"Click on {instruction}"}
        conversation = [
            {"role": "system", "content": system_text},
            {"role": "user", "content": [screenshot_part, instruction_part]},
        ]

        # NOTE(review): litellm's standard length cap is ``max_tokens``;
        # confirm the serving backend actually honors ``max_new_tokens``.
        request = {
            "model": model,
            "messages": conversation,
            "max_new_tokens": 2056,
            "temperature": 0,
        }
        request.update(kwargs)

        completion = await litellm.acompletion(**request)
        reply_text = completion.choices[0].message.content

        return extract_coordinates_from_pyautogui(reply_text)

    def get_capabilities(self) -> List[AgentCapability]:
        """Return the capabilities supported by this agent."""
        return ["click"]
|
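agent/loops/uitars.py
CHANGED
(diff body for this file is not shown in this capture)
{cua_agent-0.4.31.dist-info → cua_agent-0.4.32.dist-info}/METADATA
CHANGED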
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cua-agent
|
|
3
|
-
Version: 0.4.31
|
|
3
|
+
Version: 0.4.32
|
|
4
4
|
Summary: CUA (Computer Use) Agent for AI-driven computer interaction
|
|
5
5
|
Author-Email: TryCua <gh@trycua.com>
|
|
6
6
|
Requires-Python: >=3.12
|
|
@@ -31,22 +31,38 @@ Provides-Extra: glm45v-hf
|
|
|
31
31
|
Requires-Dist: accelerate; extra == "glm45v-hf"
|
|
32
32
|
Requires-Dist: torch; extra == "glm45v-hf"
|
|
33
33
|
Requires-Dist: transformers-v4.55.0-GLM-4.5V-preview; extra == "glm45v-hf"
|
|
34
|
+
Provides-Extra: opencua-hf
|
|
35
|
+
Requires-Dist: accelerate; extra == "opencua-hf"
|
|
36
|
+
Requires-Dist: torch; extra == "opencua-hf"
|
|
37
|
+
Requires-Dist: transformers==4.53.0; extra == "opencua-hf"
|
|
38
|
+
Requires-Dist: tiktoken>=0.11.0; extra == "opencua-hf"
|
|
39
|
+
Requires-Dist: blobfile>=3.0.0; extra == "opencua-hf"
|
|
40
|
+
Provides-Extra: internvl-hf
|
|
41
|
+
Requires-Dist: accelerate; extra == "internvl-hf"
|
|
42
|
+
Requires-Dist: torch; extra == "internvl-hf"
|
|
43
|
+
Requires-Dist: transformers>=4.55.0; extra == "internvl-hf"
|
|
44
|
+
Requires-Dist: einops; extra == "internvl-hf"
|
|
45
|
+
Requires-Dist: timm; extra == "internvl-hf"
|
|
34
46
|
Provides-Extra: ui
|
|
35
47
|
Requires-Dist: gradio>=5.23.3; extra == "ui"
|
|
36
48
|
Requires-Dist: python-dotenv>=1.0.1; extra == "ui"
|
|
37
49
|
Provides-Extra: cli
|
|
38
50
|
Requires-Dist: yaspin>=3.1.0; extra == "cli"
|
|
39
51
|
Provides-Extra: hud
|
|
40
|
-
Requires-Dist: hud-python==0.4.
|
|
52
|
+
Requires-Dist: hud-python==0.4.26; extra == "hud"
|
|
41
53
|
Provides-Extra: all
|
|
42
54
|
Requires-Dist: mlx-vlm>=0.1.27; sys_platform == "darwin" and extra == "all"
|
|
43
55
|
Requires-Dist: accelerate; extra == "all"
|
|
44
56
|
Requires-Dist: torch; extra == "all"
|
|
45
|
-
Requires-Dist: transformers>=4.
|
|
57
|
+
Requires-Dist: transformers>=4.55.0; extra == "all"
|
|
58
|
+
Requires-Dist: einops; extra == "all"
|
|
59
|
+
Requires-Dist: timm; extra == "all"
|
|
60
|
+
Requires-Dist: tiktoken>=0.11.0; extra == "all"
|
|
61
|
+
Requires-Dist: blobfile>=3.0.0; extra == "all"
|
|
46
62
|
Requires-Dist: gradio>=5.23.3; extra == "all"
|
|
47
63
|
Requires-Dist: python-dotenv>=1.0.1; extra == "all"
|
|
48
64
|
Requires-Dist: yaspin>=3.1.0; extra == "all"
|
|
49
|
-
Requires-Dist: hud-python==0.4.
|
|
65
|
+
Requires-Dist: hud-python==0.4.26; extra == "all"
|
|
50
66
|
Description-Content-Type: text/markdown
|
|
51
67
|
|
|
52
68
|
<div align="center">
|
|
{cua_agent-0.4.31.dist-info → cua_agent-0.4.32.dist-info}/RECORD
CHANGED
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
agent/__init__.py,sha256=MaW-BczJ-lCACPYH39DvFhE7ZWiSo7sBO6pBfyO7Nxc,1269
|
|
2
2
|
agent/__main__.py,sha256=lBUe8Niqa5XoCjwFfXyX7GtnUwjjZXC1-j4V9mvUYSc,538
|
|
3
3
|
agent/adapters/__init__.py,sha256=Q_OxxwXBcBIetQ_DtHS5bwZWXrvCKPX2grCg8R0UKek,301
|
|
4
|
-
agent/adapters/huggingfacelocal_adapter.py,sha256=
|
|
4
|
+
agent/adapters/huggingfacelocal_adapter.py,sha256=3ht4jCUP4rpjPxi7vj8xOJNelTgfsUq0YbS44FwVN0c,7089
|
|
5
5
|
agent/adapters/human_adapter.py,sha256=xT4nnfNXb1z-vnGFlLmFEZN7TMcoMBGS40MtR1Zwv4o,13079
|
|
6
6
|
agent/adapters/mlxvlm_adapter.py,sha256=4VhhKDZfLLKL5joL1v4PPFvYw-R8spoDsat3vOAGnpE,14864
|
|
7
|
-
agent/
|
|
7
|
+
agent/adapters/models/__init__.py,sha256=23ETHLbn1C3VW4zMzSn1Ql-gsZz-wQCeJX12Lv_0n8M,1412
|
|
8
|
+
agent/adapters/models/generic.py,sha256=hpJt73jQePV80NYRAJTtOaElcrtGcpYK-QnInZIUkk0,2768
|
|
9
|
+
agent/adapters/models/internvl.py,sha256=PbFjU_Fu1JIahlEtR5pTO7RSV2UYJ9CAlQrMXKXJrTA,11385
|
|
10
|
+
agent/adapters/models/opencua.py,sha256=gPoZBMXyjiPEJ7Py2mpRWHiyay8X-y-pOSKw1LC_ihU,3924
|
|
11
|
+
agent/adapters/models/qwen2_5_vl.py,sha256=kc9YrtCB0FFy-oB1EkD_zasxYnbJg1wGiss6i0ilwdo,2809
|
|
12
|
+
agent/agent.py,sha256=NCRK1xRGt_sa1Yh_do2IPRceV024P2p7KR81wwCYlY8,30462
|
|
8
13
|
agent/callbacks/__init__.py,sha256=VqYHFt_wk1mc3hKudMZk2Qakrh-bn2rVKh_4xebF0tI,725
|
|
9
14
|
agent/callbacks/base.py,sha256=UnnnYlh6XCm6HKZZsAPaT_Eyo9LUYLyjyNwF-QRm6Ns,4691
|
|
10
15
|
agent/callbacks/budget_manager.py,sha256=RyKM-7iXQcDotYvrw3eURzeEHEXvQjID-NobtvQWE7k,1832
|
|
@@ -14,8 +19,8 @@ agent/callbacks/operator_validator.py,sha256=T5tp62pkShkcdHu2rgREUGdk8fryL_ziJsI
|
|
|
14
19
|
agent/callbacks/pii_anonymization.py,sha256=NEkUTUjQBi82nqus7kT-1E4RaeQ2hQrY7YCnKndLhP8,3272
|
|
15
20
|
agent/callbacks/prompt_instructions.py,sha256=RUqsJhiNiXqaOM_P2AfyBinWUDdgDku46BExLMUJHn4,1517
|
|
16
21
|
agent/callbacks/telemetry.py,sha256=RbUDhE41mTi8g9hNre0EpltK_NUZkLj8buJLWBzs0Ek,7363
|
|
17
|
-
agent/callbacks/trajectory_saver.py,sha256
|
|
18
|
-
agent/cli.py,sha256=
|
|
22
|
+
agent/callbacks/trajectory_saver.py,sha256=-XNgiKU6T8Qw_i2AZMQuw0HuUe6MHkU89rjn_T386Rw,16128
|
|
23
|
+
agent/cli.py,sha256=HddU18IvvKdyvQu0ru21nAcNc6k7toYuyjgORIzX_qo,16110
|
|
19
24
|
agent/computers/__init__.py,sha256=39ISJsaREaQIZckpzxSuLhuR763wUU3TxUux78EKjAg,1477
|
|
20
25
|
agent/computers/base.py,sha256=hZntX4vgc1ahD3EnFeb9lUjtBmgka1vb27hndPl9tKQ,2187
|
|
21
26
|
agent/computers/cua.py,sha256=xp2A34kT2C1NKqSRo2GB6766gkraM-UtpFjRv8LUTSc,4889
|
|
@@ -28,16 +33,19 @@ agent/human_tool/ui.py,sha256=wu9eZorhxCkyPTlBSZjYaVzutoHMlucAz8UGNpAT4bM,30644
|
|
|
28
33
|
agent/integrations/hud/__init__.py,sha256=xir5BVAlG2cFc7rHSx_Ea_2b1kp2TtFuKJk07jny7qY,5969
|
|
29
34
|
agent/integrations/hud/agent.py,sha256=GBikd9MhjDNKMiMG8J7PE3OMSmvmC_JLZ1p5xr2cZoc,14006
|
|
30
35
|
agent/integrations/hud/proxy.py,sha256=8HUoh7uZ8Z3vkhPXK0dskgePGsP8oCqyYij0mE_E7X8,10902
|
|
31
|
-
agent/loops/__init__.py,sha256=
|
|
32
|
-
agent/loops/anthropic.py,sha256=
|
|
36
|
+
agent/loops/__init__.py,sha256=c6stEkT15smK8ZIf9j2kyOko84uz1YIvHXx0Mbe2wq8,472
|
|
37
|
+
agent/loops/anthropic.py,sha256=ODrMvmTkyzIOLjGq6HbKzzgBu19TE_Xlsi--7vc5T6o,70196
|
|
33
38
|
agent/loops/base.py,sha256=LK7kSTnc2CB88LI7qr2VP7LMq0eS5r2bSEnrxO6IN5U,2345
|
|
34
|
-
agent/loops/composed_grounded.py,sha256=
|
|
35
|
-
agent/loops/glm45v.py,sha256=
|
|
36
|
-
agent/loops/gta1.py,sha256=
|
|
39
|
+
agent/loops/composed_grounded.py,sha256=Um_8G0v5DEzF_A9wWIGp_IDPDMvv4IXDTFpEDH92Vto,12367
|
|
40
|
+
agent/loops/glm45v.py,sha256=EKAoh-PWkcCdzBVebjXbdqoDNkXgcmJpIqmTNPiZ8TM,35127
|
|
41
|
+
agent/loops/gta1.py,sha256=uGIcUH5ChzO75eGvoQxuKMBWjX-1J9-xmC7vPetobjU,5831
|
|
42
|
+
agent/loops/holo.py,sha256=peQ0xx4XQDBQ3g2XKRLCgyrU_2PkXe3RaysNBqFyS90,7481
|
|
43
|
+
agent/loops/internvl.py,sha256=iQs6DSoP9JOyUxRAz_HPuv4Hi2Sbv-Jc3022W-oPX5Y,6596
|
|
37
44
|
agent/loops/model_types.csv,sha256=GmFn4x80yoUpQZuQ-GXtJkPVlOLYWZ5u_5A73HRyeNE,112
|
|
38
45
|
agent/loops/omniparser.py,sha256=-db8JUL2Orn47ERIaLbuNShAXn4LeIgYzRWphn_9Dg4,15071
|
|
39
46
|
agent/loops/openai.py,sha256=3UEXdecqGkyknhTgp6zxr_cNCVg5vM-61I6SKMNl6m8,8692
|
|
40
|
-
agent/loops/
|
|
47
|
+
agent/loops/opencua.py,sha256=Chb4UASHDrdcX_fO__Gw2e9ay4Hl6Vq38K5x-IoHyuo,4432
|
|
48
|
+
agent/loops/uitars.py,sha256=mVPt4V-HabX7ZiQnM55BVQt73CuZUjmUAsbm4Tf6TXk,32351
|
|
41
49
|
agent/proxy/examples.py,sha256=GYFJ-sfDsSNZr9n_qpvDx_0rShqoKE5JW0ibbljWfoo,6192
|
|
42
50
|
agent/proxy/handlers.py,sha256=48mMNyZOU3dJQ6oI5r2kDDe29rcU49MConlB0MZeCsU,9602
|
|
43
51
|
agent/responses.py,sha256=_SoN4BkaTxMHMB21EOtDc_aDBIJlfDwsCzszMBnIkH0,30764
|
|
@@ -47,7 +55,7 @@ agent/ui/__main__.py,sha256=vudWXYvGM0aNT5aZ94HPtGW8YXOZ4cLXepHyhUM_k1g,73
|
|
|
47
55
|
agent/ui/gradio/__init__.py,sha256=yv4Mrfo-Sj2U5sVn_UJHAuwYCezo-5O4ItR2C9jzNko,145
|
|
48
56
|
agent/ui/gradio/app.py,sha256=Ol97YEbwREZZQ9_PMjVHlfOcu9BGsawxgAGAm79hT80,9117
|
|
49
57
|
agent/ui/gradio/ui_components.py,sha256=dJUvKDmc1oSejtoR_gU_oWWYwxaOOQyPloSYRGMrUCQ,36068
|
|
50
|
-
cua_agent-0.4.
|
|
51
|
-
cua_agent-0.4.
|
|
52
|
-
cua_agent-0.4.
|
|
53
|
-
cua_agent-0.4.
|
|
58
|
+
cua_agent-0.4.32.dist-info/METADATA,sha256=9DM4yfZ8hH6-JeNvke6WOgzZLEF0i3A8cDeb3aTGpyk,6340
|
|
59
|
+
cua_agent-0.4.32.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
|
|
60
|
+
cua_agent-0.4.32.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
61
|
+
cua_agent-0.4.32.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|