cua-agent 0.1.30__py3-none-any.whl → 0.1.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

@@ -93,7 +93,14 @@ class OAICompatClient(BaseOmniClient):
93
93
  """
94
94
  headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
95
95
 
96
- final_messages = [{"role": "system", "content": system}]
96
+ final_messages = [
97
+ {
98
+ "role": "system",
99
+ "content": [
100
+ { "type": "text", "text": system }
101
+ ]
102
+ }
103
+ ]
97
104
 
98
105
  # Process messages
99
106
  for item in messages:
@@ -117,7 +124,10 @@ class OAICompatClient(BaseOmniClient):
117
124
  else:
118
125
  message = {
119
126
  "role": item["role"],
120
- "content": [{"type": "text", "text": item["content"]}],
127
+ "content": [{
128
+ "type": "text",
129
+ "text": item["content"]
130
+ }],
121
131
  }
122
132
  final_messages.append(message)
123
133
  else:
@@ -162,8 +162,8 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
162
162
  y = kwargs.get("y")
163
163
  if x is None or y is None:
164
164
  raise ToolError("x and y coordinates are required for scroll action")
165
- scroll_x = kwargs.get("scroll_x", 0) // 20
166
- scroll_y = kwargs.get("scroll_y", 0) // 20
165
+ scroll_x = kwargs.get("scroll_x", 0) // 50
166
+ scroll_y = kwargs.get("scroll_y", 0) // 50
167
167
  return await self.handle_scroll(x, y, scroll_x, scroll_y)
168
168
  elif type == "screenshot":
169
169
  return await self.screenshot()
@@ -240,11 +240,7 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
240
240
 
241
241
  if len(mapped_keys) > 1:
242
242
  # For key combinations (like Ctrl+C)
243
- for k in mapped_keys:
244
- await self.computer.interface.press_key(k)
245
- await asyncio.sleep(0.1)
246
- for k in reversed(mapped_keys):
247
- await self.computer.interface.press_key(k)
243
+ await self.computer.interface.hotkey(*mapped_keys)
248
244
  else:
249
245
  # Single key press
250
246
  await self.computer.interface.press_key(mapped_keys[0])
@@ -94,8 +94,15 @@ class OAICompatClient(BaseUITarsClient):
94
94
  """
95
95
  headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
96
96
 
97
- final_messages = [{"role": "system", "content": system}]
98
-
97
+ final_messages = [
98
+ {
99
+ "role": "system",
100
+ "content": [
101
+ { "type": "text", "text": system }
102
+ ]
103
+ }
104
+ ]
105
+
99
106
  # Process messages
100
107
  for item in messages:
101
108
  if isinstance(item, dict):
@@ -138,8 +145,13 @@ class OAICompatClient(BaseUITarsClient):
138
145
  message = {"role": "user", "content": [{"type": "text", "text": item}]}
139
146
  final_messages.append(message)
140
147
 
141
- payload = {"model": self.model, "messages": final_messages, "temperature": self.temperature}
142
- payload["max_tokens"] = max_tokens or self.max_tokens
148
+ payload = {
149
+ "model": self.model,
150
+ "messages": final_messages,
151
+ "max_tokens": max_tokens or self.max_tokens,
152
+ "temperature": self.temperature,
153
+ "top_p": 0.7,
154
+ }
143
155
 
144
156
  try:
145
157
  async with aiohttp.ClientSession() as session:
@@ -20,7 +20,7 @@ from computer import Computer
20
20
  from .utils import add_box_token, parse_actions, parse_action_parameters
21
21
  from .tools.manager import ToolManager
22
22
  from .tools.computer import ToolResult
23
- from .prompts import COMPUTER_USE, SYSTEM_PROMPT
23
+ from .prompts import COMPUTER_USE, SYSTEM_PROMPT, MAC_SPECIFIC_NOTES
24
24
 
25
25
  from .clients.oaicompat import OAICompatClient
26
26
 
@@ -184,7 +184,7 @@ class UITARSLoop(BaseLoop):
184
184
  if first_user_idx is not None and instruction:
185
185
  # Create the computer use prompt
186
186
  user_prompt = COMPUTER_USE.format(
187
- instruction=instruction,
187
+ instruction='\n'.join([instruction, MAC_SPECIFIC_NOTES]),
188
188
  language="English"
189
189
  )
190
190
 
@@ -232,8 +232,11 @@ class UITARSLoop(BaseLoop):
232
232
  if self.client is None:
233
233
  raise RuntimeError("Failed to initialize client")
234
234
 
235
- # Convert messages to UI-TARS format
235
+ # Get messages in standard format from the message manager
236
+ self.message_manager.messages = messages.copy()
236
237
  prepared_messages = self.message_manager.get_messages()
238
+
239
+ # Convert messages to UI-TARS format
237
240
  uitars_messages = self.to_uitars_format(prepared_messages)
238
241
 
239
242
  # Log request
@@ -1,5 +1,9 @@
1
1
  """Prompts for UI-TARS agent."""
2
2
 
3
+ MAC_SPECIFIC_NOTES = """
4
+ (You are operating on macOS, use 'cmd' instead of 'ctrl' for most shortcuts e.g., hotkey(key='cmd c') for copy, hotkey(key='cmd v') for paste, hotkey(key='cmd t') for new tab).)
5
+ """
6
+
3
7
  SYSTEM_PROMPT = "You are a helpful assistant."
4
8
 
5
9
  COMPUTER_USE = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
@@ -56,4 +60,4 @@ finished(content='xxx') # Use escape characters \\', \\", and \\n in content par
56
60
 
57
61
  ## User Instruction
58
62
  {instruction}
59
- """
63
+ """
@@ -173,9 +173,13 @@ class ComputerTool(BaseComputerTool):
173
173
  elif action == "hotkey":
174
174
  if "keys" in kwargs:
175
175
  keys = kwargs["keys"]
176
- for key in keys:
177
- await self.computer.interface.press_key(key)
178
176
 
177
+ if len(keys) > 1:
178
+ await self.computer.interface.hotkey(*keys)
179
+ else:
180
+ # Single key press
181
+ await self.computer.interface.press_key(keys[0])
182
+
179
183
  # Wait for UI to update
180
184
  await asyncio.sleep(0.3)
181
185
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.1.30
3
+ Version: 0.1.31
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.10
@@ -101,6 +101,7 @@ pip install "cua-agent[all]"
101
101
  # or install specific loop providers
102
102
  pip install "cua-agent[openai]" # OpenAI Cua Loop
103
103
  pip install "cua-agent[anthropic]" # Anthropic Cua Loop
104
+ pip install "cua-agent[uitars]" # UI-Tars support
104
105
  pip install "cua-agent[omni]" # Cua Loop based on OmniParser (includes Ollama for local models)
105
106
  pip install "cua-agent[ui]" # Gradio UI for the agent
106
107
  ```
@@ -148,7 +149,13 @@ Refer to these notebooks for step-by-step guides on how to use the Computer-Use
148
149
 
149
150
  ## Using the Gradio UI
150
151
 
151
- The agent includes a Gradio-based user interface for easy interaction. To use it:
152
+ The agent includes a Gradio-based user interface for easier interaction.
153
+
154
+ <div align="center">
155
+ <img src="../../img/agent_gradio_ui.png"/>
156
+ </div>
157
+
158
+ To use it:
152
159
 
153
160
  ```bash
154
161
  # Install with Gradio support
@@ -42,7 +42,7 @@ agent/providers/omni/__init__.py,sha256=5ix67iJdtQNGuGJEjEOF65PwFWO7vdo1QlXD28bR
42
42
  agent/providers/omni/api_handler.py,sha256=7CpD43lYAqTyNKWfrD8XcM9ekbajqKCTH9p0TWtEQyg,1163
43
43
  agent/providers/omni/clients/anthropic.py,sha256=nC_lj3UwrLqx9TIew58yxLqKwrH1_LwJD6EqVSEfp3g,3670
44
44
  agent/providers/omni/clients/base.py,sha256=6lN86XKZT3cgBT9EQdz2akKoqbIvc-NXXIOkYKwXObE,946
45
- agent/providers/omni/clients/oaicompat.py,sha256=me8TMKt_GhXEcHokeVsotsmUOF7R6krF2OSKkRSuFP0,7743
45
+ agent/providers/omni/clients/oaicompat.py,sha256=ibGX4ezQSqb3-ITaVffADVjMMxWX-Af4gIpgXh-nKvI,7975
46
46
  agent/providers/omni/clients/ollama.py,sha256=PmR5EhU9Mi43_o5mZN36XcpiGKp5HbQwlXpiRF9gO3I,4174
47
47
  agent/providers/omni/clients/openai.py,sha256=iTSYWEJEM8INFPGJMiUVs8rFn0781XF_ofRkd7NT3gk,5920
48
48
  agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
@@ -62,24 +62,24 @@ agent/providers/openai/loop.py,sha256=l_sIdRcDhFewy4fXND2ALINdd63LK_G8oi_xCZdn4o
62
62
  agent/providers/openai/response_handler.py,sha256=K8v_92uSr9R74Y5INY4naeEZZZm35CLIl4h74MBZhsw,7953
63
63
  agent/providers/openai/tools/__init__.py,sha256=-KbHMWcd2OVTk5RYQ3ACBEMygwbH-VW6n_98p0lwM4A,344
64
64
  agent/providers/openai/tools/base.py,sha256=Np_BC9Cm6TslK99etE9hVTtsBlcEaGhoNCK3NXdB_Lw,2474
65
- agent/providers/openai/tools/computer.py,sha256=jZUr-IOjlYoGOYNiXF6AYdTY4Wch86aSu7bpr-SSKDU,12283
65
+ agent/providers/openai/tools/computer.py,sha256=6Hye9p_mgJKzMy3WzXgYGhAhML1eE41FrH52kbGnGU4,12097
66
66
  agent/providers/openai/tools/manager.py,sha256=-wM641dLf8vcv6QF9x_ViGJeDl2YTuUV93j6u7GBI18,3903
67
67
  agent/providers/openai/types.py,sha256=0mFUxeFy23fJhMwc6lAFVXKngg2fJIXkPS5oV284V1M,898
68
68
  agent/providers/openai/utils.py,sha256=YeCZWIqOFSeugWoqAS0rhxOKAfL-9uN9nrYSBGBgPdc,3175
69
69
  agent/providers/uitars/__init__.py,sha256=sq5OMVJP9E_sok9tIiKJreGkjmNWXPMObjPTClYv1es,38
70
70
  agent/providers/uitars/clients/base.py,sha256=5w8Ajmq1JiPyUQJUAq1lSkfpA8_Ts80NQiDxPMTtQrI,948
71
- agent/providers/uitars/clients/oaicompat.py,sha256=y3ieCZjNIdKUjSDYnP7SEJ5cCQzUJhv1rD8p_vpOWPw,8845
72
- agent/providers/uitars/loop.py,sha256=l6OZKJmBmA1qJJbACqU0HrUtzrJoJma-0ida6WrlZZY,23500
73
- agent/providers/uitars/prompts.py,sha256=XP8XE2KvDPxat8cDmIJuLHCq3iqO_7IOPWTKYB3WMHQ,2328
71
+ agent/providers/uitars/clients/oaicompat.py,sha256=Y5hE9rvo7Q7ZIB16c2YIp5U9jn17_uWXpl8Kqg_-ui8,9060
72
+ agent/providers/uitars/loop.py,sha256=CelcGCj8R8v5zI-IyeYz5m88FzaObnYBhOn5kHd15jg,23709
73
+ agent/providers/uitars/prompts.py,sha256=_pQNd438mFpZKZT0aMl6Bd0_GgQxuy9y08kQAMPi9UM,2536
74
74
  agent/providers/uitars/tools/__init__.py,sha256=0hc3W6u5TvcXYztYKIyve_C2G3XMfwt_y7grmH0ZHC0,29
75
- agent/providers/uitars/tools/computer.py,sha256=WpbpZA9tFcr3zGBlO0CpwUhKmiOsuwh5zlVzu0Ormks,11641
75
+ agent/providers/uitars/tools/computer.py,sha256=TeIg_aCtMroxWOBJEiYY_YI4krW_C3pYu51tgGsVUYU,11808
76
76
  agent/providers/uitars/tools/manager.py,sha256=2dK9STtz6NuZG3i0nH7ZuHJpb7vKJ2mOVbxGsb0t8lQ,1945
77
77
  agent/providers/uitars/utils.py,sha256=y3B91_a5D9hWx4PQl5KNEoZ2G2jUAGZe4m8-m_iI9qw,5184
78
78
  agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
79
79
  agent/ui/__init__.py,sha256=ohhxJLBin6k1hl5sKcmBST8mgh23WXgAXz3pN4f470E,45
80
80
  agent/ui/gradio/__init__.py,sha256=ANKZhv1HqsLheWbLVBlyRQ7Q5qGeXuPi5jDs8vu-ZMo,579
81
81
  agent/ui/gradio/app.py,sha256=6dnGEF_YOrlEp8qcfMeQKcZvm3VAFzZFF-lsEpQF1as,41989
82
- cua_agent-0.1.30.dist-info/METADATA,sha256=wBhcFokroLwf-0hGXcn9ZP-KcN-KT4ZAPCveTcKKzQ8,11179
83
- cua_agent-0.1.30.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
84
- cua_agent-0.1.30.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
85
- cua_agent-0.1.30.dist-info/RECORD,,
82
+ cua_agent-0.1.31.dist-info/METADATA,sha256=jFsDbJrzIDNmVNzc65D-g1zK62Lsfx2XC7tT0rXRgs4,11311
83
+ cua_agent-0.1.31.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
84
+ cua_agent-0.1.31.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
85
+ cua_agent-0.1.31.dist-info/RECORD,,