cua-agent 0.1.30__tar.gz → 0.1.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (84)
  1. {cua_agent-0.1.30 → cua_agent-0.1.31}/PKG-INFO +9 -2
  2. {cua_agent-0.1.30 → cua_agent-0.1.31}/README.md +8 -1
  3. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/clients/oaicompat.py +12 -2
  4. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/tools/computer.py +3 -7
  5. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/clients/oaicompat.py +16 -4
  6. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/loop.py +6 -3
  7. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/prompts.py +5 -1
  8. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/tools/computer.py +6 -2
  9. {cua_agent-0.1.30 → cua_agent-0.1.31}/pyproject.toml +3 -3
  10. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/__init__.py +0 -0
  11. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/__init__.py +0 -0
  12. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/agent.py +0 -0
  13. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/base.py +0 -0
  14. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/callbacks.py +0 -0
  15. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/experiment.py +0 -0
  16. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/factory.py +0 -0
  17. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/messages.py +0 -0
  18. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/provider_config.py +0 -0
  19. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/telemetry.py +0 -0
  20. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/__init__.py +0 -0
  21. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/base.py +0 -0
  22. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/bash.py +0 -0
  23. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/collection.py +0 -0
  24. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/computer.py +0 -0
  25. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/edit.py +0 -0
  26. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools/manager.py +0 -0
  27. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/tools.py +0 -0
  28. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/types.py +0 -0
  29. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/core/visualization.py +0 -0
  30. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/__init__.py +0 -0
  31. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/__init__.py +0 -0
  32. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/api/client.py +0 -0
  33. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/api/logging.py +0 -0
  34. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/api_handler.py +0 -0
  35. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/callbacks/__init__.py +0 -0
  36. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/callbacks/manager.py +0 -0
  37. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/loop.py +0 -0
  38. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/prompts.py +0 -0
  39. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/response_handler.py +0 -0
  40. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/__init__.py +0 -0
  41. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/base.py +0 -0
  42. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/bash.py +0 -0
  43. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/collection.py +0 -0
  44. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/computer.py +0 -0
  45. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/edit.py +0 -0
  46. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/manager.py +0 -0
  47. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/tools/run.py +0 -0
  48. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/types.py +0 -0
  49. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/anthropic/utils.py +0 -0
  50. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/__init__.py +0 -0
  51. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/api_handler.py +0 -0
  52. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/clients/anthropic.py +0 -0
  53. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/clients/base.py +0 -0
  54. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/clients/ollama.py +0 -0
  55. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/clients/openai.py +0 -0
  56. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/clients/utils.py +0 -0
  57. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/image_utils.py +0 -0
  58. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/loop.py +0 -0
  59. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/parser.py +0 -0
  60. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/prompts.py +0 -0
  61. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/tools/__init__.py +0 -0
  62. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/tools/base.py +0 -0
  63. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/tools/bash.py +0 -0
  64. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/tools/computer.py +0 -0
  65. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/tools/manager.py +0 -0
  66. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/omni/utils.py +0 -0
  67. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/__init__.py +0 -0
  68. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/api_handler.py +0 -0
  69. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/loop.py +0 -0
  70. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/response_handler.py +0 -0
  71. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/tools/__init__.py +0 -0
  72. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/tools/base.py +0 -0
  73. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/tools/manager.py +0 -0
  74. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/types.py +0 -0
  75. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/openai/utils.py +0 -0
  76. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/__init__.py +0 -0
  77. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/clients/base.py +0 -0
  78. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/tools/__init__.py +0 -0
  79. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/tools/manager.py +0 -0
  80. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/providers/uitars/utils.py +0 -0
  81. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/telemetry.py +0 -0
  82. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/ui/__init__.py +0 -0
  83. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/ui/gradio/__init__.py +0 -0
  84. {cua_agent-0.1.30 → cua_agent-0.1.31}/agent/ui/gradio/app.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.1.30
3
+ Version: 0.1.31
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.10
@@ -101,6 +101,7 @@ pip install "cua-agent[all]"
101
101
  # or install specific loop providers
102
102
  pip install "cua-agent[openai]" # OpenAI Cua Loop
103
103
  pip install "cua-agent[anthropic]" # Anthropic Cua Loop
104
+ pip install "cua-agent[uitars]" # UI-Tars support
104
105
  pip install "cua-agent[omni]" # Cua Loop based on OmniParser (includes Ollama for local models)
105
106
  pip install "cua-agent[ui]" # Gradio UI for the agent
106
107
  ```
@@ -148,7 +149,13 @@ Refer to these notebooks for step-by-step guides on how to use the Computer-Use
148
149
 
149
150
  ## Using the Gradio UI
150
151
 
151
- The agent includes a Gradio-based user interface for easy interaction. To use it:
152
+ The agent includes a Gradio-based user interface for easier interaction.
153
+
154
+ <div align="center">
155
+ <img src="../../img/agent_gradio_ui.png"/>
156
+ </div>
157
+
158
+ To use it:
152
159
 
153
160
  ```bash
154
161
  # Install with Gradio support
@@ -31,6 +31,7 @@ pip install "cua-agent[all]"
31
31
  # or install specific loop providers
32
32
  pip install "cua-agent[openai]" # OpenAI Cua Loop
33
33
  pip install "cua-agent[anthropic]" # Anthropic Cua Loop
34
+ pip install "cua-agent[uitars]" # UI-Tars support
34
35
  pip install "cua-agent[omni]" # Cua Loop based on OmniParser (includes Ollama for local models)
35
36
  pip install "cua-agent[ui]" # Gradio UI for the agent
36
37
  ```
@@ -78,7 +79,13 @@ Refer to these notebooks for step-by-step guides on how to use the Computer-Use
78
79
 
79
80
  ## Using the Gradio UI
80
81
 
81
- The agent includes a Gradio-based user interface for easy interaction. To use it:
82
+ The agent includes a Gradio-based user interface for easier interaction.
83
+
84
+ <div align="center">
85
+ <img src="../../img/agent_gradio_ui.png"/>
86
+ </div>
87
+
88
+ To use it:
82
89
 
83
90
  ```bash
84
91
  # Install with Gradio support
@@ -93,7 +93,14 @@ class OAICompatClient(BaseOmniClient):
93
93
  """
94
94
  headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
95
95
 
96
- final_messages = [{"role": "system", "content": system}]
96
+ final_messages = [
97
+ {
98
+ "role": "system",
99
+ "content": [
100
+ { "type": "text", "text": system }
101
+ ]
102
+ }
103
+ ]
97
104
 
98
105
  # Process messages
99
106
  for item in messages:
@@ -117,7 +124,10 @@ class OAICompatClient(BaseOmniClient):
117
124
  else:
118
125
  message = {
119
126
  "role": item["role"],
120
- "content": [{"type": "text", "text": item["content"]}],
127
+ "content": [{
128
+ "type": "text",
129
+ "text": item["content"]
130
+ }],
121
131
  }
122
132
  final_messages.append(message)
123
133
  else:
@@ -162,8 +162,8 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
162
162
  y = kwargs.get("y")
163
163
  if x is None or y is None:
164
164
  raise ToolError("x and y coordinates are required for scroll action")
165
- scroll_x = kwargs.get("scroll_x", 0) // 20
166
- scroll_y = kwargs.get("scroll_y", 0) // 20
165
+ scroll_x = kwargs.get("scroll_x", 0) // 50
166
+ scroll_y = kwargs.get("scroll_y", 0) // 50
167
167
  return await self.handle_scroll(x, y, scroll_x, scroll_y)
168
168
  elif type == "screenshot":
169
169
  return await self.screenshot()
@@ -240,11 +240,7 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
240
240
 
241
241
  if len(mapped_keys) > 1:
242
242
  # For key combinations (like Ctrl+C)
243
- for k in mapped_keys:
244
- await self.computer.interface.press_key(k)
245
- await asyncio.sleep(0.1)
246
- for k in reversed(mapped_keys):
247
- await self.computer.interface.press_key(k)
243
+ await self.computer.interface.hotkey(*mapped_keys)
248
244
  else:
249
245
  # Single key press
250
246
  await self.computer.interface.press_key(mapped_keys[0])
@@ -94,8 +94,15 @@ class OAICompatClient(BaseUITarsClient):
94
94
  """
95
95
  headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
96
96
 
97
- final_messages = [{"role": "system", "content": system}]
98
-
97
+ final_messages = [
98
+ {
99
+ "role": "system",
100
+ "content": [
101
+ { "type": "text", "text": system }
102
+ ]
103
+ }
104
+ ]
105
+
99
106
  # Process messages
100
107
  for item in messages:
101
108
  if isinstance(item, dict):
@@ -138,8 +145,13 @@ class OAICompatClient(BaseUITarsClient):
138
145
  message = {"role": "user", "content": [{"type": "text", "text": item}]}
139
146
  final_messages.append(message)
140
147
 
141
- payload = {"model": self.model, "messages": final_messages, "temperature": self.temperature}
142
- payload["max_tokens"] = max_tokens or self.max_tokens
148
+ payload = {
149
+ "model": self.model,
150
+ "messages": final_messages,
151
+ "max_tokens": max_tokens or self.max_tokens,
152
+ "temperature": self.temperature,
153
+ "top_p": 0.7,
154
+ }
143
155
 
144
156
  try:
145
157
  async with aiohttp.ClientSession() as session:
@@ -20,7 +20,7 @@ from computer import Computer
20
20
  from .utils import add_box_token, parse_actions, parse_action_parameters
21
21
  from .tools.manager import ToolManager
22
22
  from .tools.computer import ToolResult
23
- from .prompts import COMPUTER_USE, SYSTEM_PROMPT
23
+ from .prompts import COMPUTER_USE, SYSTEM_PROMPT, MAC_SPECIFIC_NOTES
24
24
 
25
25
  from .clients.oaicompat import OAICompatClient
26
26
 
@@ -184,7 +184,7 @@ class UITARSLoop(BaseLoop):
184
184
  if first_user_idx is not None and instruction:
185
185
  # Create the computer use prompt
186
186
  user_prompt = COMPUTER_USE.format(
187
- instruction=instruction,
187
+ instruction='\n'.join([instruction, MAC_SPECIFIC_NOTES]),
188
188
  language="English"
189
189
  )
190
190
 
@@ -232,8 +232,11 @@ class UITARSLoop(BaseLoop):
232
232
  if self.client is None:
233
233
  raise RuntimeError("Failed to initialize client")
234
234
 
235
- # Convert messages to UI-TARS format
235
+ # Get messages in standard format from the message manager
236
+ self.message_manager.messages = messages.copy()
236
237
  prepared_messages = self.message_manager.get_messages()
238
+
239
+ # Convert messages to UI-TARS format
237
240
  uitars_messages = self.to_uitars_format(prepared_messages)
238
241
 
239
242
  # Log request
@@ -1,5 +1,9 @@
1
1
  """Prompts for UI-TARS agent."""
2
2
 
3
+ MAC_SPECIFIC_NOTES = """
4
+ (You are operating on macOS, use 'cmd' instead of 'ctrl' for most shortcuts e.g., hotkey(key='cmd c') for copy, hotkey(key='cmd v') for paste, hotkey(key='cmd t') for new tab).)
5
+ """
6
+
3
7
  SYSTEM_PROMPT = "You are a helpful assistant."
4
8
 
5
9
  COMPUTER_USE = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
@@ -56,4 +60,4 @@ finished(content='xxx') # Use escape characters \\', \\", and \\n in content par
56
60
 
57
61
  ## User Instruction
58
62
  {instruction}
59
- """
63
+ """
@@ -173,9 +173,13 @@ class ComputerTool(BaseComputerTool):
173
173
  elif action == "hotkey":
174
174
  if "keys" in kwargs:
175
175
  keys = kwargs["keys"]
176
- for key in keys:
177
- await self.computer.interface.press_key(key)
178
176
 
177
+ if len(keys) > 1:
178
+ await self.computer.interface.hotkey(*keys)
179
+ else:
180
+ # Single key press
181
+ await self.computer.interface.press_key(keys[0])
182
+
179
183
  # Wait for UI to update
180
184
  await asyncio.sleep(0.3)
181
185
 
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
6
6
 
7
7
  [project]
8
8
  name = "cua-agent"
9
- version = "0.1.30"
9
+ version = "0.1.31"
10
10
  description = "CUA (Computer Use) Agent for AI-driven computer interaction"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -108,7 +108,7 @@ target-version = [
108
108
 
109
109
  [tool.ruff]
110
110
  line-length = 100
111
- target-version = "0.1.30"
111
+ target-version = "0.1.31"
112
112
  select = [
113
113
  "E",
114
114
  "F",
@@ -122,7 +122,7 @@ docstring-code-format = true
122
122
 
123
123
  [tool.mypy]
124
124
  strict = true
125
- python_version = "0.1.30"
125
+ python_version = "0.1.31"
126
126
  ignore_missing_imports = true
127
127
  disallow_untyped_defs = true
128
128
  check_untyped_defs = true
File without changes