cua-agent 0.4.2__tar.gz → 0.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (32)
  1. {cua_agent-0.4.2 → cua_agent-0.4.4}/PKG-INFO +1 -1
  2. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/cli.py +60 -10
  3. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/loops/anthropic.py +16 -18
  4. {cua_agent-0.4.2 → cua_agent-0.4.4}/pyproject.toml +1 -1
  5. {cua_agent-0.4.2 → cua_agent-0.4.4}/README.md +0 -0
  6. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/__init__.py +0 -0
  7. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/__main__.py +0 -0
  8. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/adapters/__init__.py +0 -0
  9. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/adapters/huggingfacelocal_adapter.py +0 -0
  10. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/agent.py +0 -0
  11. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/__init__.py +0 -0
  12. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/base.py +0 -0
  13. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/budget_manager.py +0 -0
  14. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/image_retention.py +0 -0
  15. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/logging.py +0 -0
  16. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/pii_anonymization.py +0 -0
  17. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/telemetry.py +0 -0
  18. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/trajectory_saver.py +0 -0
  19. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/computer_handler.py +0 -0
  20. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/decorators.py +0 -0
  21. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/loops/__init__.py +0 -0
  22. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/loops/omniparser.py +0 -0
  23. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/loops/openai.py +0 -0
  24. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/loops/uitars.py +0 -0
  25. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/responses.py +0 -0
  26. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/telemetry.py +0 -0
  27. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/types.py +0 -0
  28. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/ui/__init__.py +0 -0
  29. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/ui/__main__.py +0 -0
  30. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/ui/gradio/__init__.py +0 -0
  31. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/ui/gradio/app.py +0 -0
  32. {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/ui/gradio/ui_components.py +0 -0
--- cua_agent-0.4.2/PKG-INFO
+++ cua_agent-0.4.4/PKG-INFO
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.4.2
3
+ Version: 0.4.4
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.11
--- cua_agent-0.4.2/agent/cli.py
+++ cua_agent-0.4.4/agent/cli.py
@@ -51,9 +51,8 @@ class Colors:
51
51
  BG_YELLOW = '\033[43m'
52
52
  BG_BLUE = '\033[44m'
53
53
 
54
-
55
- def print_colored(text: str, color: str = "", bold: bool = False, dim: bool = False, end: str = "\n"):
56
- """Print colored text to terminal."""
54
+ def print_colored(text: str, color: str = "", bold: bool = False, dim: bool = False, end: str = "\n", right: str = ""):
55
+ """Print colored text to terminal with optional right-aligned text."""
57
56
  prefix = ""
58
57
  if bold:
59
58
  prefix += Colors.BOLD
@@ -62,10 +61,35 @@ def print_colored(text: str, color: str = "", bold: bool = False, dim: bool = Fa
62
61
  if color:
63
62
  prefix += color
64
63
 
65
- print(f"{prefix}{text}{Colors.RESET}", end=end)
64
+ if right:
65
+ # Get terminal width (default to 80 if unable to determine)
66
+ try:
67
+ import shutil
68
+ terminal_width = shutil.get_terminal_size().columns
69
+ except:
70
+ terminal_width = 80
71
+
72
+ # Add right margin
73
+ terminal_width -= 1
74
+
75
+ # Calculate padding needed
76
+ # Account for ANSI escape codes not taking visual space
77
+ visible_left_len = len(text)
78
+ visible_right_len = len(right)
79
+ padding = terminal_width - visible_left_len - visible_right_len
80
+
81
+ if padding > 0:
82
+ output = f"{prefix}{text}{' ' * padding}{right}{Colors.RESET}"
83
+ else:
84
+ # If not enough space, just put a single space between
85
+ output = f"{prefix}{text} {right}{Colors.RESET}"
86
+ else:
87
+ output = f"{prefix}{text}{Colors.RESET}"
88
+
89
+ print(output, end=end)
66
90
 
67
91
 
68
- def print_action(action_type: str, details: Dict[str, Any]):
92
+ def print_action(action_type: str, details: Dict[str, Any], total_cost: float):
69
93
  """Print computer action with nice formatting."""
70
94
  # Format action details
71
95
  args_str = ""
@@ -81,8 +105,10 @@ def print_action(action_type: str, details: Dict[str, Any]):
81
105
  elif action_type == "scroll" and "x" in details and "y" in details:
82
106
  args_str = f"({details['x']}, {details['y']})"
83
107
 
84
- print_colored(f"🛠️ {action_type}{args_str}", dim=True)
85
-
108
+ if total_cost > 0:
109
+ print_colored(f"🛠️ {action_type}{args_str}", dim=True, right=f"💸 ${total_cost:.2f}")
110
+ else:
111
+ print_colored(f"🛠️ {action_type}{args_str}", dim=True)
86
112
 
87
113
  def print_welcome(model: str, agent_loop: str, container_name: str):
88
114
  """Print welcome message."""
@@ -92,7 +118,7 @@ def print_welcome(model: str, agent_loop: str, container_name: str):
92
118
  async def ainput(prompt: str = ""):
93
119
  return await asyncio.to_thread(input, prompt)
94
120
 
95
- async def chat_loop(agent, model: str, container_name: str, initial_prompt: str = ""):
121
+ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str = "", show_usage: bool = True):
96
122
  """Main chat loop with the agent."""
97
123
  print_welcome(model, agent.agent_loop.__name__, container_name)
98
124
 
@@ -101,6 +127,8 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
101
127
  if initial_prompt:
102
128
  history.append({"role": "user", "content": initial_prompt})
103
129
 
130
+ total_cost = 0
131
+
104
132
  while True:
105
133
  if history[-1].get("role") != "user":
106
134
  # Get user input with prompt
@@ -124,6 +152,9 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
124
152
  async for result in agent.run(history):
125
153
  # Add agent responses to history
126
154
  history.extend(result.get("output", []))
155
+
156
+ if show_usage:
157
+ total_cost += result.get("usage", {}).get("response_cost", 0)
127
158
 
128
159
  # Process and display the output
129
160
  for item in result.get("output", []):
@@ -143,7 +174,7 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
143
174
  action_type = action.get("type", "")
144
175
  if action_type:
145
176
  spinner.hide()
146
- print_action(action_type, action)
177
+ print_action(action_type, action, total_cost)
147
178
  spinner.text = f"Performing {action_type}..."
148
179
  spinner.show()
149
180
 
@@ -163,6 +194,8 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
163
194
  print_colored(f"📤 {output}", dim=True)
164
195
 
165
196
  spinner.hide()
197
+ if show_usage and total_cost > 0:
198
+ print_colored(f"Total cost: ${total_cost:.2f}", dim=True)
166
199
 
167
200
 
168
201
  async def main():
@@ -214,6 +247,20 @@ Examples:
214
247
  type=str,
215
248
  help="Initial prompt to send to the agent. Leave blank for interactive mode."
216
249
  )
250
+
251
+ parser.add_argument(
252
+ "-c", "--cache",
253
+ action="store_true",
254
+ help="Tell the API to enable caching"
255
+ )
256
+
257
+ parser.add_argument(
258
+ "-u", "--usage",
259
+ action="store_true",
260
+ help="Show total cost of the agent runs"
261
+ )
262
+
263
+
217
264
 
218
265
  args = parser.parse_args()
219
266
 
@@ -294,11 +341,14 @@ Examples:
294
341
  "raise_error": True,
295
342
  "reset_after_each_run": False
296
343
  }
344
+
345
+ if args.cache:
346
+ agent_kwargs["use_prompt_caching"] = True
297
347
 
298
348
  agent = ComputerAgent(**agent_kwargs)
299
349
 
300
350
  # Start chat loop
301
- await chat_loop(agent, args.model, container_name, args.prompt)
351
+ await chat_loop(agent, args.model, container_name, args.prompt, args.usage)
302
352
 
303
353
 
304
354
 
--- cua_agent-0.4.2/agent/loops/anthropic.py
+++ cua_agent-0.4.4/agent/loops/anthropic.py
@@ -606,35 +606,33 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
606
606
  # Basic actions (all versions)
607
607
  if action_type == "screenshot":
608
608
  responses_items.append(make_screenshot_item(call_id=call_id))
609
- elif action_type == "left_click":
609
+ elif action_type in ["click", "left_click"]:
610
610
  coordinate = tool_input.get("coordinate", [0, 0])
611
611
  responses_items.append(make_click_item(
612
612
  x=coordinate[0] if len(coordinate) > 0 else 0,
613
613
  y=coordinate[1] if len(coordinate) > 1 else 0,
614
614
  call_id=call_id
615
615
  ))
616
- elif action_type == "type":
616
+ elif action_type in ["type", "type_text"]:
617
617
  responses_items.append(make_type_item(
618
618
  text=tool_input.get("text", ""),
619
619
  call_id=call_id
620
620
  ))
621
- elif action_type == "key":
621
+ elif action_type in ["key", "keypress", "hotkey"]:
622
622
  responses_items.append(make_keypress_item(
623
623
  keys=tool_input.get("text", "").replace("+", "-").split("-"),
624
624
  call_id=call_id
625
625
  ))
626
- elif action_type == "mouse_move":
626
+ elif action_type in ["mouse_move", "move_cursor", "move"]:
627
627
  # Mouse move - create a custom action item
628
628
  coordinate = tool_input.get("coordinate", [0, 0])
629
- responses_items.append({
630
- "type": "computer_call",
631
- "call_id": call_id,
632
- "action": {
633
- "type": "mouse_move",
634
- "x": coordinate[0] if len(coordinate) > 0 else 0,
635
- "y": coordinate[1] if len(coordinate) > 1 else 0
636
- }
637
- })
629
+ responses_items.append(
630
+ make_move_item(
631
+ x=coordinate[0] if len(coordinate) > 0 else 0,
632
+ y=coordinate[1] if len(coordinate) > 1 else 0,
633
+ call_id=call_id
634
+ )
635
+ )
638
636
 
639
637
  # Enhanced actions (computer_20250124) Available in Claude 4 and Claude Sonnet 3.7
640
638
  elif action_type == "scroll":
@@ -651,7 +649,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
651
649
  scroll_y=scroll_y,
652
650
  call_id=call_id
653
651
  ))
654
- elif action_type == "left_click_drag":
652
+ elif action_type in ["left_click_drag", "drag"]:
655
653
  start_coord = tool_input.get("start_coordinate", [0, 0])
656
654
  end_coord = tool_input.get("end_coordinate", [0, 0])
657
655
  responses_items.append(make_drag_item(
@@ -809,7 +807,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
809
807
  y=coordinate[1] if len(coordinate) > 1 else 0,
810
808
  call_id=call_id
811
809
  ))
812
- elif action_type == "type":
810
+ elif action_type in ["type", "type_text"]:
813
811
  # Input:
814
812
  # {
815
813
  # "function": {
@@ -836,7 +834,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
836
834
  text=args.get("text", ""),
837
835
  call_id=call_id
838
836
  ))
839
- elif action_type == "key":
837
+ elif action_type in ["key", "keypress", "hotkey"]:
840
838
  # Input:
841
839
  # {
842
840
  # "function": {
@@ -863,7 +861,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
863
861
  keys=args.get("text", "").replace("+", "-").split("-"),
864
862
  call_id=call_id
865
863
  ))
866
- elif action_type == "mouse_move":
864
+ elif action_type in ["mouse_move", "move_cursor", "move"]:
867
865
  # Input:
868
866
  # {
869
867
  # "function": {
@@ -937,7 +935,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
937
935
  scroll_y=scroll_y,
938
936
  call_id=call_id
939
937
  ))
940
- elif action_type == "left_click_drag":
938
+ elif action_type in ["left_click_drag", "drag"]:
941
939
  # Input:
942
940
  # {
943
941
  # "function": {
--- cua_agent-0.4.2/pyproject.toml
+++ cua_agent-0.4.4/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
6
6
 
7
7
  [project]
8
8
  name = "cua-agent"
9
- version = "0.4.2"
9
+ version = "0.4.4"
10
10
  description = "CUA (Computer Use) Agent for AI-driven computer interaction"
11
11
  readme = "README.md"
12
12
  authors = [
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes