PyPI - cua-agent - Versions diffs - 0.4.2__tar.gz → 0.4.4__tar.gz - Mend

cua-agent 0.4.2.tar.gz → 0.4.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (32)

{cua_agent-0.4.2 → cua_agent-0.4.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cua-agent
-Version: 0.4.2
+Version: 0.4.4
 Summary: CUA (Computer Use) Agent for AI-driven computer interaction
 Author-Email: TryCua <gh@trycua.com>
 Requires-Python: >=3.11

{cua_agent-0.4.2 → cua_agent-0.4.4}/agent/cli.py RENAMED Viewed

@@ -51,9 +51,8 @@ class Colors:
     BG_YELLOW = '\033[43m'
     BG_BLUE = '\033[44m'
-def print_colored(text: str, color: str = "", bold: bool = False, dim: bool = False, end: str = "\n"):
-    """Print colored text to terminal."""
+def print_colored(text: str, color: str = "", bold: bool = False, dim: bool = False, end: str = "\n", right: str = ""):
+    """Print colored text to terminal with optional right-aligned text."""
     prefix = ""
     if bold:
         prefix += Colors.BOLD
@@ -62,10 +61,35 @@ def print_colored(text: str, color: str = "", bold: bool = False, dim: bool = Fa
     if color:
         prefix += color
-    print(f"{prefix}{text}{Colors.RESET}", end=end)
+    if right:
+        # Get terminal width (default to 80 if unable to determine)
+        try:
+            import shutil
+            terminal_width = shutil.get_terminal_size().columns
+        except:
+            terminal_width = 80
+        # Add right margin
+        terminal_width -= 1
+        # Calculate padding needed
+        # Account for ANSI escape codes not taking visual space
+        visible_left_len = len(text)
+        visible_right_len = len(right)
+        padding = terminal_width - visible_left_len - visible_right_len
+        if padding > 0:
+            output = f"{prefix}{text}{' ' * padding}{right}{Colors.RESET}"
+        else:
+            # If not enough space, just put a single space between
+            output = f"{prefix}{text} {right}{Colors.RESET}"
+    else:
+        output = f"{prefix}{text}{Colors.RESET}"
+    print(output, end=end)
-def print_action(action_type: str, details: Dict[str, Any]):
+def print_action(action_type: str, details: Dict[str, Any], total_cost: float):
     """Print computer action with nice formatting."""
     # Format action details
     args_str = ""
@@ -81,8 +105,10 @@ def print_action(action_type: str, details: Dict[str, Any]):
     elif action_type == "scroll" and "x" in details and "y" in details:
         args_str = f"({details['x']}, {details['y']})"
-    print_colored(f"🛠️  {action_type}{args_str}", dim=True)
+    if total_cost > 0:
+        print_colored(f"🛠️  {action_type}{args_str}", dim=True, right=f"💸 ${total_cost:.2f}")
+    else:
+        print_colored(f"🛠️  {action_type}{args_str}", dim=True)
 def print_welcome(model: str, agent_loop: str, container_name: str):
     """Print welcome message."""
@@ -92,7 +118,7 @@ def print_welcome(model: str, agent_loop: str, container_name: str):
 async def ainput(prompt: str = ""):
     return await asyncio.to_thread(input, prompt)
-async def chat_loop(agent, model: str, container_name: str, initial_prompt: str = ""):
+async def chat_loop(agent, model: str, container_name: str, initial_prompt: str = "", show_usage: bool = True):
     """Main chat loop with the agent."""
     print_welcome(model, agent.agent_loop.__name__, container_name)
@@ -101,6 +127,8 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
     if initial_prompt:
         history.append({"role": "user", "content": initial_prompt})
+    total_cost = 0
     while True:
         if history[-1].get("role") != "user":
             # Get user input with prompt
@@ -124,6 +152,9 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
             async for result in agent.run(history):
                 # Add agent responses to history
                 history.extend(result.get("output", []))
+                if show_usage:
+                    total_cost += result.get("usage", {}).get("response_cost", 0)
                 # Process and display the output
                 for item in result.get("output", []):
@@ -143,7 +174,7 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
                         action_type = action.get("type", "")
                         if action_type:
                             spinner.hide()
-                            print_action(action_type, action)
+                            print_action(action_type, action, total_cost)
                             spinner.text = f"Performing {action_type}..."
                             spinner.show()
@@ -163,6 +194,8 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
                             print_colored(f"📤 {output}", dim=True)
             spinner.hide()
+            if show_usage and total_cost > 0:
+                print_colored(f"Total cost: ${total_cost:.2f}", dim=True)
 async def main():
@@ -214,6 +247,20 @@ Examples:
         type=str,
         help="Initial prompt to send to the agent. Leave blank for interactive mode."
     )
+    parser.add_argument(
+        "-c", "--cache",
+        action="store_true",
+        help="Tell the API to enable caching"
+    )
+    parser.add_argument(
+        "-u", "--usage",
+        action="store_true",
+        help="Show total cost of the agent runs"
+    )
     args = parser.parse_args()
@@ -294,11 +341,14 @@ Examples:
                 "raise_error": True,
                 "reset_after_each_run": False
             }
+        if args.cache:
+            agent_kwargs["use_prompt_caching"] = True
         agent = ComputerAgent(**agent_kwargs)
         # Start chat loop
-        await chat_loop(agent, args.model, container_name, args.prompt)
+        await chat_loop(agent, args.model, container_name, args.prompt, args.usage)

{cua_agent-0.4.2 → cua_agent-0.4.4}/agent/loops/anthropic.py RENAMED Viewed

@@ -606,35 +606,33 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
                         # Basic actions (all versions)
                         if action_type == "screenshot":
                             responses_items.append(make_screenshot_item(call_id=call_id))
-                        elif action_type == "left_click":
+                        elif action_type in ["click", "left_click"]:
                             coordinate = tool_input.get("coordinate", [0, 0])
                             responses_items.append(make_click_item(
                                 x=coordinate[0] if len(coordinate) > 0 else 0,
                                 y=coordinate[1] if len(coordinate) > 1 else 0,
                                 call_id=call_id
                             ))
-                        elif action_type == "type":
+                        elif action_type in ["type", "type_text"]:
                             responses_items.append(make_type_item(
                                 text=tool_input.get("text", ""),
                                 call_id=call_id
                             ))
-                        elif action_type == "key":
+                        elif action_type in ["key", "keypress", "hotkey"]:
                             responses_items.append(make_keypress_item(
                                 keys=tool_input.get("text", "").replace("+", "-").split("-"),
                                 call_id=call_id
                             ))
-                        elif action_type == "mouse_move":
+                        elif action_type in ["mouse_move", "move_cursor", "move"]:
                             # Mouse move - create a custom action item
                             coordinate = tool_input.get("coordinate", [0, 0])
-                            responses_items.append({
-                                "type": "computer_call",
-                                "call_id": call_id,
-                                "action": {
-                                    "type": "mouse_move",
-                                    "x": coordinate[0] if len(coordinate) > 0 else 0,
-                                    "y": coordinate[1] if len(coordinate) > 1 else 0
-                                }
-                            })
+                            responses_items.append(
+                                make_move_item(
+                                    x=coordinate[0] if len(coordinate) > 0 else 0,
+                                    y=coordinate[1] if len(coordinate) > 1 else 0,
+                                    call_id=call_id
+                                )
+                            )
                         # Enhanced actions (computer_20250124) Available in Claude 4 and Claude Sonnet 3.7
                         elif action_type == "scroll":
@@ -651,7 +649,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
                                 scroll_y=scroll_y,
                                 call_id=call_id
                             ))
-                        elif action_type == "left_click_drag":
+                        elif action_type in ["left_click_drag", "drag"]:
                             start_coord = tool_input.get("start_coordinate", [0, 0])
                             end_coord = tool_input.get("end_coordinate", [0, 0])
                             responses_items.append(make_drag_item(
@@ -809,7 +807,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
                             y=coordinate[1] if len(coordinate) > 1 else 0,
                             call_id=call_id
                         ))
-                    elif action_type == "type":
+                    elif action_type in ["type", "type_text"]:
                         # Input:
                         # {
                         #     "function": {
@@ -836,7 +834,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
                             text=args.get("text", ""),
                             call_id=call_id
                         ))
-                    elif action_type == "key":
+                    elif action_type in ["key", "keypress", "hotkey"]:
                         # Input:
                         # {
                         #     "function": {
@@ -863,7 +861,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
                             keys=args.get("text", "").replace("+", "-").split("-"),
                             call_id=call_id
                         ))
-                    elif action_type == "mouse_move":
+                    elif action_type in ["mouse_move", "move_cursor", "move"]:
                         # Input:
                         # {
                         #     "function": {
@@ -937,7 +935,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
                             scroll_y=scroll_y,
                             call_id=call_id
                         ))
-                    elif action_type == "left_click_drag":
+                    elif action_type in ["left_click_drag", "drag"]:
                         # Input:
                         # {
                         #     "function": {

{cua_agent-0.4.2 → cua_agent-0.4.4}/pyproject.toml RENAMED Viewed

@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
 [project]
 name = "cua-agent"
-version = "0.4.2"
+version = "0.4.4"
 description = "CUA (Computer Use) Agent for AI-driven computer interaction"
 readme = "README.md"
 authors = [