PyPI - cua-agent - Versions diffs - 0.4.34__py3-none-any.whl → 0.4.36__py3-none-any.whl - Mend

cua-agent 0.4.34__py3-none-any.whl → 0.4.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (61) hide show

agent/__init__.py +4 -10
agent/__main__.py +2 -1
agent/adapters/huggingfacelocal_adapter.py +54 -61
agent/adapters/human_adapter.py +116 -114
agent/adapters/mlxvlm_adapter.py +110 -99
agent/adapters/models/__init__.py +14 -6
agent/adapters/models/generic.py +7 -4
agent/adapters/models/internvl.py +66 -30
agent/adapters/models/opencua.py +23 -8
agent/adapters/models/qwen2_5_vl.py +7 -4
agent/agent.py +184 -158
agent/callbacks/__init__.py +4 -4
agent/callbacks/base.py +45 -31
agent/callbacks/budget_manager.py +22 -10
agent/callbacks/image_retention.py +18 -13
agent/callbacks/logging.py +55 -42
agent/callbacks/operator_validator.py +3 -1
agent/callbacks/pii_anonymization.py +19 -16
agent/callbacks/telemetry.py +67 -61
agent/callbacks/trajectory_saver.py +90 -70
agent/cli.py +115 -110
agent/computers/__init__.py +13 -8
agent/computers/base.py +32 -19
agent/computers/cua.py +33 -25
agent/computers/custom.py +78 -71
agent/decorators.py +23 -14
agent/human_tool/__init__.py +2 -7
agent/human_tool/__main__.py +6 -2
agent/human_tool/server.py +48 -37
agent/human_tool/ui.py +235 -185
agent/integrations/hud/__init__.py +15 -21
agent/integrations/hud/agent.py +101 -83
agent/integrations/hud/proxy.py +90 -57
agent/loops/__init__.py +25 -21
agent/loops/anthropic.py +537 -483
agent/loops/base.py +13 -14
agent/loops/composed_grounded.py +135 -149
agent/loops/gemini.py +31 -12
agent/loops/glm45v.py +135 -133
agent/loops/gta1.py +47 -50
agent/loops/holo.py +4 -2
agent/loops/internvl.py +6 -11
agent/loops/moondream3.py +36 -12
agent/loops/omniparser.py +215 -210
agent/loops/openai.py +49 -50
agent/loops/opencua.py +29 -41
agent/loops/qwen.py +510 -0
agent/loops/uitars.py +237 -202
agent/proxy/examples.py +54 -50
agent/proxy/handlers.py +27 -34
agent/responses.py +330 -330
agent/types.py +11 -5
agent/ui/__init__.py +1 -1
agent/ui/__main__.py +1 -1
agent/ui/gradio/app.py +23 -18
agent/ui/gradio/ui_components.py +310 -161
{cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/METADATA +18 -10
cua_agent-0.4.36.dist-info/RECORD +64 -0
cua_agent-0.4.34.dist-info/RECORD +0 -63
{cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/WHEEL +0 -0
{cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/entry_points.txt +0 -0

agent/callbacks/trajectory_saver.py CHANGED Viewed

@@ -2,26 +2,28 @@
 Trajectory saving callback handler for ComputerAgent.
 """
-import os
+import base64
+import io
 import json
+import os
 import uuid
+from copy import deepcopy
 from datetime import datetime
-import base64
 from pathlib import Path
-from typing import List, Dict, Any, Optional, Union, override
+from typing import Any, Dict, List, Optional, Union, override
 from PIL import Image, ImageDraw
-import io
-from copy import deepcopy
 from .base import AsyncCallbackHandler
 def sanitize_image_urls(data: Any) -> Any:
     """
     Recursively search for 'image_url' keys and set their values to '[omitted]'.
     Args:
         data: Any data structure (dict, list, or primitive type)
     Returns:
         A deep copy of the data with all 'image_url' values replaced with '[omitted]'
     """
@@ -35,17 +37,19 @@ def sanitize_image_urls(data: Any) -> Any:
                 # Recursively sanitize the value
                 sanitized[key] = sanitize_image_urls(value)
         return sanitized
     elif isinstance(data, list):
         # Recursively sanitize each item in the list
         return [sanitize_image_urls(item) for item in data]
     else:
         # For primitive types (str, int, bool, None, etc.), return as-is
         return data
-def extract_computer_call_outputs(items: List[Dict[str, Any]], screenshot_dir: Optional[Path]) -> List[Dict[str, Any]]:
+def extract_computer_call_outputs(
+    items: List[Dict[str, Any]], screenshot_dir: Optional[Path]
+) -> List[Dict[str, Any]]:
     """
     Save any base64-encoded screenshots from computer_call_output entries to files and
     replace their image_url with the saved file path when a call_id is present.
@@ -103,18 +107,21 @@ def extract_computer_call_outputs(items: List[Dict[str, Any]], screenshot_dir: O
         updated.append(msg)
     return updated
 class TrajectorySaverCallback(AsyncCallbackHandler):
     """
     Callback handler that saves agent trajectories to disk.
     Saves each run as a separate trajectory with unique ID, and each turn
     within the trajectory gets its own folder with screenshots and responses.
     """
-    def __init__(self, trajectory_dir: str, reset_on_run: bool = True, screenshot_dir: Optional[str] = None):
+    def __init__(
+        self, trajectory_dir: str, reset_on_run: bool = True, screenshot_dir: Optional[str] = None
+    ):
         """
         Initialize trajectory saver.
         Args:
             trajectory_dir: Base directory to save trajectories
             reset_on_run: If True, reset trajectory_id/turn/artifact on each run.
@@ -129,7 +136,7 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
         self.reset_on_run = reset_on_run
         # Optional directory to store extracted screenshots from metadata/new_items
         self.screenshot_dir: Optional[Path] = Path(screenshot_dir) if screenshot_dir else None
         # Ensure trajectory directory exists
         self.trajectory_dir.mkdir(parents=True, exist_ok=True)
@@ -137,7 +144,7 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
         """Get the directory for the current turn."""
         if not self.trajectory_id:
             raise ValueError("Trajectory not initialized - call _on_run_start first")
         # format: trajectory_id/turn_000
         turn_dir = self.trajectory_dir / self.trajectory_id / f"turn_{self.current_turn:03d}"
         turn_dir.mkdir(parents=True, exist_ok=True)
@@ -166,6 +173,7 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
     def _update_usage(self, usage: Dict[str, Any]) -> None:
         """Update total usage statistics."""
         def add_dicts(target: Dict[str, Any], source: Dict[str, Any]) -> None:
             for key, value in source.items():
                 if isinstance(value, dict):
@@ -176,20 +184,21 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
                     if key not in target:
                         target[key] = 0
                     target[key] += value
         add_dicts(self.total_usage, usage)
     @override
     async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
         """Initialize trajectory tracking for a new run."""
         model = kwargs.get("model", "unknown")
         # Only reset trajectory state if reset_on_run is True or no trajectory exists
         if self.reset_on_run or not self.trajectory_id:
             model_name_short = model.split("+")[-1].split("/")[-1].lower()[:16]
             if "+" in model:
                 model_name_short = model.split("+")[0].lower()[:4] + "_" + model_name_short
             # strip non-alphanumeric characters from model_name_short
-            model_name_short = ''.join(c for c in model_name_short if c.isalnum() or c == '_')
+            model_name_short = "".join(c for c in model_name_short if c.isalnum() or c == "_")
             # id format: yyyy-mm-dd_model_hhmmss_uuid[:4]
             now = datetime.now()
@@ -198,11 +207,11 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
             self.current_artifact = 0
             self.model = model
             self.total_usage = {}
             # Create trajectory directory
             trajectory_path = self.trajectory_dir / self.trajectory_id
             trajectory_path.mkdir(parents=True, exist_ok=True)
             # Save trajectory metadata (optionally extract screenshots to screenshot_dir)
             kwargs_to_save = kwargs.copy()
             try:
@@ -219,7 +228,7 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
                 "status": "running",
                 "kwargs": kwargs_to_save,
             }
             with open(trajectory_path / "metadata.json", "w") as f:
                 json.dump(metadata, f, indent=2)
         else:
@@ -227,22 +236,27 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
             self.model = model
     @override
-    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
+    async def on_run_end(
+        self,
+        kwargs: Dict[str, Any],
+        old_items: List[Dict[str, Any]],
+        new_items: List[Dict[str, Any]],
+    ) -> None:
         """Finalize run tracking by updating metadata with completion status, usage, and new items."""
         if not self.trajectory_id:
             return
         # Update metadata with completion status, total usage, and new items
         trajectory_path = self.trajectory_dir / self.trajectory_id
         metadata_path = trajectory_path / "metadata.json"
         # Read existing metadata
         if metadata_path.exists():
             with open(metadata_path, "r") as f:
                 metadata = json.load(f)
         else:
             metadata = {}
         # Update metadata with completion info
         # Optionally extract screenshots from new_items before persisting
         new_items_to_save = new_items
@@ -251,32 +265,34 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
         except Exception:
             pass
-        metadata.update({
-            "status": "completed",
-            "completed_at": str(uuid.uuid1().time),
-            "total_usage": self.total_usage,
-            "new_items": new_items_to_save,
-            "total_turns": self.current_turn
-        })
+        metadata.update(
+            {
+                "status": "completed",
+                "completed_at": str(uuid.uuid1().time),
+                "total_usage": self.total_usage,
+                "new_items": new_items_to_save,
+                "total_turns": self.current_turn,
+            }
+        )
         # Save updated metadata
         with open(metadata_path, "w") as f:
             json.dump(metadata, f, indent=2)
-    @override
+    @override
     async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
         if not self.trajectory_id:
             return
-        self._save_artifact("api_start", { "kwargs": kwargs })
+        self._save_artifact("api_start", {"kwargs": kwargs})
     @override
     async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
         """Save API call result."""
         if not self.trajectory_id:
             return
-        self._save_artifact("api_result", { "kwargs": kwargs, "result": result })
+        self._save_artifact("api_result", {"kwargs": kwargs, "result": result})
     @override
     async def on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None:
@@ -295,77 +311,83 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
         """Save responses to the current turn directory and update usage statistics."""
         if not self.trajectory_id:
             return
         # Save responses
         turn_dir = self._get_turn_dir()
         response_data = {
             "timestamp": str(uuid.uuid1().time),
             "model": self.model,
             "kwargs": kwargs,
-            "response": responses
+            "response": responses,
         }
         self._save_artifact("agent_response", response_data)
         # Increment turn counter
         self.current_turn += 1
     def _draw_crosshair_on_image(self, image_bytes: bytes, x: int, y: int) -> bytes:
         """
         Draw a red dot and crosshair at the specified coordinates on the image.
         Args:
             image_bytes: The original image as bytes
             x: X coordinate for the crosshair
             y: Y coordinate for the crosshair
         Returns:
             Modified image as bytes with red dot and crosshair
         """
         # Open the image
         image = Image.open(io.BytesIO(image_bytes))
         draw = ImageDraw.Draw(image)
         # Draw crosshair lines (red, 2px thick)
         crosshair_size = 20
         line_width = 2
         color = "red"
         # Horizontal line
         draw.line([(x - crosshair_size, y), (x + crosshair_size, y)], fill=color, width=line_width)
         # Vertical line
         draw.line([(x, y - crosshair_size), (x, y + crosshair_size)], fill=color, width=line_width)
         # Draw center dot (filled circle)
         dot_radius = 3
-        draw.ellipse([(x - dot_radius, y - dot_radius), (x + dot_radius, y + dot_radius)], fill=color)
+        draw.ellipse(
+            [(x - dot_radius, y - dot_radius), (x + dot_radius, y + dot_radius)], fill=color
+        )
         # Convert back to bytes
         output = io.BytesIO()
-        image.save(output, format='PNG')
+        image.save(output, format="PNG")
         return output.getvalue()
     @override
-    async def on_computer_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None:
+    async def on_computer_call_end(
+        self, item: Dict[str, Any], result: List[Dict[str, Any]]
+    ) -> None:
         """
         Called when a computer call has completed.
         Saves screenshots and computer call output.
         """
         if not self.trajectory_id:
             return
-        self._save_artifact("computer_call_result", { "item": item, "result": result })
+        self._save_artifact("computer_call_result", {"item": item, "result": result})
         # Check if action has x/y coordinates and there's a screenshot in the result
         action = item.get("action", {})
         if "x" in action and "y" in action:
             # Look for screenshot in the result
             for result_item in result:
-                if (result_item.get("type") == "computer_call_output" and
-                    result_item.get("output", {}).get("type") == "input_image"):
+                if (
+                    result_item.get("type") == "computer_call_output"
+                    and result_item.get("output", {}).get("type") == "input_image"
+                ):
                     image_url = result_item["output"]["image_url"]
                     # Extract base64 image data
                     if image_url.startswith("data:image/"):
                         # Format: data:image/png;base64,<base64_data>
@@ -373,26 +395,24 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
                     else:
                         # Assume it's just base64 data
                         base64_data = image_url
                     try:
                         # Decode the image
                         image_bytes = base64.b64decode(base64_data)
                         # Draw crosshair at the action coordinates
                         annotated_image = self._draw_crosshair_on_image(
-                            image_bytes,
-                            int(action["x"]),
-                            int(action["y"])
+                            image_bytes, int(action["x"]), int(action["y"])
                         )
                         # Save as screenshot_action
                         self._save_artifact("screenshot_action", annotated_image)
                     except Exception as e:
                         # If annotation fails, just log and continue
                         print(f"Failed to annotate screenshot: {e}")
                     break  # Only process the first screenshot found
         # Increment turn counter
-        self.current_turn += 1
+        self.current_turn += 1

cua-agent 0.4.34__py3-none-any.whl → 0.4.36__py3-none-any.whl

Potentially problematic release.

cua-agent 0.4.34__py3-none-any.whl → 0.4.36__py3-none-any.whl