PyPI - cua-agent - Versions diffs - 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

cua-agent 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (112) hide show

agent/__init__.py +21 -12
agent/__main__.py +21 -0
agent/adapters/__init__.py +9 -0
agent/adapters/huggingfacelocal_adapter.py +229 -0
agent/agent.py +594 -0
agent/callbacks/__init__.py +19 -0
agent/callbacks/base.py +153 -0
agent/callbacks/budget_manager.py +44 -0
agent/callbacks/image_retention.py +139 -0
agent/callbacks/logging.py +247 -0
agent/callbacks/pii_anonymization.py +259 -0
agent/callbacks/telemetry.py +210 -0
agent/callbacks/trajectory_saver.py +305 -0
agent/cli.py +297 -0
agent/computer_handler.py +107 -0
agent/decorators.py +90 -0
agent/loops/__init__.py +11 -0
agent/loops/anthropic.py +728 -0
agent/loops/omniparser.py +339 -0
agent/loops/openai.py +95 -0
agent/loops/uitars.py +688 -0
agent/responses.py +207 -0
agent/telemetry.py +135 -14
agent/types.py +79 -0
agent/ui/__init__.py +7 -1
agent/ui/__main__.py +2 -13
agent/ui/gradio/__init__.py +6 -19
agent/ui/gradio/app.py +94 -1313
agent/ui/gradio/ui_components.py +721 -0
cua_agent-0.4.0.dist-info/METADATA +424 -0
cua_agent-0.4.0.dist-info/RECORD +33 -0
agent/core/__init__.py +0 -27
agent/core/agent.py +0 -210
agent/core/base.py +0 -217
agent/core/callbacks.py +0 -200
agent/core/experiment.py +0 -249
agent/core/factory.py +0 -122
agent/core/messages.py +0 -332
agent/core/provider_config.py +0 -21
agent/core/telemetry.py +0 -142
agent/core/tools/__init__.py +0 -21
agent/core/tools/base.py +0 -74
agent/core/tools/bash.py +0 -52
agent/core/tools/collection.py +0 -46
agent/core/tools/computer.py +0 -113
agent/core/tools/edit.py +0 -67
agent/core/tools/manager.py +0 -56
agent/core/tools.py +0 -32
agent/core/types.py +0 -88
agent/core/visualization.py +0 -197
agent/providers/__init__.py +0 -4
agent/providers/anthropic/__init__.py +0 -6
agent/providers/anthropic/api/client.py +0 -360
agent/providers/anthropic/api/logging.py +0 -150
agent/providers/anthropic/api_handler.py +0 -140
agent/providers/anthropic/callbacks/__init__.py +0 -5
agent/providers/anthropic/callbacks/manager.py +0 -65
agent/providers/anthropic/loop.py +0 -568
agent/providers/anthropic/prompts.py +0 -23
agent/providers/anthropic/response_handler.py +0 -226
agent/providers/anthropic/tools/__init__.py +0 -33
agent/providers/anthropic/tools/base.py +0 -88
agent/providers/anthropic/tools/bash.py +0 -66
agent/providers/anthropic/tools/collection.py +0 -34
agent/providers/anthropic/tools/computer.py +0 -396
agent/providers/anthropic/tools/edit.py +0 -326
agent/providers/anthropic/tools/manager.py +0 -54
agent/providers/anthropic/tools/run.py +0 -42
agent/providers/anthropic/types.py +0 -16
agent/providers/anthropic/utils.py +0 -381
agent/providers/omni/__init__.py +0 -8
agent/providers/omni/api_handler.py +0 -42
agent/providers/omni/clients/anthropic.py +0 -103
agent/providers/omni/clients/base.py +0 -35
agent/providers/omni/clients/oaicompat.py +0 -195
agent/providers/omni/clients/ollama.py +0 -122
agent/providers/omni/clients/openai.py +0 -155
agent/providers/omni/clients/utils.py +0 -25
agent/providers/omni/image_utils.py +0 -34
agent/providers/omni/loop.py +0 -990
agent/providers/omni/parser.py +0 -307
agent/providers/omni/prompts.py +0 -64
agent/providers/omni/tools/__init__.py +0 -30
agent/providers/omni/tools/base.py +0 -29
agent/providers/omni/tools/bash.py +0 -74
agent/providers/omni/tools/computer.py +0 -179
agent/providers/omni/tools/manager.py +0 -61
agent/providers/omni/utils.py +0 -236
agent/providers/openai/__init__.py +0 -6
agent/providers/openai/api_handler.py +0 -456
agent/providers/openai/loop.py +0 -472
agent/providers/openai/response_handler.py +0 -205
agent/providers/openai/tools/__init__.py +0 -15
agent/providers/openai/tools/base.py +0 -79
agent/providers/openai/tools/computer.py +0 -326
agent/providers/openai/tools/manager.py +0 -106
agent/providers/openai/types.py +0 -36
agent/providers/openai/utils.py +0 -98
agent/providers/uitars/__init__.py +0 -1
agent/providers/uitars/clients/base.py +0 -35
agent/providers/uitars/clients/mlxvlm.py +0 -263
agent/providers/uitars/clients/oaicompat.py +0 -214
agent/providers/uitars/loop.py +0 -660
agent/providers/uitars/prompts.py +0 -63
agent/providers/uitars/tools/__init__.py +0 -1
agent/providers/uitars/tools/computer.py +0 -283
agent/providers/uitars/tools/manager.py +0 -60
agent/providers/uitars/utils.py +0 -264
cua_agent-0.3.2.dist-info/METADATA +0 -295
cua_agent-0.3.2.dist-info/RECORD +0 -87
{cua_agent-0.3.2.dist-info → cua_agent-0.4.0.dist-info}/WHEEL +0 -0
{cua_agent-0.3.2.dist-info → cua_agent-0.4.0.dist-info}/entry_points.txt +0 -0

agent/callbacks/pii_anonymization.py ADDED Viewed

@@ -0,0 +1,259 @@
+"""
+PII anonymization callback handler using Microsoft Presidio for text and image redaction.
+"""
+from typing import List, Dict, Any, Optional, Tuple
+from .base import AsyncCallbackHandler
+import base64
+import io
+import logging
+# Presidio and Pillow are optional dependencies. Record whether they could be
+# imported so PIIAnonymizationCallback.__init__ can raise a descriptive
+# ImportError instead of failing on the first use of a missing name.
+try:
+    from presidio_analyzer import AnalyzerEngine
+    from presidio_anonymizer import AnonymizerEngine, DeanonymizeEngine
+    from presidio_anonymizer.entities import RecognizerResult, OperatorConfig
+    from presidio_image_redactor import ImageRedactorEngine
+    from PIL import Image
+    PRESIDIO_AVAILABLE = True
+except ImportError:
+    PRESIDIO_AVAILABLE = False
+# Module-level logger; used below for anonymization/redaction warnings.
+logger = logging.getLogger(__name__)
+class PIIAnonymizationCallback(AsyncCallbackHandler):
+    """
+    Callback handler that anonymizes PII in text and images using Microsoft Presidio.
+    This handler:
+    1. Anonymizes PII in messages before sending to the agent loop
+    2. Deanonymizes PII in tool calls and message outputs after the agent loop
+    3. Redacts PII from images in computer_call_output messages
+    """
+    def __init__(
+        self,
+        anonymize_text: bool = True,
+        anonymize_images: bool = True,
+        entities_to_anonymize: Optional[List[str]] = None,
+        anonymization_operator: str = "replace",
+        image_redaction_color: Tuple[int, int, int] = (255, 192, 203)  # Pink
+    ):
+        """
+        Initialize the PII anonymization callback.
+        Args:
+            anonymize_text: Whether to anonymize text content
+            anonymize_images: Whether to redact images
+            entities_to_anonymize: List of entity types to anonymize (None for all)
+            anonymization_operator: Presidio operator to use ("replace", "mask", "redact", etc.)
+            image_redaction_color: RGB color for image redaction
+        """
+        if not PRESIDIO_AVAILABLE:
+            raise ImportError(
+                "Presidio is not available. Install with: "
+                "pip install presidio-analyzer presidio-anonymizer presidio-image-redactor"
+            )
+        self.anonymize_text = anonymize_text
+        self.anonymize_images = anonymize_images
+        self.entities_to_anonymize = entities_to_anonymize
+        self.anonymization_operator = anonymization_operator
+        self.image_redaction_color = image_redaction_color
+        # Initialize Presidio engines
+        self.analyzer = AnalyzerEngine()
+        self.anonymizer = AnonymizerEngine()
+        self.deanonymizer = DeanonymizeEngine()
+        self.image_redactor = ImageRedactorEngine()
+        # Store anonymization mappings for deanonymization
+        self.anonymization_mappings: Dict[str, Any] = {}
+    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """
+        Anonymize PII in messages before sending to agent loop.
+        Args:
+            messages: List of message dictionaries
+        Returns:
+            List of messages with PII anonymized
+        """
+        if not self.anonymize_text and not self.anonymize_images:
+            return messages
+        anonymized_messages = []
+        for msg in messages:
+            anonymized_msg = await self._anonymize_message(msg)
+            anonymized_messages.append(anonymized_msg)
+        return anonymized_messages
+    async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """
+        Deanonymize PII in tool calls and message outputs after agent loop.
+        Args:
+            output: List of output dictionaries
+        Returns:
+            List of output with PII deanonymized for tool calls
+        """
+        if not self.anonymize_text:
+            return output
+        deanonymized_output = []
+        for item in output:
+            # Only deanonymize tool calls and computer_call messages
+            if item.get("type") in ["computer_call", "computer_call_output"]:
+                deanonymized_item = await self._deanonymize_item(item)
+                deanonymized_output.append(deanonymized_item)
+            else:
+                deanonymized_output.append(item)
+        return deanonymized_output
+    async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
+        """Anonymize PII in a single message."""
+        msg_copy = message.copy()
+        # Anonymize text content
+        if self.anonymize_text:
+            msg_copy = await self._anonymize_text_content(msg_copy)
+        # Redact images in computer_call_output
+        if self.anonymize_images and msg_copy.get("type") == "computer_call_output":
+            msg_copy = await self._redact_image_content(msg_copy)
+        return msg_copy
+    async def _anonymize_text_content(self, message: Dict[str, Any]) -> Dict[str, Any]:
+        """Anonymize text content in a message."""
+        msg_copy = message.copy()
+        # Handle content array
+        content = msg_copy.get("content", [])
+        if isinstance(content, str):
+            anonymized_text, _ = await self._anonymize_text(content)
+            msg_copy["content"] = anonymized_text
+        elif isinstance(content, list):
+            anonymized_content = []
+            for item in content:
+                if isinstance(item, dict) and item.get("type") == "text":
+                    text = item.get("text", "")
+                    anonymized_text, _ = await self._anonymize_text(text)
+                    item_copy = item.copy()
+                    item_copy["text"] = anonymized_text
+                    anonymized_content.append(item_copy)
+                else:
+                    anonymized_content.append(item)
+            msg_copy["content"] = anonymized_content
+        return msg_copy
+    async def _redact_image_content(self, message: Dict[str, Any]) -> Dict[str, Any]:
+        """Redact PII from images in computer_call_output messages."""
+        msg_copy = message.copy()
+        output = msg_copy.get("output", {})
+        if isinstance(output, dict) and "image_url" in output:
+            try:
+                # Extract base64 image data
+                image_url = output["image_url"]
+                if image_url.startswith("data:image/"):
+                    # Parse data URL
+                    header, data = image_url.split(",", 1)
+                    image_data = base64.b64decode(data)
+                    # Load image with PIL
+                    image = Image.open(io.BytesIO(image_data))
+                    # Redact PII from image
+                    redacted_image = self.image_redactor.redact(image, self.image_redaction_color)
+                    # Convert back to base64
+                    buffer = io.BytesIO()
+                    redacted_image.save(buffer, format="PNG")
+                    redacted_data = base64.b64encode(buffer.getvalue()).decode()
+                    # Update image URL
+                    output_copy = output.copy()
+                    output_copy["image_url"] = f"data:image/png;base64,{redacted_data}"
+                    msg_copy["output"] = output_copy
+            except Exception as e:
+                logger.warning(f"Failed to redact image: {e}")
+        return msg_copy
+    async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
+        """Deanonymize PII in tool calls and computer outputs."""
+        item_copy = item.copy()
+        # Handle computer_call arguments
+        if item.get("type") == "computer_call":
+            args = item_copy.get("args", {})
+            if isinstance(args, dict):
+                deanonymized_args = {}
+                for key, value in args.items():
+                    if isinstance(value, str):
+                        deanonymized_value, _ = await self._deanonymize_text(value)
+                        deanonymized_args[key] = deanonymized_value
+                    else:
+                        deanonymized_args[key] = value
+                item_copy["args"] = deanonymized_args
+        return item_copy
+    async def _anonymize_text(self, text: str) -> Tuple[str, List[RecognizerResult]]:
+        """Anonymize PII in text and return the anonymized text and results."""
+        if not text.strip():
+            return text, []
+        try:
+            # Analyze text for PII
+            analyzer_results = self.analyzer.analyze(
+                text=text,
+                entities=self.entities_to_anonymize,
+                language="en"
+            )
+            if not analyzer_results:
+                return text, []
+            # Anonymize the text
+            anonymized_result = self.anonymizer.anonymize(
+                text=text,
+                analyzer_results=analyzer_results,
+                operators={entity_type: OperatorConfig(self.anonymization_operator)
+                          for entity_type in set(result.entity_type for result in analyzer_results)}
+            )
+            # Store mapping for deanonymization
+            mapping_key = str(hash(text))
+            self.anonymization_mappings[mapping_key] = {
+                "original": text,
+                "anonymized": anonymized_result.text,
+                "results": analyzer_results
+            }
+            return anonymized_result.text, analyzer_results
+        except Exception as e:
+            logger.warning(f"Failed to anonymize text: {e}")
+            return text, []
+    async def _deanonymize_text(self, text: str) -> Tuple[str, bool]:
+        """Attempt to deanonymize text using stored mappings."""
+        try:
+            # Look for matching anonymized text in mappings
+            for mapping_key, mapping in self.anonymization_mappings.items():
+                if mapping["anonymized"] == text:
+                    return mapping["original"], True
+            # If no mapping found, return original text
+            return text, False
+        except Exception as e:
+            logger.warning(f"Failed to deanonymize text: {e}")
+            return text, False

agent/callbacks/telemetry.py ADDED Viewed

@@ -0,0 +1,210 @@
+"""
+Telemetry callback handler for Computer-Use Agent (cua-agent)
+"""
+import time
+import uuid
+from typing import List, Dict, Any, Optional, Union
+from .base import AsyncCallbackHandler
+from ..telemetry import (
+    record_event,
+    is_telemetry_enabled,
+    set_dimension,
+    SYSTEM_INFO,
+)
+class TelemetryCallback(AsyncCallbackHandler):
+    """
+    Telemetry callback handler for Computer-Use Agent (cua-agent)
+    Tracks agent usage, performance metrics, and optionally trajectory data.
+    """
+    def __init__(
+        self,
+        agent,
+        log_trajectory: bool = False
+    ):
+        """
+        Initialize telemetry callback.
+        Args:
+            agent: The ComputerAgent instance
+            log_trajectory: Whether to log full trajectory items (opt-in)
+        """
+        self.agent = agent
+        self.log_trajectory = log_trajectory
+        # Generate session/run IDs
+        self.session_id = str(uuid.uuid4())
+        self.run_id = None
+        # Track timing and metrics
+        self.run_start_time = None
+        self.step_count = 0
+        self.step_start_time = None
+        self.total_usage = {
+            "prompt_tokens": 0,
+            "completion_tokens": 0,
+            "total_tokens": 0,
+            "response_cost": 0.0
+        }
+        # Record agent initialization
+        if is_telemetry_enabled():
+            self._record_agent_initialization()
+    def _record_agent_initialization(self) -> None:
+        """Record agent type/model and session initialization."""
+        agent_info = {
+            "session_id": self.session_id,
+            "agent_type": self.agent.agent_loop.__name__,
+            "model": getattr(self.agent, 'model', 'unknown'),
+            **SYSTEM_INFO
+        }
+        # Set session-level dimensions
+        set_dimension("session_id", self.session_id)
+        set_dimension("agent_type", agent_info["agent_type"])
+        set_dimension("model", agent_info["model"])
+        record_event("agent_session_start", agent_info)
+    async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
+        """Called at the start of an agent run loop."""
+        if not is_telemetry_enabled():
+            return
+        self.run_id = str(uuid.uuid4())
+        self.run_start_time = time.time()
+        self.step_count = 0
+        # Calculate input context size
+        input_context_size = self._calculate_context_size(old_items)
+        run_data = {
+            "session_id": self.session_id,
+            "run_id": self.run_id,
+            "start_time": self.run_start_time,
+            "input_context_size": input_context_size,
+            "num_existing_messages": len(old_items)
+        }
+        # Log trajectory if opted in
+        if self.log_trajectory:
+            trajectory = self._extract_trajectory(old_items)
+            if trajectory:
+                run_data["uploaded_trajectory"] = trajectory
+        set_dimension("run_id", self.run_id)
+        record_event("agent_run_start", run_data)
+    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
+        """Called at the end of an agent run loop."""
+        if not is_telemetry_enabled() or not self.run_start_time:
+            return
+        run_duration = time.time() - self.run_start_time
+        run_data = {
+            "session_id": self.session_id,
+            "run_id": self.run_id,
+            "end_time": time.time(),
+            "duration_seconds": run_duration,
+            "num_steps": self.step_count,
+            "total_usage": self.total_usage.copy()
+        }
+        # Log trajectory if opted in
+        if self.log_trajectory:
+            trajectory = self._extract_trajectory(new_items)
+            if trajectory:
+                run_data["uploaded_trajectory"] = trajectory
+        record_event("agent_run_end", run_data)
+    async def on_usage(self, usage: Dict[str, Any]) -> None:
+        """Called when usage information is received."""
+        if not is_telemetry_enabled():
+            return
+        # Accumulate usage stats
+        self.total_usage["prompt_tokens"] += usage.get("prompt_tokens", 0)
+        self.total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
+        self.total_usage["total_tokens"] += usage.get("total_tokens", 0)
+        self.total_usage["response_cost"] += usage.get("response_cost", 0.0)
+        # Record individual usage event
+        usage_data = {
+            "session_id": self.session_id,
+            "run_id": self.run_id,
+            "step": self.step_count,
+            **usage
+        }
+        record_event("agent_usage", usage_data)
+    async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
+        """Called when responses are received."""
+        if not is_telemetry_enabled():
+            return
+        self.step_count += 1
+        step_duration = None
+        if self.step_start_time:
+            step_duration = time.time() - self.step_start_time
+        self.step_start_time = time.time()
+        step_data = {
+            "session_id": self.session_id,
+            "run_id": self.run_id,
+            "step": self.step_count,
+            "timestamp": self.step_start_time
+        }
+        if step_duration is not None:
+            step_data["duration_seconds"] = step_duration
+        record_event("agent_step", step_data)
+    def _calculate_context_size(self, items: List[Dict[str, Any]]) -> int:
+        """Calculate approximate context size in tokens/characters."""
+        total_size = 0
+        for item in items:
+            if item.get("type") == "message" and "content" in item:
+                content = item["content"]
+                if isinstance(content, str):
+                    total_size += len(content)
+                elif isinstance(content, list):
+                    for part in content:
+                        if isinstance(part, dict) and "text" in part:
+                            total_size += len(part["text"])
+            elif "content" in item and isinstance(item["content"], str):
+                total_size += len(item["content"])
+        return total_size
+    def _extract_trajectory(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Extract trajectory items that should be logged."""
+        trajectory = []
+        for item in items:
+            # Include user messages, assistant messages, reasoning, computer calls, and computer outputs
+            if (
+                item.get("role") == "user" or  # User inputs
+                (item.get("type") == "message" and item.get("role") == "assistant") or  # Model outputs
+                item.get("type") == "reasoning" or  # Reasoning traces
+                item.get("type") == "computer_call" or  # Computer actions
+                item.get("type") == "computer_call_output"  # Computer outputs
+            ):
+                # Create a copy of the item with timestamp
+                trajectory_item = item.copy()
+                trajectory_item["logged_at"] = time.time()
+                trajectory.append(trajectory_item)
+        return trajectory

cua-agent 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

Potentially problematic release.

cua-agent 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl