PyPI - cua-agent - Versions diffs - 0.4.34__py3-none-any.whl → 0.4.36__py3-none-any.whl - Mend

cua-agent 0.4.34__py3-none-any.whl → 0.4.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (61) hide show

agent/__init__.py +4 -10
agent/__main__.py +2 -1
agent/adapters/huggingfacelocal_adapter.py +54 -61
agent/adapters/human_adapter.py +116 -114
agent/adapters/mlxvlm_adapter.py +110 -99
agent/adapters/models/__init__.py +14 -6
agent/adapters/models/generic.py +7 -4
agent/adapters/models/internvl.py +66 -30
agent/adapters/models/opencua.py +23 -8
agent/adapters/models/qwen2_5_vl.py +7 -4
agent/agent.py +184 -158
agent/callbacks/__init__.py +4 -4
agent/callbacks/base.py +45 -31
agent/callbacks/budget_manager.py +22 -10
agent/callbacks/image_retention.py +18 -13
agent/callbacks/logging.py +55 -42
agent/callbacks/operator_validator.py +3 -1
agent/callbacks/pii_anonymization.py +19 -16
agent/callbacks/telemetry.py +67 -61
agent/callbacks/trajectory_saver.py +90 -70
agent/cli.py +115 -110
agent/computers/__init__.py +13 -8
agent/computers/base.py +32 -19
agent/computers/cua.py +33 -25
agent/computers/custom.py +78 -71
agent/decorators.py +23 -14
agent/human_tool/__init__.py +2 -7
agent/human_tool/__main__.py +6 -2
agent/human_tool/server.py +48 -37
agent/human_tool/ui.py +235 -185
agent/integrations/hud/__init__.py +15 -21
agent/integrations/hud/agent.py +101 -83
agent/integrations/hud/proxy.py +90 -57
agent/loops/__init__.py +25 -21
agent/loops/anthropic.py +537 -483
agent/loops/base.py +13 -14
agent/loops/composed_grounded.py +135 -149
agent/loops/gemini.py +31 -12
agent/loops/glm45v.py +135 -133
agent/loops/gta1.py +47 -50
agent/loops/holo.py +4 -2
agent/loops/internvl.py +6 -11
agent/loops/moondream3.py +36 -12
agent/loops/omniparser.py +215 -210
agent/loops/openai.py +49 -50
agent/loops/opencua.py +29 -41
agent/loops/qwen.py +510 -0
agent/loops/uitars.py +237 -202
agent/proxy/examples.py +54 -50
agent/proxy/handlers.py +27 -34
agent/responses.py +330 -330
agent/types.py +11 -5
agent/ui/__init__.py +1 -1
agent/ui/__main__.py +1 -1
agent/ui/gradio/app.py +23 -18
agent/ui/gradio/ui_components.py +310 -161
{cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/METADATA +18 -10
cua_agent-0.4.36.dist-info/RECORD +64 -0
cua_agent-0.4.34.dist-info/RECORD +0 -63
{cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/WHEEL +0 -0
{cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/entry_points.txt +0 -0

agent/callbacks/pii_anonymization.py CHANGED Viewed

@@ -2,38 +2,41 @@
 PII anonymization callback handler using Microsoft Presidio for text and image redaction.
 """
-from typing import List, Dict, Any, Optional, Tuple
-from .base import AsyncCallbackHandler
 import base64
 import io
 import logging
+from typing import Any, Dict, List, Optional, Tuple
+from .base import AsyncCallbackHandler
 try:
     # TODO: Add Presidio dependencies
     from PIL import Image
     PRESIDIO_AVAILABLE = True
 except ImportError:
     PRESIDIO_AVAILABLE = False
 logger = logging.getLogger(__name__)
 class PIIAnonymizationCallback(AsyncCallbackHandler):
     """
     Callback handler that anonymizes PII in text and images using Microsoft Presidio.
     This handler:
     1. Anonymizes PII in messages before sending to the agent loop
     2. Deanonymizes PII in tool calls and message outputs after the agent loop
     3. Redacts PII from images in computer_call_output messages
     """
     def __init__(
         self,
         # TODO: Any extra kwargs if needed
     ):
         """
         Initialize the PII anonymization callback.
         Args:
             anonymize_text: Whether to anonymize text content
             anonymize_images: Whether to redact images
@@ -46,16 +49,16 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
                 "Presidio is not available. Install with: "
                 "pip install cua-agent[pii-anonymization]"
             )
         # TODO: Implement __init__
     async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
         Anonymize PII in messages before sending to agent loop.
         Args:
             messages: List of message dictionaries
         Returns:
             List of messages with PII anonymized
         """
@@ -63,16 +66,16 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
         for msg in messages:
             anonymized_msg = await self._anonymize_message(msg)
             anonymized_messages.append(anonymized_msg)
         return anonymized_messages
     async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
         Deanonymize PII in tool calls and message outputs after agent loop.
         Args:
             output: List of output dictionaries
         Returns:
             List of output with PII deanonymized for tool calls
         """
@@ -84,13 +87,13 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
                 deanonymized_output.append(deanonymized_item)
             else:
                 deanonymized_output.append(item)
         return deanonymized_output
     async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
         # TODO: Implement _anonymize_message
         return message
     async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
         # TODO: Implement _deanonymize_item
         return item

agent/callbacks/telemetry.py CHANGED Viewed

@@ -2,17 +2,17 @@
 Telemetry callback handler for Computer-Use Agent (cua-agent)
 """
+import platform
 import time
 import uuid
-from typing import List, Dict, Any, Optional, Union
+from typing import Any, Dict, List, Optional, Union
-from .base import AsyncCallbackHandler
 from core.telemetry import (
-    record_event,
     is_telemetry_enabled,
+    record_event,
 )
-import platform
+from .base import AsyncCallbackHandler
 SYSTEM_INFO = {
     "os": platform.system().lower(),
@@ -20,32 +20,29 @@ SYSTEM_INFO = {
     "python_version": platform.python_version(),
 }
 class TelemetryCallback(AsyncCallbackHandler):
     """
     Telemetry callback handler for Computer-Use Agent (cua-agent)
     Tracks agent usage, performance metrics, and optionally trajectory data.
     """
-    def __init__(
-        self,
-        agent,
-        log_trajectory: bool = False
-    ):
+    def __init__(self, agent, log_trajectory: bool = False):
         """
         Initialize telemetry callback.
         Args:
             agent: The ComputerAgent instance
             log_trajectory: Whether to log full trajectory items (opt-in)
         """
         self.agent = agent
         self.log_trajectory = log_trajectory
         # Generate session/run IDs
         self.session_id = str(uuid.uuid4())
         self.run_id = None
         # Track timing and metrics
         self.run_start_time = None
         self.step_count = 0
@@ -54,126 +51,133 @@ class TelemetryCallback(AsyncCallbackHandler):
             "prompt_tokens": 0,
             "completion_tokens": 0,
             "total_tokens": 0,
-            "response_cost": 0.0
+            "response_cost": 0.0,
         }
         # Record agent initialization
         if is_telemetry_enabled():
             self._record_agent_initialization()
     def _record_agent_initialization(self) -> None:
         """Record agent type/model and session initialization."""
         agent_info = {
             "session_id": self.session_id,
-            "agent_type": self.agent.agent_loop.__name__ if hasattr(self.agent, 'agent_loop') else 'unknown',
-            "model": getattr(self.agent, 'model', 'unknown'),
-            **SYSTEM_INFO
+            "agent_type": (
+                self.agent.agent_loop.__name__ if hasattr(self.agent, "agent_loop") else "unknown"
+            ),
+            "model": getattr(self.agent, "model", "unknown"),
+            **SYSTEM_INFO,
         }
         record_event("agent_session_start", agent_info)
     async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
         """Called at the start of an agent run loop."""
         if not is_telemetry_enabled():
             return
         self.run_id = str(uuid.uuid4())
         self.run_start_time = time.time()
         self.step_count = 0
         # Calculate input context size
         input_context_size = self._calculate_context_size(old_items)
         run_data = {
             "session_id": self.session_id,
             "run_id": self.run_id,
             "start_time": self.run_start_time,
             "input_context_size": input_context_size,
-            "num_existing_messages": len(old_items)
+            "num_existing_messages": len(old_items),
         }
         # Log trajectory if opted in
         if self.log_trajectory:
             trajectory = self._extract_trajectory(old_items)
             if trajectory:
                 run_data["uploaded_trajectory"] = trajectory
         record_event("agent_run_start", run_data)
-    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
+    async def on_run_end(
+        self,
+        kwargs: Dict[str, Any],
+        old_items: List[Dict[str, Any]],
+        new_items: List[Dict[str, Any]],
+    ) -> None:
         """Called at the end of an agent run loop."""
         if not is_telemetry_enabled() or not self.run_start_time:
             return
         run_duration = time.time() - self.run_start_time
         run_data = {
             "session_id": self.session_id,
             "run_id": self.run_id,
             "end_time": time.time(),
             "duration_seconds": run_duration,
             "num_steps": self.step_count,
-            "total_usage": self.total_usage.copy()
+            "total_usage": self.total_usage.copy(),
         }
         # Log trajectory if opted in
         if self.log_trajectory:
             trajectory = self._extract_trajectory(new_items)
             if trajectory:
                 run_data["uploaded_trajectory"] = trajectory
         record_event("agent_run_end", run_data)
     async def on_usage(self, usage: Dict[str, Any]) -> None:
         """Called when usage information is received."""
         if not is_telemetry_enabled():
             return
         # Accumulate usage stats
         self.total_usage["prompt_tokens"] += usage.get("prompt_tokens", 0)
-        self.total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
+        self.total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
         self.total_usage["total_tokens"] += usage.get("total_tokens", 0)
         self.total_usage["response_cost"] += usage.get("response_cost", 0.0)
         # Record individual usage event
         usage_data = {
             "session_id": self.session_id,
             "run_id": self.run_id,
             "step": self.step_count,
-            **usage
+            **usage,
         }
         record_event("agent_usage", usage_data)
     async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
         """Called when responses are received."""
         if not is_telemetry_enabled():
             return
         self.step_count += 1
         step_duration = None
         if self.step_start_time:
             step_duration = time.time() - self.step_start_time
         self.step_start_time = time.time()
         step_data = {
             "session_id": self.session_id,
             "run_id": self.run_id,
             "step": self.step_count,
-            "timestamp": self.step_start_time
+            "timestamp": self.step_start_time,
         }
         if step_duration is not None:
             step_data["duration_seconds"] = step_duration
         record_event("agent_step", step_data)
     def _calculate_context_size(self, items: List[Dict[str, Any]]) -> int:
         """Calculate approximate context size in tokens/characters."""
         total_size = 0
         for item in items:
             if item.get("type") == "message" and "content" in item:
                 content = item["content"]
@@ -185,25 +189,27 @@ class TelemetryCallback(AsyncCallbackHandler):
                             total_size += len(part["text"])
             elif "content" in item and isinstance(item["content"], str):
                 total_size += len(item["content"])
         return total_size
     def _extract_trajectory(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Extract trajectory items that should be logged."""
         trajectory = []
         for item in items:
             # Include user messages, assistant messages, reasoning, computer calls, and computer outputs
             if (
-                item.get("role") == "user" or  # User inputs
-                (item.get("type") == "message" and item.get("role") == "assistant") or  # Model outputs
-                item.get("type") == "reasoning" or  # Reasoning traces
-                item.get("type") == "computer_call" or  # Computer actions
-                item.get("type") == "computer_call_output"  # Computer outputs
+                item.get("role") == "user"  # User inputs
+                or (
+                    item.get("type") == "message" and item.get("role") == "assistant"
+                )  # Model outputs
+                or item.get("type") == "reasoning"  # Reasoning traces
+                or item.get("type") == "computer_call"  # Computer actions
+                or item.get("type") == "computer_call_output"  # Computer outputs
             ):
                 # Create a copy of the item with timestamp
                 trajectory_item = item.copy()
                 trajectory_item["logged_at"] = time.time()
                 trajectory.append(trajectory_item)
-        return trajectory
+        return trajectory

cua-agent 0.4.34__py3-none-any.whl → 0.4.36__py3-none-any.whl

Potentially problematic release.

cua-agent 0.4.34__py3-none-any.whl → 0.4.36__py3-none-any.whl