PyPI - cua-agent - Versions diffs - 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl - Mend

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (79)

agent/__init__.py +4 -10
agent/__main__.py +2 -1
agent/adapters/__init__.py +4 -0
agent/adapters/azure_ml_adapter.py +283 -0
agent/adapters/cua_adapter.py +161 -0
agent/adapters/huggingfacelocal_adapter.py +67 -125
agent/adapters/human_adapter.py +116 -114
agent/adapters/mlxvlm_adapter.py +110 -99
agent/adapters/models/__init__.py +41 -0
agent/adapters/models/generic.py +78 -0
agent/adapters/models/internvl.py +290 -0
agent/adapters/models/opencua.py +115 -0
agent/adapters/models/qwen2_5_vl.py +78 -0
agent/agent.py +337 -185
agent/callbacks/__init__.py +9 -4
agent/callbacks/base.py +45 -31
agent/callbacks/budget_manager.py +22 -10
agent/callbacks/image_retention.py +54 -98
agent/callbacks/logging.py +55 -42
agent/callbacks/operator_validator.py +35 -33
agent/callbacks/otel.py +291 -0
agent/callbacks/pii_anonymization.py +19 -16
agent/callbacks/prompt_instructions.py +47 -0
agent/callbacks/telemetry.py +99 -61
agent/callbacks/trajectory_saver.py +95 -69
agent/cli.py +269 -119
agent/computers/__init__.py +14 -9
agent/computers/base.py +32 -19
agent/computers/cua.py +52 -25
agent/computers/custom.py +78 -71
agent/decorators.py +23 -14
agent/human_tool/__init__.py +2 -7
agent/human_tool/__main__.py +6 -2
agent/human_tool/server.py +48 -37
agent/human_tool/ui.py +359 -235
agent/integrations/hud/__init__.py +38 -99
agent/integrations/hud/agent.py +369 -0
agent/integrations/hud/proxy.py +166 -52
agent/loops/__init__.py +44 -14
agent/loops/anthropic.py +579 -492
agent/loops/base.py +19 -15
agent/loops/composed_grounded.py +136 -150
agent/loops/fara/__init__.py +8 -0
agent/loops/fara/config.py +506 -0
agent/loops/fara/helpers.py +357 -0
agent/loops/fara/schema.py +143 -0
agent/loops/gelato.py +183 -0
agent/loops/gemini.py +935 -0
agent/loops/generic_vlm.py +601 -0
agent/loops/glm45v.py +140 -135
agent/loops/gta1.py +48 -51
agent/loops/holo.py +218 -0
agent/loops/internvl.py +180 -0
agent/loops/moondream3.py +493 -0
agent/loops/omniparser.py +326 -226
agent/loops/openai.py +50 -51
agent/loops/opencua.py +134 -0
agent/loops/uiins.py +175 -0
agent/loops/uitars.py +247 -206
agent/loops/uitars2.py +951 -0
agent/playground/__init__.py +5 -0
agent/playground/server.py +301 -0
agent/proxy/examples.py +61 -57
agent/proxy/handlers.py +46 -39
agent/responses.py +447 -347
agent/tools/__init__.py +24 -0
agent/tools/base.py +253 -0
agent/tools/browser_tool.py +423 -0
agent/types.py +11 -5
agent/ui/__init__.py +1 -1
agent/ui/__main__.py +1 -1
agent/ui/gradio/app.py +25 -22
agent/ui/gradio/ui_components.py +314 -167
cua_agent-0.7.16.dist-info/METADATA +85 -0
cua_agent-0.7.16.dist-info/RECORD +79 -0
{cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
cua_agent-0.4.22.dist-info/METADATA +0 -436
cua_agent-0.4.22.dist-info/RECORD +0 -51
{cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0

agent/callbacks/pii_anonymization.py CHANGED Viewed

@@ -2,38 +2,41 @@
 PII anonymization callback handler using Microsoft Presidio for text and image redaction.
 """
-from typing import List, Dict, Any, Optional, Tuple
-from .base import AsyncCallbackHandler
 import base64
 import io
 import logging
+from typing import Any, Dict, List, Optional, Tuple
+from .base import AsyncCallbackHandler
 try:
     # TODO: Add Presidio dependencies
     from PIL import Image
     PRESIDIO_AVAILABLE = True
 except ImportError:
     PRESIDIO_AVAILABLE = False
 logger = logging.getLogger(__name__)
 class PIIAnonymizationCallback(AsyncCallbackHandler):
     """
     Callback handler that anonymizes PII in text and images using Microsoft Presidio.
     This handler:
     1. Anonymizes PII in messages before sending to the agent loop
     2. Deanonymizes PII in tool calls and message outputs after the agent loop
     3. Redacts PII from images in computer_call_output messages
     """
     def __init__(
         self,
         # TODO: Any extra kwargs if needed
     ):
         """
         Initialize the PII anonymization callback.
         Args:
             anonymize_text: Whether to anonymize text content
             anonymize_images: Whether to redact images
@@ -46,16 +49,16 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
                 "Presidio is not available. Install with: "
                 "pip install cua-agent[pii-anonymization]"
             )
         # TODO: Implement __init__
     async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
         Anonymize PII in messages before sending to agent loop.
         Args:
             messages: List of message dictionaries
         Returns:
             List of messages with PII anonymized
         """
@@ -63,16 +66,16 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
         for msg in messages:
             anonymized_msg = await self._anonymize_message(msg)
             anonymized_messages.append(anonymized_msg)
         return anonymized_messages
     async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
         Deanonymize PII in tool calls and message outputs after agent loop.
         Args:
             output: List of output dictionaries
         Returns:
             List of output with PII deanonymized for tool calls
         """
@@ -84,13 +87,13 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
                 deanonymized_output.append(deanonymized_item)
             else:
                 deanonymized_output.append(item)
         return deanonymized_output
     async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
         # TODO: Implement _anonymize_message
         return message
     async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
         # TODO: Implement _deanonymize_item
         return item

agent/callbacks/prompt_instructions.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""
+Prompt instructions callback.
+This callback allows simple prompt engineering by pre-pending a user
+instructions message to the start of the conversation before each LLM call.
+Usage:
+    from agent.callbacks import PromptInstructionsCallback
+    agent = ComputerAgent(
+        model="openai/computer-use-preview",
+        callbacks=[PromptInstructionsCallback("Follow these rules...")]
+    )
+"""
+from typing import Any, Dict, List, Optional
+from .base import AsyncCallbackHandler
+class PromptInstructionsCallback(AsyncCallbackHandler):
+    """
+    Prepend a user instructions message to the message list.
+    This is a minimal, non-invasive way to guide the agent's behavior without
+    modifying agent loops or tools. It works with any provider/loop since it
+    only alters the messages array before sending to the model.
+    """
+    def __init__(self, instructions: Optional[str]) -> None:
+        self.instructions = instructions
+    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        # Pre-pend instructions message
+        if not self.instructions:
+            return messages
+        # Ensure we don't duplicate if already present at the front
+        if messages and isinstance(messages[0], dict):
+            first = messages[0]
+            if first.get("role") == "user" and first.get("content") == self.instructions:
+                return messages
+        return [
+            {"role": "user", "content": self.instructions},
+        ] + messages

agent/callbacks/telemetry.py CHANGED Viewed

@@ -2,17 +2,17 @@
 Telemetry callback handler for Computer-Use Agent (cua-agent)
 """
+import platform
 import time
 import uuid
-from typing import List, Dict, Any, Optional, Union
+from typing import Any, Dict, List, Optional, Union
-from .base import AsyncCallbackHandler
 from core.telemetry import (
-    record_event,
     is_telemetry_enabled,
+    record_event,
 )
-import platform
+from .base import AsyncCallbackHandler
 SYSTEM_INFO = {
     "os": platform.system().lower(),
@@ -20,32 +20,29 @@ SYSTEM_INFO = {
     "python_version": platform.python_version(),
 }
 class TelemetryCallback(AsyncCallbackHandler):
     """
     Telemetry callback handler for Computer-Use Agent (cua-agent)
     Tracks agent usage, performance metrics, and optionally trajectory data.
     """
-    def __init__(
-        self,
-        agent,
-        log_trajectory: bool = False
-    ):
+    def __init__(self, agent, log_trajectory: bool = False):
         """
         Initialize telemetry callback.
         Args:
             agent: The ComputerAgent instance
             log_trajectory: Whether to log full trajectory items (opt-in)
         """
         self.agent = agent
         self.log_trajectory = log_trajectory
         # Generate session/run IDs
         self.session_id = str(uuid.uuid4())
         self.run_id = None
         # Track timing and metrics
         self.run_start_time = None
         self.step_count = 0
@@ -54,126 +51,165 @@ class TelemetryCallback(AsyncCallbackHandler):
             "prompt_tokens": 0,
             "completion_tokens": 0,
             "total_tokens": 0,
-            "response_cost": 0.0
+            "response_cost": 0.0,
         }
         # Record agent initialization
         if is_telemetry_enabled():
             self._record_agent_initialization()
     def _record_agent_initialization(self) -> None:
         """Record agent type/model and session initialization."""
+        # Get the agent loop type (class name)
+        agent_type = "unknown"
+        if hasattr(self.agent, "agent_loop") and self.agent.agent_loop is not None:
+            agent_type = type(self.agent.agent_loop).__name__
         agent_info = {
             "session_id": self.session_id,
-            "agent_type": self.agent.agent_loop.__name__ if hasattr(self.agent, 'agent_loop') else 'unknown',
-            "model": getattr(self.agent, 'model', 'unknown'),
-            **SYSTEM_INFO
+            "agent_type": agent_type,
+            "model": getattr(self.agent, "model", "unknown"),
+            **SYSTEM_INFO,
         }
+        # Include VM name if available
+        vm_name = self._get_vm_name()
+        if vm_name:
+            agent_info["vm_name"] = vm_name
         record_event("agent_session_start", agent_info)
     async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
         """Called at the start of an agent run loop."""
         if not is_telemetry_enabled():
             return
         self.run_id = str(uuid.uuid4())
         self.run_start_time = time.time()
         self.step_count = 0
         # Calculate input context size
         input_context_size = self._calculate_context_size(old_items)
         run_data = {
             "session_id": self.session_id,
             "run_id": self.run_id,
             "start_time": self.run_start_time,
             "input_context_size": input_context_size,
-            "num_existing_messages": len(old_items)
+            "num_existing_messages": len(old_items),
         }
+        # Include VM name if available
+        vm_name = self._get_vm_name()
+        if vm_name:
+            run_data["vm_name"] = vm_name
         # Log trajectory if opted in
         if self.log_trajectory:
             trajectory = self._extract_trajectory(old_items)
             if trajectory:
                 run_data["uploaded_trajectory"] = trajectory
         record_event("agent_run_start", run_data)
-    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
+    async def on_run_end(
+        self,
+        kwargs: Dict[str, Any],
+        old_items: List[Dict[str, Any]],
+        new_items: List[Dict[str, Any]],
+    ) -> None:
         """Called at the end of an agent run loop."""
         if not is_telemetry_enabled() or not self.run_start_time:
             return
         run_duration = time.time() - self.run_start_time
         run_data = {
             "session_id": self.session_id,
             "run_id": self.run_id,
             "end_time": time.time(),
             "duration_seconds": run_duration,
             "num_steps": self.step_count,
-            "total_usage": self.total_usage.copy()
+            "total_usage": self.total_usage.copy(),
         }
+        # Include VM name if available
+        vm_name = self._get_vm_name()
+        if vm_name:
+            run_data["vm_name"] = vm_name
         # Log trajectory if opted in
         if self.log_trajectory:
             trajectory = self._extract_trajectory(new_items)
             if trajectory:
                 run_data["uploaded_trajectory"] = trajectory
         record_event("agent_run_end", run_data)
     async def on_usage(self, usage: Dict[str, Any]) -> None:
         """Called when usage information is received."""
         if not is_telemetry_enabled():
             return
         # Accumulate usage stats
         self.total_usage["prompt_tokens"] += usage.get("prompt_tokens", 0)
-        self.total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
+        self.total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
         self.total_usage["total_tokens"] += usage.get("total_tokens", 0)
         self.total_usage["response_cost"] += usage.get("response_cost", 0.0)
         # Record individual usage event
         usage_data = {
             "session_id": self.session_id,
             "run_id": self.run_id,
             "step": self.step_count,
-            **usage
+            **usage,
         }
         record_event("agent_usage", usage_data)
     async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
         """Called when responses are received."""
         if not is_telemetry_enabled():
             return
         self.step_count += 1
         step_duration = None
         if self.step_start_time:
             step_duration = time.time() - self.step_start_time
         self.step_start_time = time.time()
         step_data = {
             "session_id": self.session_id,
             "run_id": self.run_id,
             "step": self.step_count,
-            "timestamp": self.step_start_time
+            "timestamp": self.step_start_time,
         }
         if step_duration is not None:
             step_data["duration_seconds"] = step_duration
         record_event("agent_step", step_data)
+    def _get_vm_name(self) -> Optional[str]:
+        """Extract VM name from agent's computer handler if available."""
+        try:
+            if hasattr(self.agent, "computer_handler") and self.agent.computer_handler:
+                handler = self.agent.computer_handler
+                # Check if it's a cuaComputerHandler with a cua_computer
+                if hasattr(handler, "cua_computer"):
+                    computer = handler.cua_computer
+                    if hasattr(computer, "config") and hasattr(computer.config, "name"):
+                        return computer.config.name
+        except Exception:
+            pass
+        return None
     def _calculate_context_size(self, items: List[Dict[str, Any]]) -> int:
         """Calculate approximate context size in tokens/characters."""
         total_size = 0
         for item in items:
             if item.get("type") == "message" and "content" in item:
                 content = item["content"]
@@ -185,25 +221,27 @@ class TelemetryCallback(AsyncCallbackHandler):
                             total_size += len(part["text"])
             elif "content" in item and isinstance(item["content"], str):
                 total_size += len(item["content"])
         return total_size
     def _extract_trajectory(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Extract trajectory items that should be logged."""
         trajectory = []
         for item in items:
             # Include user messages, assistant messages, reasoning, computer calls, and computer outputs
             if (
-                item.get("role") == "user" or  # User inputs
-                (item.get("type") == "message" and item.get("role") == "assistant") or  # Model outputs
-                item.get("type") == "reasoning" or  # Reasoning traces
-                item.get("type") == "computer_call" or  # Computer actions
-                item.get("type") == "computer_call_output"  # Computer outputs
+                item.get("role") == "user"  # User inputs
+                or (
+                    item.get("type") == "message" and item.get("role") == "assistant"
+                )  # Model outputs
+                or item.get("type") == "reasoning"  # Reasoning traces
+                or item.get("type") == "computer_call"  # Computer actions
+                or item.get("type") == "computer_call_output"  # Computer outputs
             ):
                 # Create a copy of the item with timestamp
                 trajectory_item = item.copy()
                 trajectory_item["logged_at"] = time.time()
                 trajectory.append(trajectory_item)
-        return trajectory
+        return trajectory

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl

Potentially problematic release.

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl