cua-agent 0.4.14__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic; see the registry's advisory page for more details.

Files changed (82)
  1. agent/__init__.py +4 -19
  2. agent/__main__.py +2 -1
  3. agent/adapters/__init__.py +6 -0
  4. agent/adapters/azure_ml_adapter.py +283 -0
  5. agent/adapters/cua_adapter.py +161 -0
  6. agent/adapters/huggingfacelocal_adapter.py +67 -125
  7. agent/adapters/human_adapter.py +116 -114
  8. agent/adapters/mlxvlm_adapter.py +370 -0
  9. agent/adapters/models/__init__.py +41 -0
  10. agent/adapters/models/generic.py +78 -0
  11. agent/adapters/models/internvl.py +290 -0
  12. agent/adapters/models/opencua.py +115 -0
  13. agent/adapters/models/qwen2_5_vl.py +78 -0
  14. agent/agent.py +431 -241
  15. agent/callbacks/__init__.py +10 -3
  16. agent/callbacks/base.py +45 -31
  17. agent/callbacks/budget_manager.py +22 -10
  18. agent/callbacks/image_retention.py +54 -98
  19. agent/callbacks/logging.py +55 -42
  20. agent/callbacks/operator_validator.py +140 -0
  21. agent/callbacks/otel.py +291 -0
  22. agent/callbacks/pii_anonymization.py +19 -16
  23. agent/callbacks/prompt_instructions.py +47 -0
  24. agent/callbacks/telemetry.py +106 -69
  25. agent/callbacks/trajectory_saver.py +178 -70
  26. agent/cli.py +269 -119
  27. agent/computers/__init__.py +14 -9
  28. agent/computers/base.py +32 -19
  29. agent/computers/cua.py +52 -25
  30. agent/computers/custom.py +78 -71
  31. agent/decorators.py +23 -14
  32. agent/human_tool/__init__.py +2 -7
  33. agent/human_tool/__main__.py +6 -2
  34. agent/human_tool/server.py +48 -37
  35. agent/human_tool/ui.py +359 -235
  36. agent/integrations/hud/__init__.py +164 -74
  37. agent/integrations/hud/agent.py +338 -342
  38. agent/integrations/hud/proxy.py +297 -0
  39. agent/loops/__init__.py +44 -14
  40. agent/loops/anthropic.py +590 -492
  41. agent/loops/base.py +19 -15
  42. agent/loops/composed_grounded.py +142 -144
  43. agent/loops/fara/__init__.py +8 -0
  44. agent/loops/fara/config.py +506 -0
  45. agent/loops/fara/helpers.py +357 -0
  46. agent/loops/fara/schema.py +143 -0
  47. agent/loops/gelato.py +183 -0
  48. agent/loops/gemini.py +935 -0
  49. agent/loops/generic_vlm.py +601 -0
  50. agent/loops/glm45v.py +140 -135
  51. agent/loops/gta1.py +48 -51
  52. agent/loops/holo.py +218 -0
  53. agent/loops/internvl.py +180 -0
  54. agent/loops/moondream3.py +493 -0
  55. agent/loops/omniparser.py +326 -226
  56. agent/loops/openai.py +63 -56
  57. agent/loops/opencua.py +134 -0
  58. agent/loops/uiins.py +175 -0
  59. agent/loops/uitars.py +262 -212
  60. agent/loops/uitars2.py +951 -0
  61. agent/playground/__init__.py +5 -0
  62. agent/playground/server.py +301 -0
  63. agent/proxy/examples.py +196 -0
  64. agent/proxy/handlers.py +255 -0
  65. agent/responses.py +486 -339
  66. agent/tools/__init__.py +24 -0
  67. agent/tools/base.py +253 -0
  68. agent/tools/browser_tool.py +423 -0
  69. agent/types.py +20 -5
  70. agent/ui/__init__.py +1 -1
  71. agent/ui/__main__.py +1 -1
  72. agent/ui/gradio/app.py +25 -22
  73. agent/ui/gradio/ui_components.py +314 -167
  74. cua_agent-0.7.16.dist-info/METADATA +85 -0
  75. cua_agent-0.7.16.dist-info/RECORD +79 -0
  76. {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
  77. agent/integrations/hud/adapter.py +0 -121
  78. agent/integrations/hud/computer_handler.py +0 -187
  79. agent/telemetry.py +0 -142
  80. cua_agent-0.4.14.dist-info/METADATA +0 -436
  81. cua_agent-0.4.14.dist-info/RECORD +0 -50
  82. {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,291 @@
1
+ """
2
+ OpenTelemetry callback handler for Computer-Use Agent (cua-agent).
3
+
4
+ Instruments agent operations for the Four Golden Signals:
5
+ - Latency: Operation duration
6
+ - Traffic: Operation counts
7
+ - Errors: Error counts
8
+ - Saturation: Concurrent operations
9
+ """
10
+
11
+ import time
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ from .base import AsyncCallbackHandler
15
+
16
+ # Import OTEL functions - these are available when cua-core[telemetry] is installed
17
+ try:
18
+ from core.telemetry import (
19
+ add_breadcrumb,
20
+ capture_exception,
21
+ create_span,
22
+ is_otel_enabled,
23
+ record_error,
24
+ record_operation,
25
+ record_tokens,
26
+ set_context,
27
+ track_concurrent,
28
+ )
29
+
30
+ OTEL_AVAILABLE = True
31
+ except ImportError:
32
+ OTEL_AVAILABLE = False
33
+
34
+ def is_otel_enabled() -> bool:
35
+ return False
36
+
37
+
38
class OtelCallback(AsyncCallbackHandler):
    """
    OpenTelemetry callback handler for instrumentation.

    Tracks:
    - Agent session lifecycle (start/end)
    - Agent run lifecycle (start/end with duration)
    - Individual steps (with duration)
    - Computer actions (with duration)
    - Token usage
    - Errors

    All hooks are no-ops unless cua-core[telemetry] is installed and OTEL
    is enabled at runtime.
    """

    def __init__(self, agent: Any):
        """
        Initialize OTEL callback.

        Args:
            agent: The ComputerAgent instance
        """
        self.agent = agent
        self.model = getattr(agent, "model", "unknown")

        # Timing state (perf_counter timestamps; None = not currently timing)
        self.run_start_time: Optional[float] = None
        self.step_start_time: Optional[float] = None
        self.step_count = 0

        # Span management
        self._session_span: Optional[Any] = None
        self._run_span: Optional[Any] = None

        # Track concurrent sessions
        self._concurrent_tracker: Optional[Any] = None

        if OTEL_AVAILABLE and is_otel_enabled():
            # Set context for all events
            set_context(
                "agent",
                {
                    "model": self.model,
                    "agent_type": self._get_agent_type(),
                },
            )

    def _get_agent_type(self) -> str:
        """Get the agent loop type name (class name of agent.agent_loop)."""
        if hasattr(self.agent, "agent_loop") and self.agent.agent_loop is not None:
            return type(self.agent.agent_loop).__name__
        return "unknown"

    async def on_run_start(
        self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]
    ) -> None:
        """Called at the start of an agent run loop."""
        if not OTEL_AVAILABLE or not is_otel_enabled():
            return

        self.run_start_time = time.perf_counter()
        # Seed the step timer from the run start. This both (a) records the
        # first step's latency (previously lost because step_start_time was
        # only ever set inside on_responses) and (b) discards any stale
        # timestamp left over from a previous run, which would otherwise
        # produce a bogus cross-run "step duration" for step 1.
        self.step_start_time = self.run_start_time
        self.step_count = 0

        # Add breadcrumb for debugging
        add_breadcrumb(
            category="agent",
            message=f"Agent run started with model {self.model}",
            level="info",
            data={
                "model": self.model,
                "agent_type": self._get_agent_type(),
                "input_messages": len(old_items),
            },
        )

    async def on_run_end(
        self,
        kwargs: Dict[str, Any],
        old_items: List[Dict[str, Any]],
        new_items: List[Dict[str, Any]],
    ) -> None:
        """Called at the end of an agent run loop."""
        if not OTEL_AVAILABLE or not is_otel_enabled():
            return

        if self.run_start_time is not None:
            duration = time.perf_counter() - self.run_start_time

            # Record run metrics
            record_operation(
                operation="agent.run",
                duration_seconds=duration,
                status="success",
                model=self.model,
                steps=self.step_count,
            )

            add_breadcrumb(
                category="agent",
                message=f"Agent run completed in {duration:.2f}s",
                level="info",
                data={
                    "duration_seconds": duration,
                    "steps": self.step_count,
                    "output_messages": len(new_items),
                },
            )

        self.run_start_time = None
        # Stop step timing; the next run re-seeds this in on_run_start.
        self.step_start_time = None

    async def on_responses(
        self, kwargs: Dict[str, Any], responses: Dict[str, Any]
    ) -> None:
        """Called when responses are received (each step)."""
        if not OTEL_AVAILABLE or not is_otel_enabled():
            return

        self.step_count += 1
        current_time = time.perf_counter()

        # Calculate step duration if we have a start time
        if self.step_start_time is not None:
            step_duration = current_time - self.step_start_time
            record_operation(
                operation="agent.step",
                duration_seconds=step_duration,
                status="success",
                model=self.model,
                step_number=self.step_count,
            )

        # Start timing next step
        self.step_start_time = current_time

        add_breadcrumb(
            category="agent",
            message=f"Agent step {self.step_count} completed",
            level="info",
            data={"step": self.step_count},
        )

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """Called when usage information is received.

        Records token counts only when at least one of prompt/completion
        tokens is non-zero, to avoid emitting empty metrics.
        """
        if not OTEL_AVAILABLE or not is_otel_enabled():
            return

        prompt_tokens = usage.get("prompt_tokens", 0)
        completion_tokens = usage.get("completion_tokens", 0)

        if prompt_tokens > 0 or completion_tokens > 0:
            record_tokens(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                model=self.model,
            )

    async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
        """Called when a computer call is about to start."""
        if not OTEL_AVAILABLE or not is_otel_enabled():
            return

        action = item.get("action", {})
        action_type = action.get("type", "unknown")

        add_breadcrumb(
            category="computer",
            message=f"Computer action: {action_type}",
            level="info",
            data={"action_type": action_type},
        )

    async def on_computer_call_end(
        self, item: Dict[str, Any], result: List[Dict[str, Any]]
    ) -> None:
        """Called when a computer call has completed."""
        if not OTEL_AVAILABLE or not is_otel_enabled():
            return

        action = item.get("action", {})
        action_type = action.get("type", "unknown")

        # Record computer action metric
        # Note: We don't have precise timing here, so we record with 0 duration
        # The actual timing should be done in the computer module
        record_operation(
            operation=f"computer.action.{action_type}",
            duration_seconds=0,  # Timing handled elsewhere
            status="success",
            model=self.model,
        )

    async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
        """Called when an LLM API call is about to start."""
        if not OTEL_AVAILABLE or not is_otel_enabled():
            return

        add_breadcrumb(
            category="llm",
            message="LLM API call started",
            level="info",
            data={"model": self.model},
        )

    async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
        """Called when an LLM API call has completed."""
        if not OTEL_AVAILABLE or not is_otel_enabled():
            return

        add_breadcrumb(
            category="llm",
            message="LLM API call completed",
            level="info",
        )
249
+
250
+
251
class OtelErrorCallback(AsyncCallbackHandler):
    """
    Callback that captures errors and sends them to Sentry/OTEL.

    Should be added early in the callback chain to catch all errors.
    """

    def __init__(self, agent: Any):
        """
        Initialize error callback.

        Args:
            agent: The ComputerAgent instance
        """
        self.agent = agent
        self.model = getattr(agent, "model", "unknown")

    async def on_error(self, error: Exception, context: Dict[str, Any]) -> None:
        """Called when an error occurs during agent execution."""
        if not (OTEL_AVAILABLE and is_otel_enabled()):
            return

        op_name = context.get("operation", "unknown")

        # Emit the error counter metric first.
        record_error(
            error_type=type(error).__name__,
            operation=op_name,
            model=self.model,
        )

        # Then forward the exception (with everything else from the caller's
        # context, minus the already-promoted "operation" key) to Sentry.
        extra = {key: value for key, value in context.items() if key != "operation"}
        capture_exception(
            error,
            context={"model": self.model, "operation": op_name, **extra},
        )
@@ -2,38 +2,41 @@
2
2
  PII anonymization callback handler using Microsoft Presidio for text and image redaction.
3
3
  """
4
4
 
5
- from typing import List, Dict, Any, Optional, Tuple
6
- from .base import AsyncCallbackHandler
7
5
  import base64
8
6
  import io
9
7
  import logging
8
+ from typing import Any, Dict, List, Optional, Tuple
9
+
10
+ from .base import AsyncCallbackHandler
10
11
 
11
12
  try:
12
13
  # TODO: Add Presidio dependencies
13
14
  from PIL import Image
15
+
14
16
  PRESIDIO_AVAILABLE = True
15
17
  except ImportError:
16
18
  PRESIDIO_AVAILABLE = False
17
19
 
18
20
  logger = logging.getLogger(__name__)
19
21
 
22
+
20
23
  class PIIAnonymizationCallback(AsyncCallbackHandler):
21
24
  """
22
25
  Callback handler that anonymizes PII in text and images using Microsoft Presidio.
23
-
26
+
24
27
  This handler:
25
28
  1. Anonymizes PII in messages before sending to the agent loop
26
29
  2. Deanonymizes PII in tool calls and message outputs after the agent loop
27
30
  3. Redacts PII from images in computer_call_output messages
28
31
  """
29
-
32
+
30
33
  def __init__(
31
34
  self,
32
35
  # TODO: Any extra kwargs if needed
33
36
  ):
34
37
  """
35
38
  Initialize the PII anonymization callback.
36
-
39
+
37
40
  Args:
38
41
  anonymize_text: Whether to anonymize text content
39
42
  anonymize_images: Whether to redact images
@@ -46,16 +49,16 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
46
49
  "Presidio is not available. Install with: "
47
50
  "pip install cua-agent[pii-anonymization]"
48
51
  )
49
-
52
+
50
53
  # TODO: Implement __init__
51
-
54
+
52
55
  async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
53
56
  """
54
57
  Anonymize PII in messages before sending to agent loop.
55
-
58
+
56
59
  Args:
57
60
  messages: List of message dictionaries
58
-
61
+
59
62
  Returns:
60
63
  List of messages with PII anonymized
61
64
  """
@@ -63,16 +66,16 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
63
66
  for msg in messages:
64
67
  anonymized_msg = await self._anonymize_message(msg)
65
68
  anonymized_messages.append(anonymized_msg)
66
-
69
+
67
70
  return anonymized_messages
68
-
71
+
69
72
  async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
70
73
  """
71
74
  Deanonymize PII in tool calls and message outputs after agent loop.
72
-
75
+
73
76
  Args:
74
77
  output: List of output dictionaries
75
-
78
+
76
79
  Returns:
77
80
  List of output with PII deanonymized for tool calls
78
81
  """
@@ -84,13 +87,13 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
84
87
  deanonymized_output.append(deanonymized_item)
85
88
  else:
86
89
  deanonymized_output.append(item)
87
-
90
+
88
91
  return deanonymized_output
89
-
92
+
90
93
    async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
        """Return *message* with PII anonymized.

        Currently a pass-through stub: the message is returned unchanged
        until the Presidio-backed anonymization is implemented.
        """
        # TODO: Implement _anonymize_message
        return message
93
-
96
+
94
97
    async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
        """Return *item* with previously anonymized PII restored.

        Currently a pass-through stub: the item is returned unchanged until
        the deanonymization mapping is implemented.
        """
        # TODO: Implement _deanonymize_item
        return item
@@ -0,0 +1,47 @@
1
+ """
2
+ Prompt instructions callback.
3
+
4
+ This callback allows simple prompt engineering by pre-pending a user
5
+ instructions message to the start of the conversation before each LLM call.
6
+
7
+ Usage:
8
+
9
+ from agent.callbacks import PromptInstructionsCallback
10
+ agent = ComputerAgent(
11
+ model="openai/computer-use-preview",
12
+ callbacks=[PromptInstructionsCallback("Follow these rules...")]
13
+ )
14
+
15
+ """
16
+
17
+ from typing import Any, Dict, List, Optional
18
+
19
+ from .base import AsyncCallbackHandler
20
+
21
+
22
class PromptInstructionsCallback(AsyncCallbackHandler):
    """
    Prepend a user instructions message to the message list.

    This is a minimal, non-invasive way to guide the agent's behavior without
    modifying agent loops or tools. It works with any provider/loop since it
    only alters the messages array before sending to the model.
    """

    def __init__(self, instructions: Optional[str]) -> None:
        self.instructions = instructions

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        instructions = self.instructions
        # Nothing to inject when instructions are None/empty.
        if not instructions:
            return messages

        # Skip injection when an identical instructions message already
        # sits at the front of the conversation.
        head = messages[0] if messages else None
        if (
            isinstance(head, dict)
            and head.get("role") == "user"
            and head.get("content") == instructions
        ):
            return messages

        return [{"role": "user", "content": instructions}, *messages]