PyPI - cua-agent - Versions diffs - 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

cua-agent 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (112)

agent/__init__.py +21 -12
agent/__main__.py +21 -0
agent/adapters/__init__.py +9 -0
agent/adapters/huggingfacelocal_adapter.py +229 -0
agent/agent.py +594 -0
agent/callbacks/__init__.py +19 -0
agent/callbacks/base.py +153 -0
agent/callbacks/budget_manager.py +44 -0
agent/callbacks/image_retention.py +139 -0
agent/callbacks/logging.py +247 -0
agent/callbacks/pii_anonymization.py +259 -0
agent/callbacks/telemetry.py +210 -0
agent/callbacks/trajectory_saver.py +305 -0
agent/cli.py +297 -0
agent/computer_handler.py +107 -0
agent/decorators.py +90 -0
agent/loops/__init__.py +11 -0
agent/loops/anthropic.py +728 -0
agent/loops/omniparser.py +339 -0
agent/loops/openai.py +95 -0
agent/loops/uitars.py +688 -0
agent/responses.py +207 -0
agent/telemetry.py +135 -14
agent/types.py +79 -0
agent/ui/__init__.py +7 -1
agent/ui/__main__.py +2 -13
agent/ui/gradio/__init__.py +6 -19
agent/ui/gradio/app.py +94 -1313
agent/ui/gradio/ui_components.py +721 -0
cua_agent-0.4.0.dist-info/METADATA +424 -0
cua_agent-0.4.0.dist-info/RECORD +33 -0
{cua_agent-0.3.1.dist-info → cua_agent-0.4.0.dist-info}/WHEEL +1 -1
agent/core/__init__.py +0 -27
agent/core/agent.py +0 -210
agent/core/base.py +0 -217
agent/core/callbacks.py +0 -200
agent/core/experiment.py +0 -249
agent/core/factory.py +0 -122
agent/core/messages.py +0 -332
agent/core/provider_config.py +0 -21
agent/core/telemetry.py +0 -142
agent/core/tools/__init__.py +0 -21
agent/core/tools/base.py +0 -74
agent/core/tools/bash.py +0 -52
agent/core/tools/collection.py +0 -46
agent/core/tools/computer.py +0 -113
agent/core/tools/edit.py +0 -67
agent/core/tools/manager.py +0 -56
agent/core/tools.py +0 -32
agent/core/types.py +0 -88
agent/core/visualization.py +0 -197
agent/providers/__init__.py +0 -4
agent/providers/anthropic/__init__.py +0 -6
agent/providers/anthropic/api/client.py +0 -360
agent/providers/anthropic/api/logging.py +0 -150
agent/providers/anthropic/api_handler.py +0 -140
agent/providers/anthropic/callbacks/__init__.py +0 -5
agent/providers/anthropic/callbacks/manager.py +0 -65
agent/providers/anthropic/loop.py +0 -568
agent/providers/anthropic/prompts.py +0 -23
agent/providers/anthropic/response_handler.py +0 -226
agent/providers/anthropic/tools/__init__.py +0 -33
agent/providers/anthropic/tools/base.py +0 -88
agent/providers/anthropic/tools/bash.py +0 -66
agent/providers/anthropic/tools/collection.py +0 -34
agent/providers/anthropic/tools/computer.py +0 -396
agent/providers/anthropic/tools/edit.py +0 -326
agent/providers/anthropic/tools/manager.py +0 -54
agent/providers/anthropic/tools/run.py +0 -42
agent/providers/anthropic/types.py +0 -16
agent/providers/anthropic/utils.py +0 -367
agent/providers/omni/__init__.py +0 -8
agent/providers/omni/api_handler.py +0 -42
agent/providers/omni/clients/anthropic.py +0 -103
agent/providers/omni/clients/base.py +0 -35
agent/providers/omni/clients/oaicompat.py +0 -195
agent/providers/omni/clients/ollama.py +0 -122
agent/providers/omni/clients/openai.py +0 -155
agent/providers/omni/clients/utils.py +0 -25
agent/providers/omni/image_utils.py +0 -34
agent/providers/omni/loop.py +0 -990
agent/providers/omni/parser.py +0 -307
agent/providers/omni/prompts.py +0 -64
agent/providers/omni/tools/__init__.py +0 -30
agent/providers/omni/tools/base.py +0 -29
agent/providers/omni/tools/bash.py +0 -74
agent/providers/omni/tools/computer.py +0 -179
agent/providers/omni/tools/manager.py +0 -61
agent/providers/omni/utils.py +0 -236
agent/providers/openai/__init__.py +0 -6
agent/providers/openai/api_handler.py +0 -456
agent/providers/openai/loop.py +0 -472
agent/providers/openai/response_handler.py +0 -205
agent/providers/openai/tools/__init__.py +0 -15
agent/providers/openai/tools/base.py +0 -79
agent/providers/openai/tools/computer.py +0 -326
agent/providers/openai/tools/manager.py +0 -106
agent/providers/openai/types.py +0 -36
agent/providers/openai/utils.py +0 -98
agent/providers/uitars/__init__.py +0 -1
agent/providers/uitars/clients/base.py +0 -35
agent/providers/uitars/clients/mlxvlm.py +0 -263
agent/providers/uitars/clients/oaicompat.py +0 -214
agent/providers/uitars/loop.py +0 -660
agent/providers/uitars/prompts.py +0 -63
agent/providers/uitars/tools/__init__.py +0 -1
agent/providers/uitars/tools/computer.py +0 -283
agent/providers/uitars/tools/manager.py +0 -60
agent/providers/uitars/utils.py +0 -264
cua_agent-0.3.1.dist-info/METADATA +0 -295
cua_agent-0.3.1.dist-info/RECORD +0 -87
{cua_agent-0.3.1.dist-info → cua_agent-0.4.0.dist-info}/entry_points.txt +0 -0

agent/core/visualization.py DELETED

@@ -1,197 +0,0 @@
-"""Core visualization utilities for agents."""
-import logging
-import base64
-from typing import Dict, Tuple
-from PIL import Image, ImageDraw
-from io import BytesIO
-logger = logging.getLogger(__name__)
-def visualize_click(x: int, y: int, img_base64: str) -> Image.Image:
-    """Visualize a click action by drawing a circle on the screenshot.
-    Args:
-        x: X coordinate of the click
-        y: Y coordinate of the click
-        img_base64: Base64-encoded screenshot
-    Returns:
-        PIL Image with visualization
-    """
-    try:
-        # Decode the base64 image
-        image_data = base64.b64decode(img_base64)
-        img = Image.open(BytesIO(image_data))
-        # Create a copy to draw on
-        draw_img = img.copy()
-        draw = ImageDraw.Draw(draw_img)
-        # Draw a circle at the click location
-        radius = 15
-        draw.ellipse([(x - radius, y - radius), (x + radius, y + radius)], outline="red", width=3)
-        # Draw crosshairs
-        line_length = 20
-        draw.line([(x - line_length, y), (x + line_length, y)], fill="red", width=3)
-        draw.line([(x, y - line_length), (x, y + line_length)], fill="red", width=3)
-        return draw_img
-    except Exception as e:
-        logger.error(f"Error visualizing click: {str(e)}")
-        # Return a blank image as fallback
-        return Image.new("RGB", (800, 600), "white")
-def visualize_scroll(direction: str, clicks: int, img_base64: str) -> Image.Image:
-    """Visualize a scroll action by drawing arrows on the screenshot.
-    Args:
-        direction: Direction of scroll ('up' or 'down')
-        clicks: Number of scroll clicks
-        img_base64: Base64-encoded screenshot
-    Returns:
-        PIL Image with visualization
-    """
-    try:
-        # Decode the base64 image
-        image_data = base64.b64decode(img_base64)
-        img = Image.open(BytesIO(image_data))
-        # Create a copy to draw on
-        draw_img = img.copy()
-        draw = ImageDraw.Draw(draw_img)
-        # Calculate parameters for visualization
-        width, height = img.size
-        center_x = width // 2
-        # Draw arrows to indicate scrolling
-        arrow_length = min(100, height // 4)
-        arrow_width = 30
-        num_arrows = min(clicks, 3)  # Don't draw too many arrows
-        # Calculate starting position
-        if direction == "down":
-            start_y = height // 3
-            arrow_dir = 1  # Down
-        else:
-            start_y = height * 2 // 3
-            arrow_dir = -1  # Up
-        # Draw the arrows
-        for i in range(num_arrows):
-            y_pos = start_y + (i * arrow_length * arrow_dir * 0.7)
-            arrow_top = (center_x, y_pos)
-            arrow_bottom = (center_x, y_pos + arrow_length * arrow_dir)
-            # Draw the main line
-            draw.line([arrow_top, arrow_bottom], fill="red", width=5)
-            # Draw the arrowhead
-            arrowhead_size = 20
-            if direction == "down":
-                draw.line(
-                    [
-                        (center_x - arrow_width // 2, arrow_bottom[1] - arrowhead_size),
-                        arrow_bottom,
-                        (center_x + arrow_width // 2, arrow_bottom[1] - arrowhead_size),
-                    ],
-                    fill="red",
-                    width=5,
-                )
-            else:
-                draw.line(
-                    [
-                        (center_x - arrow_width // 2, arrow_bottom[1] + arrowhead_size),
-                        arrow_bottom,
-                        (center_x + arrow_width // 2, arrow_bottom[1] + arrowhead_size),
-                    ],
-                    fill="red",
-                    width=5,
-                )
-        return draw_img
-    except Exception as e:
-        logger.error(f"Error visualizing scroll: {str(e)}")
-        # Return a blank image as fallback
-        return Image.new("RGB", (800, 600), "white")
-def calculate_element_center(bbox: Dict[str, float], width: int, height: int) -> Tuple[int, int]:
-    """Calculate the center point of a UI element.
-    Args:
-        bbox: Bounding box dictionary with x1, y1, x2, y2 coordinates (0-1 normalized)
-        width: Screen width in pixels
-        height: Screen height in pixels
-    Returns:
-        (x, y) tuple with pixel coordinates
-    """
-    center_x = int((bbox["x1"] + bbox["x2"]) / 2 * width)
-    center_y = int((bbox["y1"] + bbox["y2"]) / 2 * height)
-    return center_x, center_y
-class VisualizationHelper:
-    """Helper class for visualizing agent actions."""
-    def __init__(self, agent):
-        """Initialize visualization helper.
-        Args:
-            agent: Reference to the agent that will use this helper
-        """
-        self.agent = agent
-    def visualize_action(self, x: int, y: int, img_base64: str) -> None:
-        """Visualize a click action by drawing on the screenshot."""
-        if (
-            not self.agent.save_trajectory
-            or not hasattr(self.agent, "experiment_manager")
-            or not self.agent.experiment_manager
-        ):
-            return
-        try:
-            # Use the visualization utility
-            img = visualize_click(x, y, img_base64)
-            # Save the visualization
-            self.agent.experiment_manager.save_action_visualization(img, "click", f"x{x}_y{y}")
-        except Exception as e:
-            logger.error(f"Error visualizing action: {str(e)}")
-    def visualize_scroll(self, direction: str, clicks: int, img_base64: str) -> None:
-        """Visualize a scroll action by drawing arrows on the screenshot."""
-        if (
-            not self.agent.save_trajectory
-            or not hasattr(self.agent, "experiment_manager")
-            or not self.agent.experiment_manager
-        ):
-            return
-        try:
-            # Use the visualization utility
-            img = visualize_scroll(direction, clicks, img_base64)
-            # Save the visualization
-            self.agent.experiment_manager.save_action_visualization(
-                img, "scroll", f"{direction}_{clicks}"
-            )
-        except Exception as e:
-            logger.error(f"Error visualizing scroll: {str(e)}")
-    def save_action_visualization(
-        self, img: Image.Image, action_name: str, details: str = ""
-    ) -> str:
-        """Save a visualization of an action."""
-        if hasattr(self.agent, "experiment_manager") and self.agent.experiment_manager:
-            return self.agent.experiment_manager.save_action_visualization(
-                img, action_name, details
-            )
-        return ""

agent/providers/__init__.py DELETED

@@ -1,4 +0,0 @@
-"""Provider implementations for different AI services."""
-# Import specific providers only when needed to avoid circular imports
-__all__ = []  # Let each provider module handle its own exports

agent/providers/anthropic/__init__.py DELETED

@@ -1,6 +0,0 @@
-"""Anthropic provider implementation."""
-from .loop import AnthropicLoop
-from .types import LLMProvider
-__all__ = ["AnthropicLoop", "LLMProvider"]

agent/providers/anthropic/api/client.py DELETED

@@ -1,360 +0,0 @@
-from typing import Any, List, Dict, cast
-import httpx
-import asyncio
-from anthropic import Anthropic, AnthropicBedrock, AnthropicVertex
-from anthropic.types.beta import BetaMessage, BetaMessageParam, BetaToolUnionParam
-from ..types import LLMProvider
-from .logging import log_api_interaction
-import random
-import logging
-logger = logging.getLogger(__name__)
-class APIConnectionError(Exception):
-    """Error raised when there are connection issues with the API."""
-    pass
-class BaseAnthropicClient:
-    """Base class for Anthropic API clients."""
-    MAX_RETRIES = 10
-    INITIAL_RETRY_DELAY = 1.0
-    MAX_RETRY_DELAY = 60.0
-    JITTER_FACTOR = 0.1
-    async def create_message(
-        self,
-        *,
-        messages: list[BetaMessageParam],
-        system: list[Any],
-        tools: list[BetaToolUnionParam],
-        max_tokens: int,
-        betas: list[str],
-    ) -> BetaMessage:
-        """Create a message using the Anthropic API."""
-        raise NotImplementedError
-    async def _make_api_call_with_retries(self, api_call):
-        """Make an API call with exponential backoff retry logic.
-        Args:
-            api_call: Async function that makes the actual API call
-        Returns:
-            API response
-        Raises:
-            APIConnectionError: If all retries fail
-        """
-        retry_count = 0
-        last_error = None
-        while retry_count < self.MAX_RETRIES:
-            try:
-                return await api_call()
-            except Exception as e:
-                last_error = e
-                retry_count += 1
-                if retry_count == self.MAX_RETRIES:
-                    break
-                # Calculate delay with exponential backoff and jitter
-                delay = min(
-                    self.INITIAL_RETRY_DELAY * (2 ** (retry_count - 1)), self.MAX_RETRY_DELAY
-                )
-                # Add jitter to avoid thundering herd
-                jitter = delay * self.JITTER_FACTOR * (2 * random.random() - 1)
-                final_delay = delay + jitter
-                logger.info(
-                    f"Retrying request (attempt {retry_count}/{self.MAX_RETRIES}) "
-                    f"in {final_delay:.2f} seconds after error: {str(e)}"
-                )
-                await asyncio.sleep(final_delay)
-        raise APIConnectionError(
-            f"Failed after {self.MAX_RETRIES} retries. " f"Last error: {str(last_error)}"
-        )
-    async def run_interleaved(
-        self, messages: List[Dict[str, Any]], system: str, max_tokens: int = 4096
-    ) -> Any:
-        """Run the Anthropic API with the Claude model, supports interleaved tool calling.
-        Args:
-            messages: List of message objects
-            system: System prompt
-            max_tokens: Maximum tokens to generate
-        Returns:
-            API response
-        """
-        # Add the tool_result check/fix logic here
-        fixed_messages = self._fix_missing_tool_results(messages)
-        # Get model name from concrete implementation if available
-        model_name = getattr(self, "model", "unknown model")
-        logger.info(f"Running Anthropic API call with model {model_name}")
-        retry_count = 0
-        while retry_count < self.MAX_RETRIES:
-            try:
-                # Call the Anthropic API through create_message which is implemented by subclasses
-                # Convert system str to the list format expected by create_message
-                system_list = [system]
-                # Convert message format if needed - concrete implementations may do further conversion
-                response = await self.create_message(
-                    messages=cast(list[BetaMessageParam], fixed_messages),
-                    system=system_list,
-                    tools=[],  # Tools are included in the messages
-                    max_tokens=max_tokens,
-                    betas=["tools-2023-12-13"],
-                )
-                logger.info(f"Anthropic API call successful")
-                return response
-            except Exception as e:
-                retry_count += 1
-                wait_time = self.INITIAL_RETRY_DELAY * (
-                    2 ** (retry_count - 1)
-                )  # Exponential backoff
-                logger.info(
-                    f"Retrying request (attempt {retry_count}/{self.MAX_RETRIES}) in {wait_time:.2f} seconds after error: {str(e)}"
-                )
-                await asyncio.sleep(wait_time)
-        # If we get here, all retries failed
-        raise RuntimeError(f"Failed to call Anthropic API after {self.MAX_RETRIES} attempts")
-    def _fix_missing_tool_results(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-        """Check for and fix any missing tool_result blocks after tool_use blocks.
-        Args:
-            messages: List of message objects
-        Returns:
-            Fixed messages with proper tool_result blocks
-        """
-        fixed_messages = []
-        pending_tool_uses = {}  # Map of tool_use IDs to their details
-        for i, message in enumerate(messages):
-            # Track any tool_use blocks in this message
-            if message.get("role") == "assistant" and "content" in message:
-                content = message.get("content", [])
-                for block in content:
-                    if isinstance(block, dict) and block.get("type") == "tool_use":
-                        tool_id = block.get("id")
-                        if tool_id:
-                            pending_tool_uses[tool_id] = {
-                                "name": block.get("name", ""),
-                                "input": block.get("input", {}),
-                            }
-            # Check if this message handles any pending tool_use blocks
-            if message.get("role") == "user" and "content" in message:
-                # Check for tool_result blocks in this message
-                content = message.get("content", [])
-                for block in content:
-                    if isinstance(block, dict) and block.get("type") == "tool_result":
-                        tool_id = block.get("tool_use_id")
-                        if tool_id in pending_tool_uses:
-                            # This tool_result handles a pending tool_use
-                            pending_tool_uses.pop(tool_id)
-            # Add the message to our fixed list
-            fixed_messages.append(message)
-            # If this is an assistant message with tool_use blocks and there are
-            # pending tool uses that need to be resolved before the next assistant message
-            if (
-                i + 1 < len(messages)
-                and message.get("role") == "assistant"
-                and messages[i + 1].get("role") == "assistant"
-                and pending_tool_uses
-            ):
-                # We need to insert a user message with tool_results for all pending tool_uses
-                tool_results = []
-                for tool_id, tool_info in pending_tool_uses.items():
-                    tool_results.append(
-                        {
-                            "type": "tool_result",
-                            "tool_use_id": tool_id,
-                            "content": {
-                                "type": "error",
-                                "message": "Tool execution was skipped or failed",
-                            },
-                        }
-                    )
-                # Insert a synthetic user message with the tool results
-                if tool_results:
-                    fixed_messages.append({"role": "user", "content": tool_results})
-                # Clear pending tools since we've added results for them
-                pending_tool_uses = {}
-        # Check if there are any remaining pending tool_uses at the end of the conversation
-        if pending_tool_uses and fixed_messages and fixed_messages[-1].get("role") == "assistant":
-            # Add a final user message with tool results for any pending tool_uses
-            tool_results = []
-            for tool_id, tool_info in pending_tool_uses.items():
-                tool_results.append(
-                    {
-                        "type": "tool_result",
-                        "tool_use_id": tool_id,
-                        "content": {
-                            "type": "error",
-                            "message": "Tool execution was skipped or failed",
-                        },
-                    }
-                )
-            if tool_results:
-                fixed_messages.append({"role": "user", "content": tool_results})
-        return fixed_messages
-class AnthropicDirectClient(BaseAnthropicClient):
-    """Direct Anthropic API client implementation."""
-    def __init__(self, api_key: str, model: str):
-        self.model = model
-        self.client = Anthropic(api_key=api_key, http_client=self._create_http_client())
-    def _create_http_client(self) -> httpx.Client:
-        """Create an HTTP client with appropriate settings."""
-        return httpx.Client(
-            verify=True,
-            timeout=httpx.Timeout(connect=30.0, read=300.0, write=30.0, pool=30.0),
-            transport=httpx.HTTPTransport(
-                retries=3,
-                verify=True,
-                limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
-            ),
-        )
-    async def create_message(
-        self,
-        *,
-        messages: list[BetaMessageParam],
-        system: list[Any],
-        tools: list[BetaToolUnionParam],
-        max_tokens: int,
-        betas: list[str],
-    ) -> BetaMessage:
-        """Create a message using the direct Anthropic API with retry logic."""
-        async def api_call():
-            response = self.client.beta.messages.with_raw_response.create(
-                max_tokens=max_tokens,
-                messages=messages,
-                model=self.model,
-                system=system,
-                tools=tools,
-                betas=betas,
-            )
-            log_api_interaction(response.http_response.request, response.http_response, None)
-            return response.parse()
-        try:
-            return await self._make_api_call_with_retries(api_call)
-        except Exception as e:
-            log_api_interaction(None, None, e)
-            raise
-class AnthropicVertexClient(BaseAnthropicClient):
-    """Google Cloud Vertex AI implementation of Anthropic client."""
-    def __init__(self, model: str):
-        self.model = model
-        self.client = AnthropicVertex()
-    async def create_message(
-        self,
-        *,
-        messages: list[BetaMessageParam],
-        system: list[Any],
-        tools: list[BetaToolUnionParam],
-        max_tokens: int,
-        betas: list[str],
-    ) -> BetaMessage:
-        """Create a message using Vertex AI with retry logic."""
-        async def api_call():
-            response = self.client.beta.messages.with_raw_response.create(
-                max_tokens=max_tokens,
-                messages=messages,
-                model=self.model,
-                system=system,
-                tools=tools,
-                betas=betas,
-            )
-            log_api_interaction(response.http_response.request, response.http_response, None)
-            return response.parse()
-        try:
-            return await self._make_api_call_with_retries(api_call)
-        except Exception as e:
-            log_api_interaction(None, None, e)
-            raise
-class AnthropicBedrockClient(BaseAnthropicClient):
-    """AWS Bedrock implementation of Anthropic client."""
-    def __init__(self, model: str):
-        self.model = model
-        self.client = AnthropicBedrock()
-    async def create_message(
-        self,
-        *,
-        messages: list[BetaMessageParam],
-        system: list[Any],
-        tools: list[BetaToolUnionParam],
-        max_tokens: int,
-        betas: list[str],
-    ) -> BetaMessage:
-        """Create a message using AWS Bedrock with retry logic."""
-        async def api_call():
-            response = self.client.beta.messages.with_raw_response.create(
-                max_tokens=max_tokens,
-                messages=messages,
-                model=self.model,
-                system=system,
-                tools=tools,
-                betas=betas,
-            )
-            log_api_interaction(response.http_response.request, response.http_response, None)
-            return response.parse()
-        try:
-            return await self._make_api_call_with_retries(api_call)
-        except Exception as e:
-            log_api_interaction(None, None, e)
-            raise
-class AnthropicClientFactory:
-    """Factory for creating appropriate Anthropic client implementations."""
-    @staticmethod
-    def create_client(provider: LLMProvider, api_key: str, model: str) -> BaseAnthropicClient:
-        """Create an appropriate client based on the provider."""
-        if provider == LLMProvider.ANTHROPIC:
-            return AnthropicDirectClient(api_key, model)
-        elif provider == LLMProvider.VERTEX:
-            return AnthropicVertexClient(model)
-        elif provider == LLMProvider.BEDROCK:
-            return AnthropicBedrockClient(model)
-        raise ValueError(f"Unsupported provider: {provider}")

cua-agent 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

Potentially problematic release.

cua-agent 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl