PyPI - cua-agent - Versions diffs - 0.1.0__py3-none-any.whl - Mend

cua-agent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (65) hide show

agent/README.md +63 -0
agent/__init__.py +10 -0
agent/core/README.md +101 -0
agent/core/__init__.py +34 -0
agent/core/agent.py +284 -0
agent/core/base_agent.py +164 -0
agent/core/callbacks.py +147 -0
agent/core/computer_agent.py +69 -0
agent/core/experiment.py +222 -0
agent/core/factory.py +102 -0
agent/core/loop.py +244 -0
agent/core/messages.py +230 -0
agent/core/tools/__init__.py +21 -0
agent/core/tools/base.py +74 -0
agent/core/tools/bash.py +52 -0
agent/core/tools/collection.py +46 -0
agent/core/tools/computer.py +113 -0
agent/core/tools/edit.py +67 -0
agent/core/tools/manager.py +56 -0
agent/providers/__init__.py +4 -0
agent/providers/anthropic/__init__.py +6 -0
agent/providers/anthropic/api/client.py +222 -0
agent/providers/anthropic/api/logging.py +150 -0
agent/providers/anthropic/callbacks/manager.py +55 -0
agent/providers/anthropic/loop.py +521 -0
agent/providers/anthropic/messages/manager.py +110 -0
agent/providers/anthropic/prompts.py +20 -0
agent/providers/anthropic/tools/__init__.py +33 -0
agent/providers/anthropic/tools/base.py +88 -0
agent/providers/anthropic/tools/bash.py +163 -0
agent/providers/anthropic/tools/collection.py +34 -0
agent/providers/anthropic/tools/computer.py +550 -0
agent/providers/anthropic/tools/edit.py +326 -0
agent/providers/anthropic/tools/manager.py +54 -0
agent/providers/anthropic/tools/run.py +42 -0
agent/providers/anthropic/types.py +16 -0
agent/providers/omni/__init__.py +27 -0
agent/providers/omni/callbacks.py +78 -0
agent/providers/omni/clients/anthropic.py +99 -0
agent/providers/omni/clients/base.py +44 -0
agent/providers/omni/clients/groq.py +101 -0
agent/providers/omni/clients/openai.py +159 -0
agent/providers/omni/clients/utils.py +25 -0
agent/providers/omni/experiment.py +273 -0
agent/providers/omni/image_utils.py +106 -0
agent/providers/omni/loop.py +961 -0
agent/providers/omni/messages.py +168 -0
agent/providers/omni/parser.py +252 -0
agent/providers/omni/prompts.py +78 -0
agent/providers/omni/tool_manager.py +91 -0
agent/providers/omni/tools/__init__.py +13 -0
agent/providers/omni/tools/bash.py +69 -0
agent/providers/omni/tools/computer.py +216 -0
agent/providers/omni/tools/manager.py +83 -0
agent/providers/omni/types.py +30 -0
agent/providers/omni/utils.py +155 -0
agent/providers/omni/visualization.py +130 -0
agent/types/__init__.py +26 -0
agent/types/base.py +52 -0
agent/types/messages.py +36 -0
agent/types/tools.py +32 -0
cua_agent-0.1.0.dist-info/METADATA +44 -0
cua_agent-0.1.0.dist-info/RECORD +65 -0
cua_agent-0.1.0.dist-info/WHEEL +4 -0
cua_agent-0.1.0.dist-info/entry_points.txt +4 -0

agent/core/tools/computer.py ADDED Viewed

@@ -0,0 +1,113 @@
+"""Abstract base computer tool implementation."""
+import asyncio
+import base64
+import io
+import logging
+from abc import abstractmethod
+from typing import Any, Dict, Optional, Tuple
+from PIL import Image
+from computer.computer import Computer
+from .base import BaseTool, ToolError, ToolResult
+class BaseComputerTool(BaseTool):
+    """Base class for computer interaction tools across different providers."""
+    name = "computer"
+    logger = logging.getLogger(__name__)
+    width: Optional[int] = None
+    height: Optional[int] = None
+    display_num: Optional[int] = None
+    computer: Computer
+    _screenshot_delay = 1.0  # Default delay for most platforms
+    _scaling_enabled = True
+    def __init__(self, computer: Computer):
+        """Initialize the ComputerTool.
+        Args:
+            computer: Computer instance for screen interactions
+        """
+        self.computer = computer
+    async def initialize_dimensions(self):
+        """Initialize screen dimensions from the computer interface."""
+        display_size = await self.computer.interface.get_screen_size()
+        self.width = display_size["width"]
+        self.height = display_size["height"]
+        self.logger.info(f"Initialized screen dimensions to {self.width}x{self.height}")
+    @property
+    def options(self) -> Dict[str, Any]:
+        """Get the options for the tool.
+        Returns:
+            Dictionary with tool options
+        """
+        if self.width is None or self.height is None:
+            raise RuntimeError(
+                "Screen dimensions not initialized. Call initialize_dimensions() first."
+            )
+        return {
+            "display_width_px": self.width,
+            "display_height_px": self.height,
+            "display_number": self.display_num,
+        }
+    async def resize_screenshot_if_needed(self, screenshot: bytes) -> bytes:
+        """Resize a screenshot to match the expected dimensions.
+        Args:
+            screenshot: Raw screenshot data
+        Returns:
+            Resized screenshot data
+        """
+        if self.width is None or self.height is None:
+            raise ToolError("Screen dimensions not initialized")
+        try:
+            img = Image.open(io.BytesIO(screenshot))
+            if img.mode in ("RGBA", "LA") or (img.mode == "P" and "transparency" in img.info):
+                img = img.convert("RGB")
+            # Resize if dimensions don't match
+            if img.size != (self.width, self.height):
+                self.logger.info(
+                    f"Scaling image from {img.size} to {self.width}x{self.height} to match screen dimensions"
+                )
+                img = img.resize((self.width, self.height), Image.Resampling.LANCZOS)
+                # Save back to bytes
+                buffer = io.BytesIO()
+                img.save(buffer, format="PNG")
+                return buffer.getvalue()
+            return screenshot
+        except Exception as e:
+            self.logger.error(f"Error during screenshot resizing: {str(e)}")
+            raise ToolError(f"Failed to resize screenshot: {str(e)}")
+    async def screenshot(self) -> ToolResult:
+        """Take a screenshot and return it as a ToolResult with base64-encoded image.
+        Returns:
+            ToolResult with the screenshot
+        """
+        try:
+            screenshot = await self.computer.interface.screenshot()
+            screenshot = await self.resize_screenshot_if_needed(screenshot)
+            return ToolResult(base64_image=base64.b64encode(screenshot).decode())
+        except Exception as e:
+            self.logger.error(f"Error taking screenshot: {str(e)}")
+            return ToolResult(error=f"Failed to take screenshot: {str(e)}")
+    @abstractmethod
+    async def __call__(self, **kwargs) -> ToolResult:
+        """Execute the tool with the provided arguments."""
+        raise NotImplementedError

agent/core/tools/edit.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""Abstract base edit tool implementation."""
+import asyncio
+import logging
+import os
+from abc import abstractmethod
+from pathlib import Path
+from typing import Any, Dict, Optional
+from computer.computer import Computer
+from .base import BaseTool, ToolError, ToolResult
+class BaseEditTool(BaseTool):
+    """Base class for text editor tools across different providers."""
+    name = "edit"
+    logger = logging.getLogger(__name__)
+    computer: Computer
+    def __init__(self, computer: Computer):
+        """Initialize the EditTool.
+        Args:
+            computer: Computer instance, may be used for related operations
+        """
+        self.computer = computer
+    async def read_file(self, path: str) -> str:
+        """Read a file and return its contents.
+        Args:
+            path: Path to the file to read
+        Returns:
+            File contents as a string
+        """
+        try:
+            path_obj = Path(path)
+            if not path_obj.exists():
+                raise ToolError(f"File does not exist: {path}")
+            return path_obj.read_text()
+        except Exception as e:
+            self.logger.error(f"Error reading file: {str(e)}")
+            raise ToolError(f"Failed to read file: {str(e)}")
+    async def write_file(self, path: str, content: str) -> None:
+        """Write content to a file.
+        Args:
+            path: Path to the file to write
+            content: Content to write to the file
+        """
+        try:
+            path_obj = Path(path)
+            # Create parent directories if they don't exist
+            path_obj.parent.mkdir(parents=True, exist_ok=True)
+            path_obj.write_text(content)
+        except Exception as e:
+            self.logger.error(f"Error writing file: {str(e)}")
+            raise ToolError(f"Failed to write file: {str(e)}")
+    @abstractmethod
+    async def __call__(self, **kwargs) -> ToolResult:
+        """Execute the tool with the provided arguments."""
+        raise NotImplementedError

agent/core/tools/manager.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""Tool manager for initializing and running tools."""
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List
+from computer.computer import Computer
+from .base import BaseTool, ToolResult
+from .collection import ToolCollection
+class BaseToolManager(ABC):
+    """Base class for tool managers across different providers."""
+    def __init__(self, computer: Computer):
+        """Initialize the tool manager.
+        Args:
+            computer: Computer instance for computer-related tools
+        """
+        self.computer = computer
+        self.tools: ToolCollection | None = None
+    @abstractmethod
+    def _initialize_tools(self) -> ToolCollection:
+        """Initialize all available tools."""
+        ...
+    async def initialize(self) -> None:
+        """Initialize tool-specific requirements and create tool collection."""
+        await self._initialize_tools_specific()
+        self.tools = self._initialize_tools()
+    @abstractmethod
+    async def _initialize_tools_specific(self) -> None:
+        """Initialize provider-specific tool requirements."""
+        ...
+    @abstractmethod
+    def get_tool_params(self) -> List[Dict[str, Any]]:
+        """Get tool parameters for API calls."""
+        ...
+    async def execute_tool(self, name: str, tool_input: Dict[str, Any]) -> ToolResult:
+        """Execute a tool with the given input.
+        Args:
+            name: Name of the tool to execute
+            tool_input: Input parameters for the tool
+        Returns:
+            Result of the tool execution
+        """
+        if self.tools is None:
+            raise RuntimeError("Tools not initialized. Call initialize() first.")
+        return await self.tools.run(name=name, tool_input=tool_input)

agent/providers/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+"""Provider implementations for different AI services."""
+# Import specific providers only when needed to avoid circular imports
+__all__ = []  # Let each provider module handle its own exports

agent/providers/anthropic/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Anthropic provider implementation."""
+from .loop import AnthropicLoop
+from .types import APIProvider
+__all__ = ["AnthropicLoop", "APIProvider"]

agent/providers/anthropic/api/client.py ADDED Viewed

@@ -0,0 +1,222 @@
+from typing import Any
+import httpx
+import asyncio
+from anthropic import Anthropic, AnthropicBedrock, AnthropicVertex
+from anthropic.types.beta import BetaMessage, BetaMessageParam, BetaToolUnionParam
+from ..types import APIProvider
+from .logging import log_api_interaction
+import random
+import logging
+logger = logging.getLogger(__name__)
+class APIConnectionError(Exception):
+    """Error raised when there are connection issues with the API."""
+    pass
+class BaseAnthropicClient:
+    """Base class for Anthropic API clients."""
+    MAX_RETRIES = 10
+    INITIAL_RETRY_DELAY = 1.0
+    MAX_RETRY_DELAY = 60.0
+    JITTER_FACTOR = 0.1
+    async def create_message(
+        self,
+        *,
+        messages: list[BetaMessageParam],
+        system: list[Any],
+        tools: list[BetaToolUnionParam],
+        max_tokens: int,
+        betas: list[str],
+    ) -> BetaMessage:
+        """Create a message using the Anthropic API."""
+        raise NotImplementedError
+    async def _make_api_call_with_retries(self, api_call):
+        """Make an API call with exponential backoff retry logic.
+        Args:
+            api_call: Async function that makes the actual API call
+        Returns:
+            API response
+        Raises:
+            APIConnectionError: If all retries fail
+        """
+        retry_count = 0
+        last_error = None
+        while retry_count < self.MAX_RETRIES:
+            try:
+                return await api_call()
+            except Exception as e:
+                last_error = e
+                retry_count += 1
+                if retry_count == self.MAX_RETRIES:
+                    break
+                # Calculate delay with exponential backoff and jitter
+                delay = min(
+                    self.INITIAL_RETRY_DELAY * (2 ** (retry_count - 1)),
+                    self.MAX_RETRY_DELAY
+                )
+                # Add jitter to avoid thundering herd
+                jitter = delay * self.JITTER_FACTOR * (2 * random.random() - 1)
+                final_delay = delay + jitter
+                logger.info(
+                    f"Retrying request (attempt {retry_count}/{self.MAX_RETRIES}) "
+                    f"in {final_delay:.2f} seconds after error: {str(e)}"
+                )
+                await asyncio.sleep(final_delay)
+        raise APIConnectionError(
+            f"Failed after {self.MAX_RETRIES} retries. "
+            f"Last error: {str(last_error)}"
+        )
+class AnthropicDirectClient(BaseAnthropicClient):
+    """Direct Anthropic API client implementation."""
+    def __init__(self, api_key: str, model: str):
+        self.model = model
+        self.client = Anthropic(
+            api_key=api_key,
+            http_client=self._create_http_client()
+        )
+    def _create_http_client(self) -> httpx.Client:
+        """Create an HTTP client with appropriate settings."""
+        return httpx.Client(
+            verify=True,
+            timeout=httpx.Timeout(
+                connect=30.0,
+                read=300.0,
+                write=30.0,
+                pool=30.0
+            ),
+            transport=httpx.HTTPTransport(
+                retries=3,
+                verify=True,
+                limits=httpx.Limits(
+                    max_keepalive_connections=5,
+                    max_connections=10
+                )
+            )
+        )
+    async def create_message(
+        self,
+        *,
+        messages: list[BetaMessageParam],
+        system: list[Any],
+        tools: list[BetaToolUnionParam],
+        max_tokens: int,
+        betas: list[str],
+    ) -> BetaMessage:
+        """Create a message using the direct Anthropic API with retry logic."""
+        async def api_call():
+            response = self.client.beta.messages.with_raw_response.create(
+                max_tokens=max_tokens,
+                messages=messages,
+                model=self.model,
+                system=system,
+                tools=tools,
+                betas=betas,
+            )
+            log_api_interaction(response.http_response.request, response.http_response, None)
+            return response.parse()
+        try:
+            return await self._make_api_call_with_retries(api_call)
+        except Exception as e:
+            log_api_interaction(None, None, e)
+            raise
+class AnthropicVertexClient(BaseAnthropicClient):
+    """Google Cloud Vertex AI implementation of Anthropic client."""
+    def __init__(self, model: str):
+        self.model = model
+        self.client = AnthropicVertex()
+    async def create_message(
+        self,
+        *,
+        messages: list[BetaMessageParam],
+        system: list[Any],
+        tools: list[BetaToolUnionParam],
+        max_tokens: int,
+        betas: list[str],
+    ) -> BetaMessage:
+        """Create a message using Vertex AI with retry logic."""
+        async def api_call():
+            response = self.client.beta.messages.with_raw_response.create(
+                max_tokens=max_tokens,
+                messages=messages,
+                model=self.model,
+                system=system,
+                tools=tools,
+                betas=betas,
+            )
+            log_api_interaction(response.http_response.request, response.http_response, None)
+            return response.parse()
+        try:
+            return await self._make_api_call_with_retries(api_call)
+        except Exception as e:
+            log_api_interaction(None, None, e)
+            raise
+class AnthropicBedrockClient(BaseAnthropicClient):
+    """AWS Bedrock implementation of Anthropic client."""
+    def __init__(self, model: str):
+        self.model = model
+        self.client = AnthropicBedrock()
+    async def create_message(
+        self,
+        *,
+        messages: list[BetaMessageParam],
+        system: list[Any],
+        tools: list[BetaToolUnionParam],
+        max_tokens: int,
+        betas: list[str],
+    ) -> BetaMessage:
+        """Create a message using AWS Bedrock with retry logic."""
+        async def api_call():
+            response = self.client.beta.messages.with_raw_response.create(
+                max_tokens=max_tokens,
+                messages=messages,
+                model=self.model,
+                system=system,
+                tools=tools,
+                betas=betas,
+            )
+            log_api_interaction(response.http_response.request, response.http_response, None)
+            return response.parse()
+        try:
+            return await self._make_api_call_with_retries(api_call)
+        except Exception as e:
+            log_api_interaction(None, None, e)
+            raise
+class AnthropicClientFactory:
+    """Factory for creating appropriate Anthropic client implementations."""
+    @staticmethod
+    def create_client(provider: APIProvider, api_key: str, model: str) -> BaseAnthropicClient:
+        """Create an appropriate client based on the provider."""
+        if provider == APIProvider.ANTHROPIC:
+            return AnthropicDirectClient(api_key, model)
+        elif provider == APIProvider.VERTEX:
+            return AnthropicVertexClient(model)
+        elif provider == APIProvider.BEDROCK:
+            return AnthropicBedrockClient(model)
+        raise ValueError(f"Unsupported provider: {provider}")

agent/providers/anthropic/api/logging.py ADDED Viewed

@@ -0,0 +1,150 @@
+"""API logging functionality."""
+import json
+import logging
+from datetime import datetime
+from pathlib import Path
+import httpx
+from typing import Any
+logger = logging.getLogger(__name__)
+def _filter_base64_images(content: Any) -> Any:
+    """Filter out base64 image data from content.
+    Args:
+        content: Content to filter
+    Returns:
+        Filtered content with base64 data replaced by placeholder
+    """
+    if isinstance(content, dict):
+        filtered = {}
+        for key, value in content.items():
+            if (
+                isinstance(value, dict)
+                and value.get("type") == "image"
+                and value.get("source", {}).get("type") == "base64"
+            ):
+                # Replace base64 data with placeholder
+                filtered[key] = {
+                    **value,
+                    "source": {
+                        **value["source"],
+                        "data": "<base64_image_data>"
+                    }
+                }
+            else:
+                filtered[key] = _filter_base64_images(value)
+        return filtered
+    elif isinstance(content, list):
+        return [_filter_base64_images(item) for item in content]
+    return content
+def log_api_interaction(
+    request: httpx.Request | None,
+    response: httpx.Response | object | None,
+    error: Exception | None,
+    log_dir: Path = Path("/tmp/claude_logs")
+) -> None:
+    """Log API request, response, and any errors in a structured way.
+    Args:
+        request: The HTTP request if available
+        response: The HTTP response or response object
+        error: Any error that occurred
+        log_dir: Directory to store log files
+    """
+    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
+    # Helper function to safely decode JSON content
+    def safe_json_decode(content):
+        if not content:
+            return None
+        try:
+            if isinstance(content, bytes):
+                return json.loads(content.decode())
+            elif isinstance(content, str):
+                return json.loads(content)
+            elif isinstance(content, dict):
+                return content
+            return None
+        except json.JSONDecodeError:
+            return {"error": "Could not decode JSON", "raw": str(content)}
+    # Process request content
+    request_content = None
+    if request and request.content:
+        request_content = safe_json_decode(request.content)
+        request_content = _filter_base64_images(request_content)
+    # Process response content
+    response_content = None
+    if response:
+        if isinstance(response, httpx.Response):
+            try:
+                response_content = response.json()
+            except json.JSONDecodeError:
+                response_content = {"error": "Could not decode JSON", "raw": response.text}
+        else:
+            response_content = safe_json_decode(response)
+        response_content = _filter_base64_images(response_content)
+    log_entry = {
+        "timestamp": timestamp,
+        "request": {
+            "method": request.method if request else None,
+            "url": str(request.url) if request else None,
+            "headers": dict(request.headers) if request else None,
+            "content": request_content,
+        } if request else None,
+        "response": {
+            "status_code": response.status_code if isinstance(response, httpx.Response) else None,
+            "headers": dict(response.headers) if isinstance(response, httpx.Response) else None,
+            "content": response_content,
+        } if response else None,
+        "error": {
+            "type": type(error).__name__ if error else None,
+            "message": str(error) if error else None,
+        } if error else None
+    }
+    # Log to file with timestamp in filename
+    log_dir.mkdir(exist_ok=True)
+    log_file = log_dir / f"claude_api_{timestamp.replace(' ', '_').replace(':', '-')}.json"
+    with open(log_file, 'w') as f:
+        json.dump(log_entry, f, indent=2)
+    # Also log a summary to the console
+    if error:
+        logger.error(f"API Error at {timestamp}: {error}")
+    else:
+        logger.info(
+            f"API Call at {timestamp}: "
+            f"{request.method if request else 'No request'} -> "
+            f"{response.status_code if isinstance(response, httpx.Response) else 'No response'}"
+        )
+        # Log if there are any images in the content
+        if response_content:
+            image_count = count_images(response_content)
+            if image_count > 0:
+                logger.info(f"Response contains {image_count} images")
+def count_images(content: dict | list | Any) -> int:
+    """Count the number of images in the content.
+    Args:
+        content: Content to search for images
+    Returns:
+        Number of images found
+    """
+    if isinstance(content, dict):
+        if content.get("type") == "image":
+            return 1
+        return sum(count_images(v) for v in content.values())
+    elif isinstance(content, list):
+        return sum(count_images(item) for item in content)
+    return 0

agent/providers/anthropic/callbacks/manager.py ADDED Viewed

@@ -0,0 +1,55 @@
+from typing import Callable, Protocol
+import httpx
+from anthropic.types.beta import BetaContentBlockParam
+from ..tools import ToolResult
+class APICallback(Protocol):
+    """Protocol for API callbacks."""
+    def __call__(self, request: httpx.Request | None,
+                 response: httpx.Response | object | None,
+                 error: Exception | None) -> None: ...
+class ContentCallback(Protocol):
+    """Protocol for content callbacks."""
+    def __call__(self, content: BetaContentBlockParam) -> None: ...
+class ToolCallback(Protocol):
+    """Protocol for tool callbacks."""
+    def __call__(self, result: ToolResult, tool_id: str) -> None: ...
+class CallbackManager:
+    """Manages various callbacks for the agent system."""
+    def __init__(
+        self,
+        content_callback: ContentCallback,
+        tool_callback: ToolCallback,
+        api_callback: APICallback,
+    ):
+        """Initialize the callback manager.
+        Args:
+            content_callback: Callback for content updates
+            tool_callback: Callback for tool execution results
+            api_callback: Callback for API interactions
+        """
+        self.content_callback = content_callback
+        self.tool_callback = tool_callback
+        self.api_callback = api_callback
+    def on_content(self, content: BetaContentBlockParam) -> None:
+        """Handle content updates."""
+        self.content_callback(content)
+    def on_tool_result(self, result: ToolResult, tool_id: str) -> None:
+        """Handle tool execution results."""
+        self.tool_callback(result, tool_id)
+    def on_api_interaction(
+        self,
+        request: httpx.Request | None,
+        response: httpx.Response | object | None,
+        error: Exception | None
+    ) -> None:
+        """Handle API interactions."""
+        self.api_callback(request, response, error)