PyPI - cua-agent - Versions diffs - 0.1.0__py3-none-any.whl - Mend

cua-agent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (65)

agent/README.md +63 -0
agent/__init__.py +10 -0
agent/core/README.md +101 -0
agent/core/__init__.py +34 -0
agent/core/agent.py +284 -0
agent/core/base_agent.py +164 -0
agent/core/callbacks.py +147 -0
agent/core/computer_agent.py +69 -0
agent/core/experiment.py +222 -0
agent/core/factory.py +102 -0
agent/core/loop.py +244 -0
agent/core/messages.py +230 -0
agent/core/tools/__init__.py +21 -0
agent/core/tools/base.py +74 -0
agent/core/tools/bash.py +52 -0
agent/core/tools/collection.py +46 -0
agent/core/tools/computer.py +113 -0
agent/core/tools/edit.py +67 -0
agent/core/tools/manager.py +56 -0
agent/providers/__init__.py +4 -0
agent/providers/anthropic/__init__.py +6 -0
agent/providers/anthropic/api/client.py +222 -0
agent/providers/anthropic/api/logging.py +150 -0
agent/providers/anthropic/callbacks/manager.py +55 -0
agent/providers/anthropic/loop.py +521 -0
agent/providers/anthropic/messages/manager.py +110 -0
agent/providers/anthropic/prompts.py +20 -0
agent/providers/anthropic/tools/__init__.py +33 -0
agent/providers/anthropic/tools/base.py +88 -0
agent/providers/anthropic/tools/bash.py +163 -0
agent/providers/anthropic/tools/collection.py +34 -0
agent/providers/anthropic/tools/computer.py +550 -0
agent/providers/anthropic/tools/edit.py +326 -0
agent/providers/anthropic/tools/manager.py +54 -0
agent/providers/anthropic/tools/run.py +42 -0
agent/providers/anthropic/types.py +16 -0
agent/providers/omni/__init__.py +27 -0
agent/providers/omni/callbacks.py +78 -0
agent/providers/omni/clients/anthropic.py +99 -0
agent/providers/omni/clients/base.py +44 -0
agent/providers/omni/clients/groq.py +101 -0
agent/providers/omni/clients/openai.py +159 -0
agent/providers/omni/clients/utils.py +25 -0
agent/providers/omni/experiment.py +273 -0
agent/providers/omni/image_utils.py +106 -0
agent/providers/omni/loop.py +961 -0
agent/providers/omni/messages.py +168 -0
agent/providers/omni/parser.py +252 -0
agent/providers/omni/prompts.py +78 -0
agent/providers/omni/tool_manager.py +91 -0
agent/providers/omni/tools/__init__.py +13 -0
agent/providers/omni/tools/bash.py +69 -0
agent/providers/omni/tools/computer.py +216 -0
agent/providers/omni/tools/manager.py +83 -0
agent/providers/omni/types.py +30 -0
agent/providers/omni/utils.py +155 -0
agent/providers/omni/visualization.py +130 -0
agent/types/__init__.py +26 -0
agent/types/base.py +52 -0
agent/types/messages.py +36 -0
agent/types/tools.py +32 -0
cua_agent-0.1.0.dist-info/METADATA +44 -0
cua_agent-0.1.0.dist-info/RECORD +65 -0
cua_agent-0.1.0.dist-info/WHEEL +4 -0
cua_agent-0.1.0.dist-info/entry_points.txt +4 -0

agent/core/callbacks.py ADDED Viewed

@@ -0,0 +1,147 @@
+"""Callback handlers for agent."""
+import json
+import logging
+from abc import ABC, abstractmethod
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Protocol
+# Module-level logger, named after this module via __name__.
+logger = logging.getLogger(__name__)
+class ContentCallback(Protocol):
+    """Structural type for callables that accept one content dictionary."""
+    def __call__(self, content: Dict[str, Any]) -> None:
+        """Receive a content dictionary; nothing is returned."""
+        ...
+class ToolCallback(Protocol):
+    """Structural type for callables that accept a tool result and its tool id."""
+    def __call__(self, result: Any, tool_id: str) -> None:
+        """Receive a tool result together with the id of the tool; nothing is returned."""
+        ...
+class APICallback(Protocol):
+    """Structural type for callables that accept a request, a response and an optional error."""
+    def __call__(
+        self,
+        request: Any,
+        response: Any,
+        error: Optional[Exception] = None,
+    ) -> None:
+        """Receive one request/response pair; ``error`` defaults to None. Nothing is returned."""
+        ...
+class BaseCallbackManager(ABC):
+    """Abstract base that stores content, tool and API callbacks.
+    Subclasses must implement the three ``on_*`` hooks declared below.
+    """
+    def __init__(
+        self, content_callback: ContentCallback, tool_callback: ToolCallback, api_callback: APICallback
+    ):
+        """Keep the three callbacks on the instance.
+        Args:
+            content_callback: Callable used for content updates
+            tool_callback: Callable used for tool execution results
+            api_callback: Callable used for API interactions
+        """
+        self.api_callback = api_callback
+        self.tool_callback = tool_callback
+        self.content_callback = content_callback
+    @abstractmethod
+    def on_content(self, content: Any) -> None:
+        """Handle a content update (must be overridden)."""
+        raise NotImplementedError()
+    @abstractmethod
+    def on_tool_result(self, result: Any, tool_id: str) -> None:
+        """Handle the result of a tool execution (must be overridden)."""
+        raise NotImplementedError()
+    @abstractmethod
+    def on_api_interaction(
+        self, request: Any, response: Any, error: Optional[Exception] = None
+    ) -> None:
+        """Handle one API interaction (must be overridden)."""
+        raise NotImplementedError()
+class CallbackManager:
+    """Forwards action and error events to every registered callback handler."""
+    def __init__(self, handlers: Optional[List["CallbackHandler"]] = None):
+        """Create the manager.
+        Args:
+            handlers: Initial handlers. A falsy value (None or an empty list)
+                yields a fresh empty list; otherwise the given list object
+                itself is kept, not a copy
+        """
+        self.handlers = handlers if handlers else []
+    def add_handler(self, handler: "CallbackHandler") -> None:
+        """Register one more handler at the end of the list.
+        Args:
+            handler: Callback handler to register
+        """
+        self.handlers.append(handler)
+    async def _notify(self, hook: str, args: tuple, extra: Dict[str, Any]) -> None:
+        """Await the named hook on each handler, one at a time, in list order."""
+        for registered in self.handlers:
+            await getattr(registered, hook)(*args, **extra)
+    async def on_action_start(self, action: str, **kwargs) -> None:
+        """Tell every handler that an action is starting.
+        Args:
+            action: Action name
+            **kwargs: Additional data passed through to each handler
+        """
+        await self._notify("on_action_start", (action,), kwargs)
+    async def on_action_end(self, action: str, success: bool, **kwargs) -> None:
+        """Tell every handler that an action has finished.
+        Args:
+            action: Action name
+            success: Whether the action was successful
+            **kwargs: Additional data passed through to each handler
+        """
+        await self._notify("on_action_end", (action, success), kwargs)
+    async def on_error(self, error: Exception, **kwargs) -> None:
+        """Tell every handler that an error occurred.
+        Args:
+            error: Exception that occurred
+            **kwargs: Additional data passed through to each handler
+        """
+        await self._notify("on_error", (error,), kwargs)
+class CallbackHandler(ABC):
+    """Abstract interface for objects that react to action and error events.
+    All three hooks are abstract coroutines; the base implementations do nothing.
+    """
+    @abstractmethod
+    async def on_action_start(self, action: str, **kwargs) -> None:
+        """React to an action that is about to start.
+        Args:
+            action: Action name
+            **kwargs: Additional data
+        """
+    @abstractmethod
+    async def on_action_end(self, action: str, success: bool, **kwargs) -> None:
+        """React to an action that has finished.
+        Args:
+            action: Action name
+            success: Whether the action was successful
+            **kwargs: Additional data
+        """
+    @abstractmethod
+    async def on_error(self, error: Exception, **kwargs) -> None:
+        """React to an error.
+        Args:
+            error: Exception that occurred
+            **kwargs: Additional data
+        """

agent/core/computer_agent.py ADDED Viewed

@@ -0,0 +1,69 @@
+"""Main entry point for computer agents."""
+import logging
+from typing import Any, AsyncGenerator, Dict, Optional
+from computer import Computer
+from ..types.base import Provider
+from .factory import AgentFactory
+# NOTE(review): this call runs when the module is imported and configures the
+# root logger at INFO level if it has no handlers yet — confirm that a library
+# module is meant to do this rather than leaving it to the application.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger, named after this module via __name__.
+logger = logging.getLogger(__name__)
+class ComputerAgent:
+    """Front-end for a provider-specific agent that carries out natural-language tasks."""
+    def __init__(self, provider: Provider, computer: Optional[Computer] = None, **kwargs):
+        """Record the configuration and build the provider-specific agent.
+        Args:
+            provider: The AI provider to use (e.g., Provider.ANTHROPIC)
+            computer: Optional Computer instance; it is forwarded unchanged
+                (possibly None) to AgentFactory.create
+            **kwargs: Additional provider-specific arguments, forwarded to the factory
+        """
+        self.provider = provider
+        self._computer = computer
+        self._kwargs = kwargs
+        self._initialized = False
+        self._in_context = False
+        # The factory picks the concrete agent class for the provider.
+        self._agent = AgentFactory.create(provider=provider, computer=computer, **kwargs)
+    async def __aenter__(self):
+        """Enter ``async with``: flag the context as active, initialize, return self."""
+        self._in_context = True
+        await self.initialize()
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Leave ``async with``: only clears the in-context flag."""
+        self._in_context = False
+    async def initialize(self) -> None:
+        """Mark the agent as initialized; later calls are no-ops.
+        The computer passed to the constructor has its ``run()`` awaited
+        first, but only when the agent is not inside ``async with``.
+        """
+        if self._initialized:
+            return
+        outside_context = not self._in_context
+        if outside_context and self._computer:
+            await self._computer.run()
+        self._initialized = True
+    async def run(self, task: str) -> AsyncGenerator[Dict[str, Any], None]:
+        """Run a task and yield each result produced by the underlying agent.
+        Initializes on first use. If no agent exists, a single
+        ``{"error": ...}`` dictionary is yielded instead.
+        """
+        if not self._initialized:
+            await self.initialize()
+        agent = self._agent
+        if agent is None:
+            message = "Agent not initialized properly"
+            logger.error(message)
+            yield {"error": message}
+            return
+        async for item in agent.run(task):
+            yield item
+    @property
+    def computer(self) -> Optional[Computer]:
+        """The underlying agent's computer, or None when there is no agent."""
+        if self._agent:
+            return self._agent.computer
+        return None

agent/core/experiment.py ADDED Viewed

@@ -0,0 +1,222 @@
+"""Core experiment management for agents."""
+import os
+import logging
+import base64
+from io import BytesIO
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+from PIL import Image
+import json
+# Module-level logger, named after this module via __name__.
+logger = logging.getLogger(__name__)
+class ExperimentManager:
+    """Manages experiment directories and logging for the agent.
+    On-disk layout: ``<base_dir>/<YYYYmmdd_HHMMSS>/turn_NNN/``, holding
+    screenshots, action visualizations and API call logs. When no
+    ``base_dir`` is given no directories are created and the save/log
+    methods write nothing.
+    """
+    def __init__(
+        self,
+        base_dir: Optional[str] = None,
+        only_n_most_recent_images: Optional[int] = None,
+    ):
+        """Initialize the experiment manager.
+        Args:
+            base_dir: Base directory for saving experiment data
+            only_n_most_recent_images: Maximum number of recent screenshots to include in API requests
+        """
+        self.base_dir = base_dir
+        self.only_n_most_recent_images = only_n_most_recent_images
+        self.run_dir = None
+        self.current_turn_dir = None
+        self.turn_count = 0
+        self.screenshot_count = 0
+        # Track all screenshots for potential API request inclusion
+        self.screenshot_paths = []
+        # Set up experiment directories if base_dir is provided
+        if self.base_dir:
+            self.setup_experiment_dirs()
+    @staticmethod
+    def _safe_component(text: Any) -> str:
+        """Return ``text`` as a string with path separators replaced by underscores.
+        Caller-supplied fragments (action names, details, call types) are
+        embedded in file names; a separator in one of them would otherwise
+        make the resulting path point into a different directory.
+        """
+        text = str(text)
+        for separator in (os.sep, os.altsep):
+            if separator:
+                text = text.replace(separator, "_")
+        return text
+    def setup_experiment_dirs(self) -> None:
+        """Create the base directory, a timestamped run directory and the first turn directory."""
+        if not self.base_dir:
+            return
+        # Create base experiments directory if it doesn't exist
+        os.makedirs(self.base_dir, exist_ok=True)
+        # Create timestamped run directory
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        self.run_dir = os.path.join(self.base_dir, timestamp)
+        os.makedirs(self.run_dir, exist_ok=True)
+        logger.info(f"Created run directory: {self.run_dir}")
+        # Create first turn directory
+        self.create_turn_dir()
+    def create_turn_dir(self) -> None:
+        """Create a new ``turn_NNN`` directory and make it the current turn directory."""
+        if not self.run_dir:
+            logger.warning("Cannot create turn directory: run_dir not set")
+            return
+        # Increment turn counter
+        self.turn_count += 1
+        # Create turn directory with padded number
+        turn_name = f"turn_{self.turn_count:03d}"
+        self.current_turn_dir = os.path.join(self.run_dir, turn_name)
+        os.makedirs(self.current_turn_dir, exist_ok=True)
+        logger.info(f"Created turn directory: {self.current_turn_dir}")
+    def sanitize_log_data(self, data: Any) -> Any:
+        """Sanitize log data by replacing large binary data with placeholders.
+        Dicts and lists are rebuilt recursively. A string is replaced only when
+        it is longer than 1000 characters and contains "base64" (case-insensitive).
+        Args:
+            data: Data to sanitize
+        Returns:
+            Sanitized copy of the data
+        """
+        if isinstance(data, dict):
+            return {key: self.sanitize_log_data(value) for key, value in data.items()}
+        if isinstance(data, list):
+            return [self.sanitize_log_data(item) for item in data]
+        if isinstance(data, str) and len(data) > 1000 and "base64" in data.lower():
+            return f"[BASE64_DATA_LENGTH_{len(data)}]"
+        return data
+    def save_screenshot(self, img_base64: str, action_type: str = "") -> Optional[str]:
+        """Save a screenshot to the current turn directory.
+        Args:
+            img_base64: Base64 encoded screenshot
+            action_type: Type of action that triggered the screenshot
+        Returns:
+            Path of the written file, or None when there is no current turn
+            directory or the save failed (the failure is logged)
+        """
+        if not self.current_turn_dir:
+            return None
+        try:
+            # Increment screenshot counter
+            self.screenshot_count += 1
+            # Create a descriptive filename
+            timestamp = int(datetime.now().timestamp() * 1000)
+            action_suffix = f"_{self._safe_component(action_type)}" if action_type else ""
+            filename = f"screenshot_{self.screenshot_count:03d}{action_suffix}_{timestamp}.png"
+            # Save directly to the turn directory
+            filepath = os.path.join(self.current_turn_dir, filename)
+            # Save the screenshot
+            img_data = base64.b64decode(img_base64)
+            with open(filepath, "wb") as f:
+                f.write(img_data)
+            # Keep track of the file path
+            self.screenshot_paths.append(filepath)
+            return filepath
+        except Exception as e:
+            # Best effort: a failed screenshot must not interrupt the caller.
+            logger.error(f"Error saving screenshot: {str(e)}")
+            return None
+    def save_action_visualization(
+        self, img: Image.Image, action_name: str, details: str = ""
+    ) -> str:
+        """Save a visualization of an action.
+        Args:
+            img: Image to save
+            action_name: Name of the action
+            details: Additional details about the action
+        Returns:
+            Path to the saved image, or "" when there is no current turn
+            directory or the save failed (the failure is logged)
+        """
+        if not self.current_turn_dir:
+            return ""
+        try:
+            # Create a descriptive filename
+            timestamp = int(datetime.now().timestamp() * 1000)
+            details_suffix = f"_{self._safe_component(details)}" if details else ""
+            filename = f"vis_{self._safe_component(action_name)}{details_suffix}_{timestamp}.png"
+            # Save directly to the turn directory
+            filepath = os.path.join(self.current_turn_dir, filename)
+            # Save the image
+            img.save(filepath)
+            # Keep track of the file path
+            self.screenshot_paths.append(filepath)
+            return filepath
+        except Exception as e:
+            # Best effort: a failed visualization must not interrupt the caller.
+            logger.error(f"Error saving action visualization: {str(e)}")
+            return ""
+    def log_api_call(
+        self,
+        call_type: str,
+        request: Any,
+        provider: str = "unknown",
+        model: str = "unknown",
+        response: Any = None,
+        error: Optional[Exception] = None,
+    ) -> None:
+        """Log API call details to a JSON file in the current turn directory.
+        Args:
+            call_type: Type of API call (request, response, error)
+            request: Request data
+            provider: API provider name
+            model: Model name
+            response: Response data (for response logs)
+            error: Error information (for error logs)
+        """
+        if not self.current_turn_dir:
+            logger.warning("Cannot log API call: current_turn_dir not set")
+            return
+        try:
+            # Create a timestamp for the log file
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            # Create filename based on log type
+            base_name = f"api_call_{timestamp}_{self._safe_component(call_type)}"
+            filepath = os.path.join(self.current_turn_dir, f"{base_name}.json")
+            # The timestamp has one-second resolution, so append a counter
+            # instead of overwriting an earlier log of the same type.
+            duplicate = 1
+            while os.path.exists(filepath):
+                duplicate += 1
+                filepath = os.path.join(self.current_turn_dir, f"{base_name}_{duplicate}.json")
+            # Sanitize data before logging
+            sanitized_request = self.sanitize_log_data(request)
+            sanitized_response = self.sanitize_log_data(response) if response is not None else None
+            # Prepare log data
+            log_data = {
+                "timestamp": timestamp,
+                "provider": provider,
+                "model": model,
+                "type": call_type,
+                "request": sanitized_request,
+            }
+            if sanitized_response is not None:
+                log_data["response"] = sanitized_response
+            if error is not None:
+                log_data["error"] = str(error)
+            # Write to file; default=str stringifies anything json cannot encode
+            with open(filepath, "w", encoding="utf-8") as f:
+                json.dump(log_data, f, indent=2, default=str)
+            logger.info(f"Logged API {call_type} to {filepath}")
+        except Exception as e:
+            # Best effort: logging problems must not interrupt the caller.
+            logger.error(f"Error logging API call: {str(e)}")

agent/core/factory.py ADDED Viewed

@@ -0,0 +1,102 @@
+"""Factory for creating provider-specific agents."""
+from typing import Optional, Dict, Any, List
+from computer import Computer
+from ..types.base import Provider
+from .base_agent import BaseComputerAgent
+# Import provider-specific implementations
+# Availability flags: each starts False and is flipped to True only when the
+# matching guarded import below succeeds.
+_ANTHROPIC_AVAILABLE = False
+_OPENAI_AVAILABLE = False
+# NOTE(review): no statement in the visible code ever sets this flag to True.
+_OLLAMA_AVAILABLE = False
+_OMNI_AVAILABLE = False
+# Try importing providers
+try:
+    import anthropic
+    # An ImportError from either import in this block (the SDK or the provider
+    # module itself) is swallowed below and leaves the flag False.
+    # NOTE(review): the file list for this release shows no
+    # agent/providers/anthropic/agent.py — confirm this import can succeed.
+    from ..providers.anthropic.agent import AnthropicComputerAgent
+    _ANTHROPIC_AVAILABLE = True
+except ImportError:
+    pass
+try:
+    # Only the SDK is probed here; no OpenAI agent class is imported.
+    import openai
+    _OPENAI_AVAILABLE = True
+except ImportError:
+    pass
+try:
+    # NOTE(review): the file list for this release shows no
+    # agent/providers/omni/agent.py — confirm this import can succeed.
+    from ..providers.omni.agent import OmniComputerAgent
+    _OMNI_AVAILABLE = True
+except ImportError:
+    pass
+class AgentFactory:
+    """Factory for creating provider-specific agent implementations."""
+    @staticmethod
+    def create(
+        provider: Provider, computer: Optional[Computer] = None, **kwargs: Any
+    ) -> BaseComputerAgent:
+        """Create an agent based on the specified provider.
+        Args:
+            provider: The AI provider to use
+            computer: Optional Computer instance; a default ``Computer()`` is
+                created when omitted
+            **kwargs: Additional provider-specific arguments
+        Returns:
+            A provider-specific agent implementation
+        Raises:
+            ImportError: If provider dependencies are not installed
+            NotImplementedError: If the OpenAI provider is requested (not implemented yet)
+            ValueError: If provider is not supported
+        """
+        # Create a Computer instance if none is provided
+        if computer is None:
+            computer = Computer()
+        if provider == Provider.ANTHROPIC:
+            if not _ANTHROPIC_AVAILABLE:
+                raise ImportError(
+                    "Anthropic provider requires additional dependencies. "
+                    "Install them with: pip install cua-agent[anthropic]"
+                )
+            return AnthropicComputerAgent(max_retries=3, computer=computer, **kwargs)
+        elif provider == Provider.OPENAI:
+            if not _OPENAI_AVAILABLE:
+                raise ImportError(
+                    "OpenAI provider requires additional dependencies. "
+                    "Install them with: pip install cua-agent[openai]"
+                )
+            raise NotImplementedError("OpenAI provider not yet implemented")
+        elif provider == Provider.OLLAMA:
+            # Ollama is imported lazily, so the import attempt itself is the
+            # availability check. Gating on the module-level _OLLAMA_AVAILABLE
+            # flag (never set to True) made this branch unreachable.
+            try:
+                import ollama  # noqa: F401  (imported only to verify it is installed)
+                from ..providers.ollama.agent import OllamaComputerAgent
+            except ImportError as err:
+                # Chain the cause so the real missing module stays visible.
+                raise ImportError(
+                    "Failed to import ollama package. " "Install it with: pip install ollama"
+                ) from err
+            return OllamaComputerAgent(max_retries=3, computer=computer, **kwargs)
+        elif provider == Provider.OMNI:
+            if not _OMNI_AVAILABLE:
+                raise ImportError(
+                    "Omni provider requires additional dependencies. "
+                    "Install them with: pip install cua-agent[omni]"
+                )
+            return OmniComputerAgent(max_retries=3, computer=computer, **kwargs)
+        else:
+            raise ValueError(f"Unsupported provider: {provider}")