PyPI - cua-agent - Versions diffs - 0.1.5__py3-none-any.whl → 0.1.17__py3-none-any.whl - Mend

cua-agent 0.1.5__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (52) hide show

agent/__init__.py +3 -4
agent/core/__init__.py +3 -10
agent/core/computer_agent.py +207 -32
agent/core/experiment.py +20 -3
agent/core/loop.py +78 -120
agent/core/messages.py +279 -125
agent/core/telemetry.py +44 -32
agent/core/types.py +35 -0
agent/core/visualization.py +197 -0
agent/providers/anthropic/api/client.py +142 -1
agent/providers/anthropic/api_handler.py +140 -0
agent/providers/anthropic/callbacks/__init__.py +5 -0
agent/providers/anthropic/loop.py +224 -209
agent/providers/anthropic/messages/manager.py +3 -1
agent/providers/anthropic/response_handler.py +229 -0
agent/providers/anthropic/tools/base.py +1 -1
agent/providers/anthropic/tools/bash.py +0 -97
agent/providers/anthropic/tools/collection.py +2 -2
agent/providers/anthropic/tools/computer.py +34 -24
agent/providers/anthropic/tools/manager.py +2 -2
agent/providers/anthropic/utils.py +370 -0
agent/providers/omni/__init__.py +1 -20
agent/providers/omni/api_handler.py +42 -0
agent/providers/omni/clients/anthropic.py +4 -0
agent/providers/omni/image_utils.py +0 -72
agent/providers/omni/loop.py +497 -607
agent/providers/omni/parser.py +60 -5
agent/providers/omni/tools/__init__.py +25 -8
agent/providers/omni/tools/base.py +29 -0
agent/providers/omni/tools/bash.py +43 -38
agent/providers/omni/tools/computer.py +144 -181
agent/providers/omni/tools/manager.py +26 -48
agent/providers/omni/types.py +0 -4
agent/providers/omni/utils.py +225 -144
{cua_agent-0.1.5.dist-info → cua_agent-0.1.17.dist-info}/METADATA +6 -36
cua_agent-0.1.17.dist-info/RECORD +63 -0
agent/core/agent.py +0 -252
agent/core/base_agent.py +0 -164
agent/core/factory.py +0 -102
agent/providers/omni/callbacks.py +0 -78
agent/providers/omni/clients/groq.py +0 -101
agent/providers/omni/experiment.py +0 -273
agent/providers/omni/messages.py +0 -171
agent/providers/omni/tool_manager.py +0 -91
agent/providers/omni/visualization.py +0 -130
agent/types/__init__.py +0 -26
agent/types/base.py +0 -53
agent/types/messages.py +0 -36
cua_agent-0.1.5.dist-info/RECORD +0 -67
agent/{types → core}/tools.py +0 -0
{cua_agent-0.1.5.dist-info → cua_agent-0.1.17.dist-info}/WHEEL +0 -0
{cua_agent-0.1.5.dist-info → cua_agent-0.1.17.dist-info}/entry_points.txt +0 -0

agent/core/messages.py CHANGED Viewed

@@ -1,12 +1,11 @@
 """Message handling utilities for agent."""
-import base64
-from datetime import datetime
-from io import BytesIO
 import logging
-from typing import Any, Dict, List, Optional, Union
-from PIL import Image
+import json
+from typing import Any, Dict, List, Optional, Union, Tuple
 from dataclasses import dataclass
+import re
+from ..providers.omni.parser import ParseResult
 logger = logging.getLogger(__name__)
@@ -123,123 +122,278 @@ class BaseMessageManager:
                     break
-def create_user_message(text: str) -> Dict[str, str]:
-    """Create a user message.
-    Args:
-        text: The message text
-    Returns:
-        Message dictionary
-    """
-    return {
-        "role": "user",
-        "content": text,
-    }
-def create_assistant_message(text: str) -> Dict[str, str]:
-    """Create an assistant message.
-    Args:
-        text: The message text
-    Returns:
-        Message dictionary
-    """
-    return {
-        "role": "assistant",
-        "content": text,
-    }
-def create_system_message(text: str) -> Dict[str, str]:
-    """Create a system message.
-    Args:
-        text: The message text
-    Returns:
-        Message dictionary
-    """
-    return {
-        "role": "system",
-        "content": text,
-    }
-def create_image_message(
-    image_base64: Optional[str] = None,
-    image_path: Optional[str] = None,
-    image_obj: Optional[Image.Image] = None,
-) -> Dict[str, Union[str, List[Dict[str, Any]]]]:
-    """Create a message with an image.
-    Args:
-        image_base64: Base64 encoded image
-        image_path: Path to image file
-        image_obj: PIL Image object
-    Returns:
-        Message dictionary with content list
-    Raises:
-        ValueError: If no image source is provided
-    """
-    if not any([image_base64, image_path, image_obj]):
-        raise ValueError("Must provide one of image_base64, image_path, or image_obj")
-    # Convert to base64 if needed
-    if image_path and not image_base64:
-        with open(image_path, "rb") as f:
-            image_bytes = f.read()
-            image_base64 = base64.b64encode(image_bytes).decode("utf-8")
-    elif image_obj and not image_base64:
-        buffer = BytesIO()
-        image_obj.save(buffer, format="PNG")
-        image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
-    return {
-        "role": "user",
-        "content": [
-            {"type": "image", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}
-        ],
-    }
-def create_screen_message(
-    parsed_screen: Dict[str, Any],
-    include_raw: bool = False,
-) -> Dict[str, Union[str, List[Dict[str, Any]]]]:
-    """Create a message with screen information.
-    Args:
-        parsed_screen: Dictionary containing parsed screen info
-        include_raw: Whether to include raw screenshot base64
-    Returns:
-        Message dictionary with content
-    """
-    if include_raw and "screenshot_base64" in parsed_screen:
-        # Create content list with both image and text
-        return {
-            "role": "user",
-            "content": [
-                {
-                    "type": "image",
-                    "image_url": {
-                        "url": f"data:image/png;base64,{parsed_screen['screenshot_base64']}"
-                    },
-                },
-                {
-                    "type": "text",
-                    "text": f"Screen dimensions: {parsed_screen['width']}x{parsed_screen['height']}",
-                },
-            ],
-        }
-    else:
-        # Create text-only message with screen info
-        return {
-            "role": "user",
-            "content": f"Screen dimensions: {parsed_screen['width']}x{parsed_screen['height']}",
-        }
+class StandardMessageManager:
+    """Manages messages in a standardized OpenAI format across different providers."""
+    def __init__(self, config: Optional[ImageRetentionConfig] = None):
+        """Initialize message manager.
+        Args:
+            config: Configuration for image retention
+        """
+        self.messages: List[Dict[str, Any]] = []
+        self.config = config or ImageRetentionConfig()
+    def add_user_message(self, content: Union[str, List[Dict[str, Any]]]) -> None:
+        """Add a user message.
+        Args:
+            content: Message content (text or multimodal content)
+        """
+        self.messages.append({"role": "user", "content": content})
+    def add_assistant_message(self, content: Union[str, List[Dict[str, Any]]]) -> None:
+        """Add an assistant message.
+        Args:
+            content: Message content (text or multimodal content)
+        """
+        self.messages.append({"role": "assistant", "content": content})
+    def add_system_message(self, content: str) -> None:
+        """Add a system message.
+        Args:
+            content: System message content
+        """
+        self.messages.append({"role": "system", "content": content})
+    def get_messages(self) -> List[Dict[str, Any]]:
+        """Get all messages in standard format.
+        Returns:
+            List of messages
+        """
+        # If image retention is configured, apply it
+        if self.config.num_images_to_keep is not None:
+            return self._apply_image_retention(self.messages)
+        return self.messages
+    def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Apply image retention policy to messages.
+        Args:
+            messages: List of messages
+        Returns:
+            List of messages with image retention applied
+        """
+        if not self.config.num_images_to_keep:
+            return messages
+        # Find user messages with images
+        image_messages = []
+        for msg in messages:
+            if msg["role"] == "user" and isinstance(msg["content"], list):
+                has_image = any(
+                    item.get("type") == "image_url" or item.get("type") == "image"
+                    for item in msg["content"]
+                )
+                if has_image:
+                    image_messages.append(msg)
+        # If we don't have more images than the limit, return all messages
+        if len(image_messages) <= self.config.num_images_to_keep:
+            return messages
+        # Get the most recent N images to keep
+        images_to_keep = image_messages[-self.config.num_images_to_keep :]
+        images_to_remove = image_messages[: -self.config.num_images_to_keep]
+        # Create a new message list without the older images
+        result = []
+        for msg in messages:
+            if msg in images_to_remove:
+                # Skip this message
+                continue
+            result.append(msg)
+        return result
+    def to_anthropic_format(
+        self, messages: List[Dict[str, Any]]
+    ) -> Tuple[List[Dict[str, Any]], str]:
+        """Convert standard OpenAI format messages to Anthropic format.
+        Args:
+            messages: List of messages in OpenAI format
+        Returns:
+            Tuple containing (anthropic_messages, system_content)
+        """
+        result = []
+        system_content = ""
+        # Process messages in order to maintain conversation flow
+        previous_assistant_tool_use_ids = (
+            set()
+        )  # Track tool_use_ids in the previous assistant message
+        for i, msg in enumerate(messages):
+            role = msg.get("role", "")
+            content = msg.get("content", "")
+            if role == "system":
+                # Collect system messages for later use
+                system_content += content + "\n"
+                continue
+            if role == "assistant":
+                # Track tool_use_ids in this assistant message for the next user message
+                previous_assistant_tool_use_ids = set()
+                if isinstance(content, list):
+                    for item in content:
+                        if (
+                            isinstance(item, dict)
+                            and item.get("type") == "tool_use"
+                            and "id" in item
+                        ):
+                            previous_assistant_tool_use_ids.add(item["id"])
+                logger.info(
+                    f"Tool use IDs in assistant message #{i}: {previous_assistant_tool_use_ids}"
+                )
+            if role in ["user", "assistant"]:
+                anthropic_msg = {"role": role}
+                # Convert content based on type
+                if isinstance(content, str):
+                    # Simple text content
+                    anthropic_msg["content"] = [{"type": "text", "text": content}]
+                elif isinstance(content, list):
+                    # Convert complex content
+                    anthropic_content = []
+                    for item in content:
+                        item_type = item.get("type", "")
+                        if item_type == "text":
+                            anthropic_content.append({"type": "text", "text": item.get("text", "")})
+                        elif item_type == "image_url":
+                            # Convert OpenAI image format to Anthropic
+                            image_url = item.get("image_url", {}).get("url", "")
+                            if image_url.startswith("data:"):
+                                # Extract base64 data and media type
+                                match = re.match(r"data:(.+);base64,(.+)", image_url)
+                                if match:
+                                    media_type, data = match.groups()
+                                    anthropic_content.append(
+                                        {
+                                            "type": "image",
+                                            "source": {
+                                                "type": "base64",
+                                                "media_type": media_type,
+                                                "data": data,
+                                            },
+                                        }
+                                    )
+                            else:
+                                # Regular URL
+                                anthropic_content.append(
+                                    {
+                                        "type": "image",
+                                        "source": {
+                                            "type": "url",
+                                            "url": image_url,
+                                        },
+                                    }
+                                )
+                        elif item_type == "tool_use":
+                            # Always include tool_use blocks
+                            anthropic_content.append(item)
+                        elif item_type == "tool_result":
+                            # Check if this is a user message AND if the tool_use_id exists in the previous assistant message
+                            tool_use_id = item.get("tool_use_id")
+                            # Only include tool_result if it references a tool_use from the immediately preceding assistant message
+                            if (
+                                role == "user"
+                                and tool_use_id
+                                and tool_use_id in previous_assistant_tool_use_ids
+                            ):
+                                anthropic_content.append(item)
+                                logger.info(
+                                    f"Including tool_result with tool_use_id: {tool_use_id}"
+                                )
+                            else:
+                                # Convert to text to preserve information
+                                logger.warning(
+                                    f"Converting tool_result to text. Tool use ID {tool_use_id} not found in previous assistant message"
+                                )
+                                content_text = "Tool Result: "
+                                if "content" in item:
+                                    if isinstance(item["content"], list):
+                                        for content_item in item["content"]:
+                                            if (
+                                                isinstance(content_item, dict)
+                                                and content_item.get("type") == "text"
+                                            ):
+                                                content_text += content_item.get("text", "")
+                                    elif isinstance(item["content"], str):
+                                        content_text += item["content"]
+                                anthropic_content.append({"type": "text", "text": content_text})
+                    anthropic_msg["content"] = anthropic_content
+                result.append(anthropic_msg)
+        return result, system_content
+    def from_anthropic_format(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Convert Anthropic format messages to standard OpenAI format.
+        Args:
+            messages: List of messages in Anthropic format
+        Returns:
+            List of messages in OpenAI format
+        """
+        result = []
+        for msg in messages:
+            role = msg.get("role", "")
+            content = msg.get("content", [])
+            if role in ["user", "assistant"]:
+                openai_msg = {"role": role}
+                # Simple case: single text block
+                if len(content) == 1 and content[0].get("type") == "text":
+                    openai_msg["content"] = content[0].get("text", "")
+                else:
+                    # Complex case: multiple blocks or non-text
+                    openai_content = []
+                    for item in content:
+                        item_type = item.get("type", "")
+                        if item_type == "text":
+                            openai_content.append({"type": "text", "text": item.get("text", "")})
+                        elif item_type == "image":
+                            # Convert Anthropic image to OpenAI format
+                            source = item.get("source", {})
+                            if source.get("type") == "base64":
+                                media_type = source.get("media_type", "image/png")
+                                data = source.get("data", "")
+                                openai_content.append(
+                                    {
+                                        "type": "image_url",
+                                        "image_url": {"url": f"data:{media_type};base64,{data}"},
+                                    }
+                                )
+                            else:
+                                # URL
+                                openai_content.append(
+                                    {
+                                        "type": "image_url",
+                                        "image_url": {"url": source.get("url", "")},
+                                    }
+                                )
+                        elif item_type in ["tool_use", "tool_result"]:
+                            # Pass through tool-related content
+                            openai_content.append(item)
+                    openai_msg["content"] = openai_content
+                result.append(openai_msg)
+        return result

agent/core/telemetry.py CHANGED Viewed

@@ -4,58 +4,70 @@ import logging
 import os
 import platform
 import sys
-from typing import Dict, Any
+from typing import Dict, Any, Callable
 # Import the core telemetry module
 TELEMETRY_AVAILABLE = False
+# Local fallbacks in case core telemetry isn't available
+def _noop(*args: Any, **kwargs: Any) -> None:
+    """No-op function for when telemetry is not available."""
+    pass
+# Define default functions with unique names to avoid shadowing
+_default_record_event = _noop
+_default_increment_counter = _noop
+_default_set_dimension = _noop
+_default_get_telemetry_client = lambda: None
+_default_flush = _noop
+_default_is_telemetry_enabled = lambda: False
+_default_is_telemetry_globally_disabled = lambda: True
+# Set the actual functions to the defaults initially
+record_event = _default_record_event
+increment_counter = _default_increment_counter
+set_dimension = _default_set_dimension
+get_telemetry_client = _default_get_telemetry_client
+flush = _default_flush
+is_telemetry_enabled = _default_is_telemetry_enabled
+is_telemetry_globally_disabled = _default_is_telemetry_globally_disabled
+logger = logging.getLogger("cua.agent.telemetry")
 try:
+    # Import from core telemetry
     from core.telemetry import (
-        record_event,
-        increment,
-        get_telemetry_client,
-        flush,
-        is_telemetry_enabled,
-        is_telemetry_globally_disabled,
+        record_event as core_record_event,
+        increment as core_increment,
+        get_telemetry_client as core_get_telemetry_client,
+        flush as core_flush,
+        is_telemetry_enabled as core_is_telemetry_enabled,
+        is_telemetry_globally_disabled as core_is_telemetry_globally_disabled,
     )
+    # Override the default functions with actual implementations
+    record_event = core_record_event
+    get_telemetry_client = core_get_telemetry_client
+    flush = core_flush
+    is_telemetry_enabled = core_is_telemetry_enabled
+    is_telemetry_globally_disabled = core_is_telemetry_globally_disabled
     def increment_counter(counter_name: str, value: int = 1) -> None:
         """Wrapper for increment to maintain backward compatibility."""
         if is_telemetry_enabled():
-            increment(counter_name, value)
+            core_increment(counter_name, value)
     def set_dimension(name: str, value: Any) -> None:
         """Set a dimension that will be attached to all events."""
-        logger = logging.getLogger("cua.agent.telemetry")
         logger.debug(f"Setting dimension {name}={value}")
     TELEMETRY_AVAILABLE = True
-    logger = logging.getLogger("cua.agent.telemetry")
     logger.info("Successfully imported telemetry")
 except ImportError as e:
-    logger = logging.getLogger("cua.agent.telemetry")
     logger.warning(f"Could not import telemetry: {e}")
-    TELEMETRY_AVAILABLE = False
-# Local fallbacks in case core telemetry isn't available
-def _noop(*args: Any, **kwargs: Any) -> None:
-    """No-op function for when telemetry is not available."""
-    pass
-logger = logging.getLogger("cua.agent.telemetry")
-# If telemetry isn't available, use no-op functions
-if not TELEMETRY_AVAILABLE:
     logger.debug("Telemetry not available, using no-op functions")
-    record_event = _noop  # type: ignore
-    increment_counter = _noop  # type: ignore
-    set_dimension = _noop  # type: ignore
-    get_telemetry_client = lambda: None  # type: ignore
-    flush = _noop  # type: ignore
-    is_telemetry_enabled = lambda: False  # type: ignore
-    is_telemetry_globally_disabled = lambda: True  # type: ignore
 # Get system info once to use in telemetry
 SYSTEM_INFO = {
@@ -71,7 +83,7 @@ def enable_telemetry() -> bool:
     Returns:
         bool: True if telemetry was successfully enabled, False otherwise
     """
-    global TELEMETRY_AVAILABLE
+    global TELEMETRY_AVAILABLE, record_event, increment_counter, get_telemetry_client, flush, is_telemetry_enabled, is_telemetry_globally_disabled
     # Check if globally disabled using core function
     if TELEMETRY_AVAILABLE and is_telemetry_globally_disabled():

agent/core/types.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""Core type definitions."""
+from typing import Any, Dict, List, Optional, TypedDict, Union
+class AgentResponse(TypedDict, total=False):
+    """Agent response format."""
+    id: str
+    object: str
+    created_at: int
+    status: str
+    error: Optional[str]
+    incomplete_details: Optional[Any]
+    instructions: Optional[Any]
+    max_output_tokens: Optional[int]
+    model: str
+    output: List[Dict[str, Any]]
+    parallel_tool_calls: bool
+    previous_response_id: Optional[str]
+    reasoning: Dict[str, str]
+    store: bool
+    temperature: float
+    text: Dict[str, Dict[str, str]]
+    tool_choice: str
+    tools: List[Dict[str, Union[str, int]]]
+    top_p: float
+    truncation: str
+    usage: Dict[str, Any]
+    user: Optional[str]
+    metadata: Dict[str, Any]
+    response: Dict[str, List[Dict[str, Any]]]
+    # Additional fields for error responses
+    role: str
+    content: Union[str, List[Dict[str, Any]]]

cua-agent 0.1.5__py3-none-any.whl → 0.1.17__py3-none-any.whl

Potentially problematic release.

cua-agent 0.1.5__py3-none-any.whl → 0.1.17__py3-none-any.whl