PyPI - cua-agent - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl - Mend

cua-agent 0.1.0 (py3-none-any.whl) → 0.1.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (18)

agent/__init__.py +3 -3
agent/core/README.md +2 -2
agent/core/agent.py +78 -35
agent/core/messages.py +15 -0
agent/providers/anthropic/__init__.py +2 -2
agent/providers/anthropic/api/client.py +43 -46
agent/providers/anthropic/loop.py +2 -2
agent/providers/anthropic/types.py +5 -5
agent/providers/omni/__init__.py +2 -2
agent/providers/omni/loop.py +17 -13
agent/providers/omni/messages.py +3 -0
agent/providers/omni/prompts.py +0 -14
agent/providers/omni/types.py +35 -13
agent/types/base.py +2 -1
{cua_agent-0.1.0.dist-info → cua_agent-0.1.2.dist-info}/METADATA +1 -1
{cua_agent-0.1.0.dist-info → cua_agent-0.1.2.dist-info}/RECORD +18 -18
{cua_agent-0.1.0.dist-info → cua_agent-0.1.2.dist-info}/WHEEL +0 -0
{cua_agent-0.1.0.dist-info → cua_agent-0.1.2.dist-info}/entry_points.txt +0 -0

agent/__init__.py CHANGED Viewed

@@ -4,7 +4,7 @@ __version__ = "0.1.0"
 from .core.factory import AgentFactory
 from .core.agent import ComputerAgent
-from .types.base import Provider, AgenticLoop
-from .providers.omni.types import APIProvider
+from .providers.omni.types import LLMProvider, LLM
+from .types.base import Provider, AgentLoop
-__all__ = ["AgentFactory", "Provider", "ComputerAgent", "AgenticLoop", "APIProvider"]
+__all__ = ["AgentFactory", "Provider", "ComputerAgent", "AgentLoop", "LLMProvider", "LLM"]

agent/core/README.md CHANGED Viewed

@@ -34,7 +34,7 @@ Here's how to use the unified ComputerAgent:
 ```python
 from agent.core.agent import ComputerAgent
 from agent.types.base import AgenticLoop
-from agent.providers.omni.types import APIProvider
+from agent.providers.omni.types import LLMProvider
 from computer import Computer
 # Create a Computer instance
@@ -44,7 +44,7 @@ computer = Computer()
 agent = ComputerAgent(
     computer=computer,
     loop_type=AgenticLoop.OMNI,
-    provider=APIProvider.OPENAI,
+    provider=LLMProvider.OPENAI,
     model="gpt-4o",
     api_key="your_api_key_here",  # Can also use OPENAI_API_KEY environment variable
     save_trajectory=True,

agent/core/agent.py CHANGED Viewed

@@ -3,12 +3,12 @@
 import os
 import logging
 import asyncio
-from typing import Any, AsyncGenerator, Dict, List, Optional, TYPE_CHECKING
+from typing import Any, AsyncGenerator, Dict, List, Optional, TYPE_CHECKING, Union, cast
 from datetime import datetime
 from computer import Computer
-from ..types.base import Provider, AgenticLoop
+from ..types.base import Provider, AgentLoop
 from .base_agent import BaseComputerAgent
 # Only import types for type checking to avoid circular imports
@@ -17,23 +17,23 @@ if TYPE_CHECKING:
     from ..providers.omni.loop import OmniLoop
     from ..providers.omni.parser import OmniParser
-# Import the APIProvider enum without importing the whole module
-from ..providers.omni.types import APIProvider
+# Import the provider types
+from ..providers.omni.types import LLMProvider, LLM, Model, LLMModel
 logger = logging.getLogger(__name__)
 # Default models for different providers
 DEFAULT_MODELS = {
-    APIProvider.OPENAI: "gpt-4o",
-    APIProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
-    APIProvider.GROQ: "llama3-70b-8192",
+    LLMProvider.OPENAI: "gpt-4o",
+    LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
+    LLMProvider.GROQ: "llama3-70b-8192",
 }
 # Map providers to their environment variable names
 ENV_VARS = {
-    APIProvider.OPENAI: "OPENAI_API_KEY",
-    APIProvider.GROQ: "GROQ_API_KEY",
-    APIProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
+    LLMProvider.OPENAI: "OPENAI_API_KEY",
+    LLMProvider.GROQ: "GROQ_API_KEY",
+    LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
 }
@@ -47,10 +47,9 @@ class ComputerAgent(BaseComputerAgent):
     def __init__(
         self,
         computer: Computer,
-        loop_type: AgenticLoop = AgenticLoop.OMNI,
-        ai_provider: APIProvider = APIProvider.OPENAI,
+        loop: AgentLoop = AgentLoop.OMNI,
+        model: Optional[Union[LLM, Dict[str, str], str]] = None,
         api_key: Optional[str] = None,
-        model: Optional[str] = None,
         save_trajectory: bool = True,
         trajectory_dir: Optional[str] = "trajectories",
         only_n_most_recent_images: Optional[int] = None,
@@ -62,10 +61,13 @@ class ComputerAgent(BaseComputerAgent):
         Args:
             computer: Computer instance to control
-            loop_type: The type of loop to use (Anthropic or Omni)
-            ai_provider: AI provider to use (required for Cua loop)
+            loop: The type of loop to use (Anthropic or Omni)
+            model: LLM configuration. Can be:
+                  - LLM object with provider and name
+                  - Dict with 'provider' and 'name' keys
+                  - String with model name (defaults to OpenAI provider)
+                  - None (defaults based on loop)
             api_key: Optional API key (will use environment variable if not provided)
-            model: Optional model name (will use provider default if not specified)
             save_trajectory: Whether to save screenshots and logs
             trajectory_dir: Directory to save trajectories (defaults to "trajectories")
             only_n_most_recent_images: Limit history to N most recent images
@@ -87,8 +89,7 @@ class ComputerAgent(BaseComputerAgent):
             **kwargs,
         )
-        self.loop_type = loop_type
-        self.provider = ai_provider
+        self.loop_type = loop
         self.save_trajectory = save_trajectory
         self.trajectory_dir = trajectory_dir
         self.only_n_most_recent_images = only_n_most_recent_images
@@ -98,14 +99,19 @@ class ComputerAgent(BaseComputerAgent):
         # Configure logging based on verbosity
         self._configure_logging(verbosity)
+        # Process model configuration
+        self.model_config = self._process_model_config(model, loop)
         # Get API key from environment if not provided
         if api_key is None:
             env_var = (
-                ENV_VARS.get(ai_provider) if loop_type == AgenticLoop.OMNI else "ANTHROPIC_API_KEY"
+                ENV_VARS.get(self.model_config.provider)
+                if loop == AgentLoop.OMNI
+                else "ANTHROPIC_API_KEY"
             )
             if not env_var:
                 raise ValueError(
-                    f"Unsupported provider: {ai_provider}. Please use one of: {list(ENV_VARS.keys())}"
+                    f"Unsupported provider: {self.model_config.provider}. Please use one of: {list(ENV_VARS.keys())}"
                 )
             api_key = os.environ.get(env_var)
@@ -119,18 +125,49 @@ class ComputerAgent(BaseComputerAgent):
                 )
         self.api_key = api_key
-        # Set model based on provider if not specified
-        if model is None:
-            if loop_type == AgenticLoop.OMNI:
-                self.model = DEFAULT_MODELS[ai_provider]
-            else:  # Anthropic loop
-                self.model = DEFAULT_MODELS[APIProvider.ANTHROPIC]
-        else:
-            self.model = model
         # Initialize the appropriate loop based on loop_type
         self.loop = self._init_loop()
+    def _process_model_config(
+        self, model_input: Optional[Union[LLM, Dict[str, str], str]], loop: AgentLoop
+    ) -> LLM:
+        """Process and normalize model configuration.
+        Args:
+            model_input: Input model configuration (LLM, dict, string, or None)
+            loop: The loop type being used
+        Returns:
+            Normalized LLM instance
+        """
+        # Handle case where model_input is None
+        if model_input is None:
+            # Use Anthropic for Anthropic loop, OpenAI for Omni loop
+            default_provider = (
+                LLMProvider.ANTHROPIC if loop == AgentLoop.ANTHROPIC else LLMProvider.OPENAI
+            )
+            return LLM(provider=default_provider)
+        # Handle case where model_input is already a LLM or one of its aliases
+        if isinstance(model_input, (LLM, Model, LLMModel)):
+            return model_input
+        # Handle case where model_input is a dict
+        if isinstance(model_input, dict):
+            provider = model_input.get("provider", LLMProvider.OPENAI)
+            if isinstance(provider, str):
+                provider = LLMProvider(provider)
+            return LLM(provider=provider, name=model_input.get("name"))
+        # Handle case where model_input is a string (model name)
+        if isinstance(model_input, str):
+            default_provider = (
+                LLMProvider.ANTHROPIC if loop == AgentLoop.ANTHROPIC else LLMProvider.OPENAI
+            )
+            return LLM(provider=default_provider, name=model_input)
+        raise ValueError(f"Unsupported model configuration: {model_input}")
     def _configure_logging(self, verbosity: int):
         """Configure logging based on verbosity level."""
         # Use the logging level directly without mapping
@@ -159,12 +196,15 @@ class ComputerAgent(BaseComputerAgent):
         from ..providers.omni.loop import OmniLoop
         from ..providers.omni.parser import OmniParser
-        if self.loop_type == AgenticLoop.ANTHROPIC:
+        if self.loop_type == AgentLoop.ANTHROPIC:
             from ..providers.anthropic.loop import AnthropicLoop
+            # Ensure we always have a valid model name
+            model_name = self.model_config.name or DEFAULT_MODELS[LLMProvider.ANTHROPIC]
             return AnthropicLoop(
                 api_key=self.api_key,
-                model=self.model,
+                model=model_name,
                 computer=self.computer,
                 save_trajectory=self.save_trajectory,
                 base_dir=self.trajectory_dir,
@@ -176,10 +216,13 @@ class ComputerAgent(BaseComputerAgent):
         if "parser" not in self._kwargs:
             self._kwargs["parser"] = OmniParser()
+        # Ensure we always have a valid model name
+        model_name = self.model_config.name or DEFAULT_MODELS[self.model_config.provider]
         return OmniLoop(
-            provider=self.provider,
+            provider=self.model_config.provider,
             api_key=self.api_key,
-            model=self.model,
+            model=model_name,
             computer=self.computer,
             save_trajectory=self.save_trajectory,
             base_dir=self.trajectory_dir,
@@ -198,7 +241,7 @@ class ComputerAgent(BaseComputerAgent):
         """
         try:
             # Format the messages based on loop type
-            if self.loop_type == AgenticLoop.ANTHROPIC:
+            if self.loop_type == AgentLoop.ANTHROPIC:
                 # Anthropic format
                 messages = [{"role": "user", "content": [{"type": "text", "text": task}]}]
             else:
@@ -221,7 +264,7 @@ class ComputerAgent(BaseComputerAgent):
                         continue
                     # Extract content and metadata based on loop type
-                    if self.loop_type == AgenticLoop.ANTHROPIC:
+                    if self.loop_type == AgentLoop.ANTHROPIC:
                         # Handle Anthropic format
                         if "content" in result:
                             content_text = ""

agent/core/messages.py CHANGED Viewed

@@ -37,6 +37,17 @@ class BaseMessageManager:
         if self.image_retention_config.min_removal_threshold < 1:
             raise ValueError("min_removal_threshold must be at least 1")
+        # Track provider for message formatting
+        self.provider = "openai"  # Default provider
+    def set_provider(self, provider: str) -> None:
+        """Set the current provider to format messages for.
+        Args:
+            provider: Provider name (e.g., 'openai', 'anthropic')
+        """
+        self.provider = provider.lower()
     def prepare_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Prepare messages by applying image retention and caching as configured.
@@ -96,6 +107,10 @@ class BaseMessageManager:
         Args:
             messages: Messages to inject caching into
         """
+        # Only apply cache_control for Anthropic API, not OpenAI
+        if self.provider != "anthropic":
+            return
         # Default to caching last 3 turns
         turns_to_cache = 3
         for message in reversed(messages):

agent/providers/anthropic/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """Anthropic provider implementation."""
 from .loop import AnthropicLoop
-from .types import APIProvider
+from .types import LLMProvider
-__all__ = ["AnthropicLoop", "APIProvider"]
+__all__ = ["AnthropicLoop", "LLMProvider"]

agent/providers/anthropic/api/client.py CHANGED Viewed

@@ -3,25 +3,28 @@ import httpx
 import asyncio
 from anthropic import Anthropic, AnthropicBedrock, AnthropicVertex
 from anthropic.types.beta import BetaMessage, BetaMessageParam, BetaToolUnionParam
-from ..types import APIProvider
+from ..types import LLMProvider
 from .logging import log_api_interaction
 import random
 import logging
 logger = logging.getLogger(__name__)
 class APIConnectionError(Exception):
     """Error raised when there are connection issues with the API."""
     pass
 class BaseAnthropicClient:
     """Base class for Anthropic API clients."""
     MAX_RETRIES = 10
     INITIAL_RETRY_DELAY = 1.0
     MAX_RETRY_DELAY = 60.0
     JITTER_FACTOR = 0.1
     async def create_message(
         self,
         *,
@@ -36,79 +39,67 @@ class BaseAnthropicClient:
     async def _make_api_call_with_retries(self, api_call):
         """Make an API call with exponential backoff retry logic.
         Args:
             api_call: Async function that makes the actual API call
         Returns:
             API response
         Raises:
             APIConnectionError: If all retries fail
         """
         retry_count = 0
         last_error = None
         while retry_count < self.MAX_RETRIES:
             try:
                 return await api_call()
             except Exception as e:
                 last_error = e
                 retry_count += 1
                 if retry_count == self.MAX_RETRIES:
                     break
                 # Calculate delay with exponential backoff and jitter
                 delay = min(
-                    self.INITIAL_RETRY_DELAY * (2 ** (retry_count - 1)),
-                    self.MAX_RETRY_DELAY
+                    self.INITIAL_RETRY_DELAY * (2 ** (retry_count - 1)), self.MAX_RETRY_DELAY
                 )
                 # Add jitter to avoid thundering herd
                 jitter = delay * self.JITTER_FACTOR * (2 * random.random() - 1)
                 final_delay = delay + jitter
                 logger.info(
                     f"Retrying request (attempt {retry_count}/{self.MAX_RETRIES}) "
                     f"in {final_delay:.2f} seconds after error: {str(e)}"
                 )
                 await asyncio.sleep(final_delay)
         raise APIConnectionError(
-            f"Failed after {self.MAX_RETRIES} retries. "
-            f"Last error: {str(last_error)}"
+            f"Failed after {self.MAX_RETRIES} retries. " f"Last error: {str(last_error)}"
         )
 class AnthropicDirectClient(BaseAnthropicClient):
     """Direct Anthropic API client implementation."""
     def __init__(self, api_key: str, model: str):
         self.model = model
-        self.client = Anthropic(
-            api_key=api_key,
-            http_client=self._create_http_client()
-        )
+        self.client = Anthropic(api_key=api_key, http_client=self._create_http_client())
     def _create_http_client(self) -> httpx.Client:
         """Create an HTTP client with appropriate settings."""
         return httpx.Client(
             verify=True,
-            timeout=httpx.Timeout(
-                connect=30.0,
-                read=300.0,
-                write=30.0,
-                pool=30.0
-            ),
+            timeout=httpx.Timeout(connect=30.0, read=300.0, write=30.0, pool=30.0),
             transport=httpx.HTTPTransport(
                 retries=3,
                 verify=True,
-                limits=httpx.Limits(
-                    max_keepalive_connections=5,
-                    max_connections=10
-                )
-            )
+                limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
+            ),
         )
     async def create_message(
         self,
         *,
@@ -119,6 +110,7 @@ class AnthropicDirectClient(BaseAnthropicClient):
         betas: list[str],
     ) -> BetaMessage:
         """Create a message using the direct Anthropic API with retry logic."""
         async def api_call():
             response = self.client.beta.messages.with_raw_response.create(
                 max_tokens=max_tokens,
@@ -130,20 +122,21 @@ class AnthropicDirectClient(BaseAnthropicClient):
             )
             log_api_interaction(response.http_response.request, response.http_response, None)
             return response.parse()
         try:
             return await self._make_api_call_with_retries(api_call)
         except Exception as e:
             log_api_interaction(None, None, e)
             raise
 class AnthropicVertexClient(BaseAnthropicClient):
     """Google Cloud Vertex AI implementation of Anthropic client."""
     def __init__(self, model: str):
         self.model = model
         self.client = AnthropicVertex()
     async def create_message(
         self,
         *,
@@ -154,6 +147,7 @@ class AnthropicVertexClient(BaseAnthropicClient):
         betas: list[str],
     ) -> BetaMessage:
         """Create a message using Vertex AI with retry logic."""
         async def api_call():
             response = self.client.beta.messages.with_raw_response.create(
                 max_tokens=max_tokens,
@@ -165,20 +159,21 @@ class AnthropicVertexClient(BaseAnthropicClient):
             )
             log_api_interaction(response.http_response.request, response.http_response, None)
             return response.parse()
         try:
             return await self._make_api_call_with_retries(api_call)
         except Exception as e:
             log_api_interaction(None, None, e)
             raise
 class AnthropicBedrockClient(BaseAnthropicClient):
     """AWS Bedrock implementation of Anthropic client."""
     def __init__(self, model: str):
         self.model = model
         self.client = AnthropicBedrock()
     async def create_message(
         self,
         *,
@@ -189,6 +184,7 @@ class AnthropicBedrockClient(BaseAnthropicClient):
         betas: list[str],
     ) -> BetaMessage:
         """Create a message using AWS Bedrock with retry logic."""
         async def api_call():
             response = self.client.beta.messages.with_raw_response.create(
                 max_tokens=max_tokens,
@@ -200,23 +196,24 @@ class AnthropicBedrockClient(BaseAnthropicClient):
             )
             log_api_interaction(response.http_response.request, response.http_response, None)
             return response.parse()
         try:
             return await self._make_api_call_with_retries(api_call)
         except Exception as e:
             log_api_interaction(None, None, e)
             raise
 class AnthropicClientFactory:
     """Factory for creating appropriate Anthropic client implementations."""
     @staticmethod
-    def create_client(provider: APIProvider, api_key: str, model: str) -> BaseAnthropicClient:
+    def create_client(provider: LLMProvider, api_key: str, model: str) -> BaseAnthropicClient:
         """Create an appropriate client based on the provider."""
-        if provider == APIProvider.ANTHROPIC:
+        if provider == LLMProvider.ANTHROPIC:
             return AnthropicDirectClient(api_key, model)
-        elif provider == APIProvider.VERTEX:
+        elif provider == LLMProvider.VERTEX:
             return AnthropicVertexClient(model)
-        elif provider == APIProvider.BEDROCK:
+        elif provider == LLMProvider.BEDROCK:
             return AnthropicBedrockClient(model)
-        raise ValueError(f"Unsupported provider: {provider}")
+        raise ValueError(f"Unsupported provider: {provider}")

agent/providers/anthropic/loop.py CHANGED Viewed

@@ -32,7 +32,7 @@ from .tools.manager import ToolManager
 from .messages.manager import MessageManager
 from .callbacks.manager import CallbackManager
 from .prompts import SYSTEM_PROMPT
-from .types import APIProvider
+from .types import LLMProvider
 from .tools import ToolResult
 # Constants
@@ -86,7 +86,7 @@ class AnthropicLoop(BaseLoop):
         self.model = "claude-3-7-sonnet-20250219"
         # Anthropic-specific attributes
-        self.provider = APIProvider.ANTHROPIC
+        self.provider = LLMProvider.ANTHROPIC
         self.client = None
         self.retry_count = 0
         self.tool_manager = None

agent/providers/anthropic/types.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from enum import StrEnum
-class APIProvider(StrEnum):
+class LLMProvider(StrEnum):
     """Enum for supported API providers."""
     ANTHROPIC = "anthropic"
@@ -9,8 +9,8 @@ class APIProvider(StrEnum):
     VERTEX = "vertex"
-PROVIDER_TO_DEFAULT_MODEL_NAME: dict[APIProvider, str] = {
-    APIProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
-    APIProvider.BEDROCK: "anthropic.claude-3-7-sonnet-20250219-v2:0",
-    APIProvider.VERTEX: "claude-3-5-sonnet-v2@20241022",
+PROVIDER_TO_DEFAULT_MODEL_NAME: dict[LLMProvider, str] = {
+    LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
+    LLMProvider.BEDROCK: "anthropic.claude-3-7-sonnet-20250219-v2:0",
+    LLMProvider.VERTEX: "claude-3-5-sonnet-v2@20241022",
 }

agent/providers/omni/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@
 # The OmniComputerAgent has been replaced by the unified ComputerAgent
 # which can be found in agent.core.agent
-from .types import APIProvider
+from .types import LLMProvider
 from .experiment import ExperimentManager
 from .visualization import visualize_click, visualize_scroll, calculate_element_center
 from .image_utils import (
@@ -14,7 +14,7 @@ from .image_utils import (
 )
 __all__ = [
-    "APIProvider",
+    "LLMProvider",
     "ExperimentManager",
     "visualize_click",
     "visualize_scroll",

agent/providers/omni/loop.py CHANGED Viewed

@@ -17,7 +17,7 @@ import copy
 from .parser import OmniParser, ParseResult, ParserMetadata, UIElement
 from ...core.loop import BaseLoop
 from computer import Computer
-from .types import APIProvider
+from .types import LLMProvider
 from .clients.base import BaseOmniClient
 from .clients.openai import OpenAIClient
 from .clients.groq import GroqClient
@@ -46,7 +46,7 @@ class OmniLoop(BaseLoop):
     def __init__(
         self,
         parser: OmniParser,
-        provider: APIProvider,
+        provider: LLMProvider,
         api_key: str,
         model: str,
         computer: Computer,
@@ -180,11 +180,11 @@ class OmniLoop(BaseLoop):
         try:
             logger.info(f"Initializing {self.provider} client with model {self.model}...")
-            if self.provider == APIProvider.OPENAI:
+            if self.provider == LLMProvider.OPENAI:
                 self.client = OpenAIClient(api_key=self.api_key, model=self.model)
-            elif self.provider == APIProvider.GROQ:
+            elif self.provider == LLMProvider.GROQ:
                 self.client = GroqClient(api_key=self.api_key, model=self.model)
-            elif self.provider == APIProvider.ANTHROPIC:
+            elif self.provider == LLMProvider.ANTHROPIC:
                 self.client = AnthropicClient(
                     api_key=self.api_key,
                     model=self.model,
@@ -219,12 +219,16 @@ class OmniLoop(BaseLoop):
                     if self.client is None:
                         raise RuntimeError("Failed to initialize client")
+                # Set the provider in message manager based on current provider
+                provider_name = str(self.provider).split(".")[-1].lower()  # Extract name from enum
+                self.message_manager.set_provider(provider_name)
                 # Apply image retention and prepare messages
                 # This will limit the number of images based on only_n_most_recent_images
-                prepared_messages = self.message_manager.prepare_messages(messages.copy())
+                prepared_messages = self.message_manager.get_formatted_messages(provider_name)
                 # Filter out system messages for Anthropic
-                if self.provider == APIProvider.ANTHROPIC:
+                if self.provider == LLMProvider.ANTHROPIC:
                     filtered_messages = [
                         msg for msg in prepared_messages if msg["role"] != "system"
                     ]
@@ -234,7 +238,7 @@ class OmniLoop(BaseLoop):
                 # Log request
                 request_data = {"messages": filtered_messages, "max_tokens": self.max_tokens}
-                if self.provider == APIProvider.ANTHROPIC:
+                if self.provider == LLMProvider.ANTHROPIC:
                     request_data["system"] = self._get_system_prompt()
                 else:
                     request_data["system"] = system_prompt
@@ -251,7 +255,7 @@ class OmniLoop(BaseLoop):
                 if is_async:
                     # For async implementations (AnthropicClient)
-                    if self.provider == APIProvider.ANTHROPIC:
+                    if self.provider == LLMProvider.ANTHROPIC:
                         response = await run_method(
                             messages=filtered_messages,
                             system=self._get_system_prompt(),
@@ -265,7 +269,7 @@ class OmniLoop(BaseLoop):
                         )
                 else:
                     # For non-async implementations (GroqClient, etc.)
-                    if self.provider == APIProvider.ANTHROPIC:
+                    if self.provider == LLMProvider.ANTHROPIC:
                         response = run_method(
                             messages=filtered_messages,
                             system=self._get_system_prompt(),
@@ -335,7 +339,7 @@ class OmniLoop(BaseLoop):
         action_screenshot_saved = False
         try:
             # Handle Anthropic response format
-            if self.provider == APIProvider.ANTHROPIC:
+            if self.provider == LLMProvider.ANTHROPIC:
                 if hasattr(response, "content") and isinstance(response.content, list):
                     # Extract text from content blocks
                     for block in response.content:
@@ -559,7 +563,7 @@ class OmniLoop(BaseLoop):
         """Process and add screen info to messages."""
         try:
             # Only add message if we have an image and provider supports it
-            if self.provider in [APIProvider.OPENAI, APIProvider.ANTHROPIC]:
+            if self.provider in [LLMProvider.OPENAI, LLMProvider.ANTHROPIC]:
                 image = parsed_screen.annotated_image_base64 or None
                 if image:
                     # Save screen info to current turn directory
@@ -573,7 +577,7 @@ class OmniLoop(BaseLoop):
                             logger.info(f"Saved elements to {elements_path}")
                     # Format the image content based on the provider
-                    if self.provider == APIProvider.ANTHROPIC:
+                    if self.provider == LLMProvider.ANTHROPIC:
                         # Compress the image before sending to Anthropic (5MB limit)
                         image_size = len(image)
                         logger.info(f"Image base64 is present, length: {image_size}")

agent/providers/omni/messages.py CHANGED Viewed

@@ -103,6 +103,9 @@ class OmniMessageManager(BaseMessageManager):
         Returns:
             List of formatted messages
         """
+        # Set the provider for message formatting
+        self.set_provider(provider)
         if provider == "anthropic":
             return self._format_for_anthropic()
         elif provider == "openai":

agent/providers/omni/prompts.py CHANGED Viewed

@@ -62,17 +62,3 @@ IMPORTANT NOTES:
 9. Reflect whether the element is clickable or not, for example reflect if it is an hyperlink or a button or a normal text.
 10. If you are prompted with login information page or captcha page, or you think it need user's permission to do the next action, you should say "Action": "None" in the json field.
 """
-# SYSTEM_PROMPT1 = """You are an AI assistant helping users interact with their computer.
-# Analyze the screen information and respond with JSON containing:
-# {
-#     "Box ID": "Numeric ID of the relevant UI element",
-#     "Action": "One of: left_click, right_click, double_click, move_cursor, drag_to, type_text, press_key, hotkey, scroll_down, scroll_up, wait",
-#     "Value": "Text to type, key to press",
-#     "Explanation": "Why this action was chosen"
-# }
-# Notes:
-# - For starting applications, use the "hotkey" action with command+space for starting a Spotlight search.
-# - Each UI element is highlighted with a colored bounding box, and its Box ID appears nearby in the same color for easy identification.
-# """

agent/providers/omni/types.py CHANGED Viewed

@@ -1,11 +1,12 @@
 """Type definitions for the Omni provider."""
 from enum import StrEnum
-from typing import Dict
+from typing import Dict, Optional
+from dataclasses import dataclass
-class APIProvider(StrEnum):
-    """Supported API providers."""
+class LLMProvider(StrEnum):
+    """Supported LLM providers."""
     ANTHROPIC = "anthropic"
     OPENAI = "openai"
@@ -13,18 +14,39 @@ class APIProvider(StrEnum):
     QWEN = "qwen"
+LLMProvider
+@dataclass
+class LLM:
+    """Configuration for LLM model and provider."""
+    provider: LLMProvider
+    name: Optional[str] = None
+    def __post_init__(self):
+        """Set default model name if not provided."""
+        if self.name is None:
+            self.name = PROVIDER_TO_DEFAULT_MODEL.get(self.provider)
+# For backward compatibility
+LLMModel = LLM
+Model = LLM
 # Default models for each provider
-PROVIDER_TO_DEFAULT_MODEL: Dict[APIProvider, str] = {
-    APIProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
-    APIProvider.OPENAI: "gpt-4o",
-    APIProvider.GROQ: "deepseek-r1-distill-llama-70b",
-    APIProvider.QWEN: "qwen2.5-vl-72b-instruct",
+PROVIDER_TO_DEFAULT_MODEL: Dict[LLMProvider, str] = {
+    LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
+    LLMProvider.OPENAI: "gpt-4o",
+    LLMProvider.GROQ: "deepseek-r1-distill-llama-70b",
+    LLMProvider.QWEN: "qwen2.5-vl-72b-instruct",
 }
 # Environment variable names for each provider
-PROVIDER_TO_ENV_VAR: Dict[APIProvider, str] = {
-    APIProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
-    APIProvider.OPENAI: "OPENAI_API_KEY",
-    APIProvider.GROQ: "GROQ_API_KEY",
-    APIProvider.QWEN: "QWEN_API_KEY",
+PROVIDER_TO_ENV_VAR: Dict[LLMProvider, str] = {
+    LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
+    LLMProvider.OPENAI: "OPENAI_API_KEY",
+    LLMProvider.GROQ: "GROQ_API_KEY",
+    LLMProvider.QWEN: "QWEN_API_KEY",
 }

agent/types/base.py CHANGED Viewed

@@ -44,9 +44,10 @@ class Annotation(BaseModel):
     vm_url: str
-class AgenticLoop(Enum):
+class AgentLoop(Enum):
     """Enumeration of available loop types."""
     ANTHROPIC = auto()  # Anthropic implementation
+    OPENAI = auto()  # OpenAI implementation
     OMNI = auto()  # OmniLoop implementation
     # Add more loop types as needed

{cua_agent-0.1.0.dist-info → cua_agent-0.1.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cua-agent
-Version: 0.1.0
+Version: 0.1.2
 Summary: CUA (Computer Use) Agent for AI-driven computer interaction
 Author-Email: TryCua <gh@trycua.com>
 Requires-Python: <3.13,>=3.10

{cua_agent-0.1.0.dist-info → cua_agent-0.1.2.dist-info}/RECORD RENAMED Viewed

@@ -1,15 +1,15 @@
 agent/README.md,sha256=8EFnLrKejthEcL9bZflQSbvA-KwpiPanBz8TEEwRub8,2153
-agent/__init__.py,sha256=16Q828puFb7Ucq_-de49moVCzl1-iDO8Uo5dzFwX0Ag,347
-agent/core/README.md,sha256=RY4kKEjm_-_Ul2xgY7ntzsXdPe0Tg1wvtOSZ4xp4DN0,3559
+agent/__init__.py,sha256=BRIunVPG0T5CdAiNJyElKxUZN8Mngg2_TmtLwaupG4I,355
+agent/core/README.md,sha256=VOXNVbR0ugxf9gCXYmZtUU2kngZhfi29haT_oSxK0Lk,3559
 agent/core/__init__.py,sha256=0htZ-VfsH9ixHB8j_SXu_uv6r3XXsq5TrghFNd-yRNE,709
-agent/core/agent.py,sha256=q2x0vFykIavX_FBi4Eq222QCSFmuuekAin4FPrtSGbY,11711
+agent/core/agent.py,sha256=AQ-S2wVD82RFnD_HmR-zjA7Jj09CUKGp7KreWX1j6Fg,13495
 agent/core/base_agent.py,sha256=MgaMKTwgqNJ1-TgS_mxALoC9COzc7Acg9y7Q8HAFX2c,6266
 agent/core/callbacks.py,sha256=VbGIf5QkHh3Q0KsLM6wv7hRdIA5WExTVYLm64bckyUA,4306
 agent/core/computer_agent.py,sha256=JGLMl_PwImUttmQh2amdLlXHS9CUyZ9MW20J1Xid7dM,2417
 agent/core/experiment.py,sha256=AST1t83eqaGzjoW6KvrhfVIs3ELAR_I70VHq2NsMmNk,7446
 agent/core/factory.py,sha256=WraOEHWPXBSN4R3DO7M2ctyadodeA8tzHM3dUjdQ_3A,3441
 agent/core/loop.py,sha256=E-0pz7MaguZQrHs5GP98Oc8C_Iz8ier0vXrD9Ny2HL8,8999
-agent/core/messages.py,sha256=Ou0lLEwa2EQCartcTszsvNjCP6sHUxmr2_C9PGzbASg,7163
+agent/core/messages.py,sha256=N8pV8Eh-AJpMuDPRI5OGWUIOU6DRr-pQjK9XU0go9Hk,7637
 agent/core/tools/__init__.py,sha256=xZen-PqUp2dUaMEHJowXCQm33_5Sxhsx9PSoD0rq6tI,489
 agent/core/tools/base.py,sha256=CdzRFNuOjNfzgyTUN4ZoCGkUDR5HI0ECQVpvrUdEij8,2295
 agent/core/tools/bash.py,sha256=jnJKVlHn8np8e0gWd8EO0_qqjMkfQzutSugA_Iol4jE,1585
@@ -18,11 +18,11 @@ agent/core/tools/computer.py,sha256=lT_aW3huoYpcM8kffuokELupSz_WZG_qkaW1gITRC58,
 agent/core/tools/edit.py,sha256=kv4jTKCM0VXrnoNErf7mT-xlr81-7T8v49_VA9y_L4Y,2005
 agent/core/tools/manager.py,sha256=IRsCXjGc076nncQuyIjODoafnHTDhrf9sP5B4q5Pcdo,1742
 agent/providers/__init__.py,sha256=b4tIBAaIB1V7p8V0BWipHVnMhfHH_OuVgP4OWGSHdD8,194
-agent/providers/anthropic/__init__.py,sha256=vEqLDkYXZoXg9A64bOtWfv9hoJlJCXbTpQGcmQ9eec8,149
-agent/providers/anthropic/api/client.py,sha256=_DeCn6bYgVG0LcQYDO6VCjTPrt6U-PO5vr4GWmhCPH8,7404
+agent/providers/anthropic/__init__.py,sha256=Mj11IZnVshZ2iHkvg4Z5-jrQIaD1WvzDz2Zk_pMwqIA,149
+agent/providers/anthropic/api/client.py,sha256=Y_g4Xg8Ko4tCqjipVm0GBMw-86vw0KQVXS5aWzJinzw,7038
 agent/providers/anthropic/api/logging.py,sha256=vHpwkIyOZdkSTVIH4ycbBPd4a_rzhP7Osu1I-Ayouwc,5154
 agent/providers/anthropic/callbacks/manager.py,sha256=dRKN7MuBze2dLal0iHDxCKYqMdh_KShSphuwn7zC-c4,1878
-agent/providers/anthropic/loop.py,sha256=GfUU_0erZgaM8oENSbrKEepsYsYTfuOiygcjHK0pefY,17904
+agent/providers/anthropic/loop.py,sha256=-g-OUpdVPSTO5kFJSZ5AmnjoWSEs2niHZFSR6B_KKvU,17904
 agent/providers/anthropic/messages/manager.py,sha256=atD41v6bjC1STxRB-jLBty9wHlMwacH9cwsL4tBz3uo,4891
 agent/providers/anthropic/prompts.py,sha256=nHFfgPrfvnWrEdVP7EUBGUHAI85D2X9HeZirk9EwncU,1941
 agent/providers/anthropic/tools/__init__.py,sha256=JyZwuVtPUnZwRSZBSCdQv9yxbLCsygm3l8Ywjjt9qTQ,661
@@ -33,8 +33,8 @@ agent/providers/anthropic/tools/computer.py,sha256=WnQS2rIIDz1juwoQMun2ODJjOV134
 agent/providers/anthropic/tools/edit.py,sha256=EGRP61MDA4Oue1D7Q-_vLpd6LdGbdBA1Z4HSZ66DbmI,13465
 agent/providers/anthropic/tools/manager.py,sha256=zW-biqO_MV3fb1nDEOl3EmCXD1leoglFj6LDRSM3djs,1982
 agent/providers/anthropic/tools/run.py,sha256=xhXdnBK1di9muaO44CEirL9hpGy3NmKbjfMpyeVmn8Y,1595
-agent/providers/anthropic/types.py,sha256=kKc4XvSuKfumv4KLpJOwyY4t5deBsLgZTSAP4raZGvg,421
-agent/providers/omni/__init__.py,sha256=wKOVVWHkD-p4QUz0TIEENkMb7Iq2LRSh88KUGBW1XQA,744
+agent/providers/anthropic/types.py,sha256=SF00kOMC1ui8j9Ah56KaeiR2cL394qCHjFIsBpXxt5w,421
+agent/providers/omni/__init__.py,sha256=eTUh4Pmh4zO-RLnP-wAFm8EkJBMImT-G2xnVIYWRti0,744
 agent/providers/omni/callbacks.py,sha256=ZG9NCgsHWt6y5jKsfcGLaoLxTpmKnIhCArDdeP4q9sA,2369
 agent/providers/omni/clients/anthropic.py,sha256=X_QRVxqwA_ExdUqgBEwo1aHOfZQxVIBDmDugNHF97OM,3554
 agent/providers/omni/clients/base.py,sha256=zAAgPi0jl3SWPC730R9l79E8bfYPSo39UtCSE-mrK6I,1076
@@ -43,23 +43,23 @@ agent/providers/omni/clients/openai.py,sha256=E4TAXMUFoYTunJETCWCNx5XAc6xutiN4rB
 agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
 agent/providers/omni/experiment.py,sha256=JGAdHi7Nf73I48c9k3TY1Xpr_i6D2VG1wurOzw5cNGk,9888
 agent/providers/omni/image_utils.py,sha256=qIFuNi5cIMVwrqYBXG1T6PxUlbxz7gIngFFP39bZIlU,2782
-agent/providers/omni/loop.py,sha256=Xr2QeedAVJ_jHn3KMopRuH3mrm2Qn4ncxKjqj9hWxAw,43577
-agent/providers/omni/messages.py,sha256=6LkQfzYDWq2FvIHpqhs5pc0l6AmFx_xKCjj1R5czMPo,6047
+agent/providers/omni/loop.py,sha256=mHCs13in3mrLizF1x8OeCXECp4bL9-CYS_XOJOUZqu8,43827
+agent/providers/omni/messages.py,sha256=zdjQCAMH-hOyrQQesHhTiIsQbw43KqVSmVIzS8JOIFA,6134
 agent/providers/omni/parser.py,sha256=Iv-cXWG2qzdYjyZJH5pGUzfv6nOaiHQ2OXdQSe00Ydw,9151
-agent/providers/omni/prompts.py,sha256=29qy8ppbLOjLil3aiqryjaiBf8CQx-xXHN44O-85Q00,4503
+agent/providers/omni/prompts.py,sha256=Mupjy0bUwBjcAeLXpE1r1jisYPSlhwsp-IXJKEKrEtw,3779
 agent/providers/omni/tool_manager.py,sha256=O6DxyEI-Vg6jt99phh011o4q4me_vNhH2YffIxkO4GM,2585
 agent/providers/omni/tools/__init__.py,sha256=l636hx9Q5z9eaFdPanPwPENUE-w-Xm8kAZhPUq0ZQF4,309
 agent/providers/omni/tools/bash.py,sha256=y_ibfP9iRcbiU_E0faAoa4DCP_BlkMlKOOURdBBIGZE,2030
 agent/providers/omni/tools/computer.py,sha256=xkMmAR0e_kbf0Zs2mggCDyWrQOJZyXOKPFjkutaQb94,9108
 agent/providers/omni/tools/manager.py,sha256=V_tav2yU92PyQnFlxNXG1wvNEaJoEYudtKx5sRjj06Q,2619
-agent/providers/omni/types.py,sha256=cEH6M5fcRN8ZIv_jfcYkTYboGBM4EzglLZo1_Xk7Ip8,800
+agent/providers/omni/types.py,sha256=G7Zqm-nWMa3K2klj-D3KUVWc2r8NJB7sYZCwwl0m9Ic,1233
 agent/providers/omni/utils.py,sha256=JqSye1bEp4wxhUgmaMyZi172fTlgXtygJ7XlnvKdUtE,6337
 agent/providers/omni/visualization.py,sha256=N3qVQLxYmia3iSVC5oCt5YRlMPuVfylCOyB99R33u8U,3924
 agent/types/__init__.py,sha256=61UFJT-w0CT4YRn0LiTx4A7fsMdVQjlXO9vnmbI1A7Y,604
-agent/types/base.py,sha256=rVb4mPWp1SOHfrzOCDqx0pfCV5bgIsdrIzgM_kX_xVs,1090
+agent/types/base.py,sha256=Iy_Q2DIBMLtwWdLyfvHw_6E2ltYu3bIv8GUNy3LYkGs,1133
 agent/types/messages.py,sha256=4-hwtxeAhto90_EZpHFducddtsHUsHauvXzYrpKG4RE,953
 agent/types/tools.py,sha256=Jes2CFCFqC727WWHbO-sG7V03rBHnQe5X7Oi9ZkuScI,877
-cua_agent-0.1.0.dist-info/METADATA,sha256=Q4nPzYL_UQwx82vuaRLBUFmA_Sgd37TVoGA9FNYDRmU,1890
-cua_agent-0.1.0.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
-cua_agent-0.1.0.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
-cua_agent-0.1.0.dist-info/RECORD,,
+cua_agent-0.1.2.dist-info/METADATA,sha256=bXSToJpS_e5KRzyRELUzCuOkozsDUD29pBMj3DKzF7U,1890
+cua_agent-0.1.2.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
+cua_agent-0.1.2.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+cua_agent-0.1.2.dist-info/RECORD,,

{cua_agent-0.1.0.dist-info → cua_agent-0.1.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{cua_agent-0.1.0.dist-info → cua_agent-0.1.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

cua-agent 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

Potentially problematic release.

cua-agent 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl