cua-agent 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

agent/__init__.py CHANGED
@@ -49,7 +49,7 @@ except Exception as e:
49
49
  logger.warning(f"Error initializing telemetry: {e}")
50
50
 
51
51
  from .providers.omni.types import LLMProvider, LLM
52
- from .core.loop import AgentLoop
53
- from .core.computer_agent import ComputerAgent
52
+ from .core.factory import AgentLoop
53
+ from .core.agent import ComputerAgent
54
54
 
55
55
  __all__ = ["AgentLoop", "LLMProvider", "LLM", "ComputerAgent"]
agent/core/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """Core agent components."""
2
2
 
3
- from .loop import BaseLoop
3
+ from .factory import BaseLoop
4
4
  from .messages import (
5
5
  BaseMessageManager,
6
6
  ImageRetentionConfig,
@@ -3,32 +3,18 @@
3
3
  import asyncio
4
4
  import logging
5
5
  import os
6
- from typing import Any, AsyncGenerator, Dict, Optional, cast, List
6
+ from typing import AsyncGenerator, Optional
7
7
 
8
8
  from computer import Computer
9
- from ..providers.anthropic.loop import AnthropicLoop
10
- from ..providers.omni.loop import OmniLoop
11
- from ..providers.omni.parser import OmniParser
12
- from ..providers.omni.types import LLMProvider, LLM
9
+ from ..providers.omni.types import LLM
13
10
  from .. import AgentLoop
14
- from .messages import StandardMessageManager, ImageRetentionConfig
15
11
  from .types import AgentResponse
12
+ from .factory import LoopFactory
13
+ from .provider_config import DEFAULT_MODELS, ENV_VARS
16
14
 
17
15
  logging.basicConfig(level=logging.INFO)
18
16
  logger = logging.getLogger(__name__)
19
17
 
20
- # Default models for different providers
21
- DEFAULT_MODELS = {
22
- LLMProvider.OPENAI: "gpt-4o",
23
- LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
24
- }
25
-
26
- # Map providers to their environment variable names
27
- ENV_VARS = {
28
- LLMProvider.OPENAI: "OPENAI_API_KEY",
29
- LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
30
- }
31
-
32
18
 
33
19
  class ComputerAgent:
34
20
  """A computer agent that can perform automated tasks using natural language instructions."""
@@ -98,35 +84,27 @@ class ComputerAgent:
98
84
  f"No model specified for provider {self.provider} and no default found"
99
85
  )
100
86
 
101
- # Ensure computer is properly cast for typing purposes
102
- computer_instance = self.computer
103
-
104
87
  # Get API key from environment if not provided
105
88
  actual_api_key = api_key or os.environ.get(ENV_VARS[self.provider], "")
106
89
  if not actual_api_key:
107
90
  raise ValueError(f"No API key provided for {self.provider}")
108
91
 
109
- # Initialize the appropriate loop based on the loop parameter
110
- if loop == AgentLoop.ANTHROPIC:
111
- self._loop = AnthropicLoop(
112
- api_key=actual_api_key,
113
- model=actual_model_name,
114
- computer=computer_instance,
115
- save_trajectory=save_trajectory,
116
- base_dir=trajectory_dir,
117
- only_n_most_recent_images=only_n_most_recent_images,
118
- )
119
- else:
120
- self._loop = OmniLoop(
92
+ # Create the appropriate loop using the factory
93
+ try:
94
+ # Let the factory create the appropriate loop with needed components
95
+ self._loop = LoopFactory.create_loop(
96
+ loop_type=loop,
121
97
  provider=self.provider,
98
+ computer=self.computer,
99
+ model_name=actual_model_name,
122
100
  api_key=actual_api_key,
123
- model=actual_model_name,
124
- computer=computer_instance,
125
101
  save_trajectory=save_trajectory,
126
- base_dir=trajectory_dir,
102
+ trajectory_dir=trajectory_dir,
127
103
  only_n_most_recent_images=only_n_most_recent_images,
128
- parser=OmniParser(),
129
104
  )
105
+ except ValueError as e:
106
+ logger.error(f"Failed to create loop: {str(e)}")
107
+ raise
130
108
 
131
109
  # Initialize the message manager from the loop
132
110
  self.message_manager = self._loop.message_manager
@@ -152,21 +130,6 @@ class ComputerAgent:
152
130
  else:
153
131
  logger.info("Computer already initialized, skipping initialization")
154
132
 
155
- # Take a test screenshot to verify the computer is working
156
- logger.info("Testing computer with a screenshot...")
157
- try:
158
- test_screenshot = await self.computer.interface.screenshot()
159
- # Determine the screenshot size based on its type
160
- if isinstance(test_screenshot, (bytes, bytearray, memoryview)):
161
- size = len(test_screenshot)
162
- elif hasattr(test_screenshot, "base64_image"):
163
- size = len(test_screenshot.base64_image)
164
- else:
165
- size = "unknown"
166
- logger.info(f"Screenshot test successful, size: {size}")
167
- except Exception as e:
168
- logger.error(f"Screenshot test failed: {str(e)}")
169
- # Even though screenshot failed, we continue since some tests might not need it
170
133
  except Exception as e:
171
134
  logger.error(f"Error initializing computer in __aenter__: {str(e)}")
172
135
  raise
@@ -232,7 +195,6 @@ class ComputerAgent:
232
195
 
233
196
  # Execute the task and yield results
234
197
  async for result in self._loop.run(self.message_manager.messages):
235
- # Yield the result to the caller
236
198
  yield result
237
199
 
238
200
  except Exception as e:
@@ -1,35 +1,21 @@
1
- """Base agent loop implementation."""
1
+ """Base loop definitions."""
2
2
 
3
3
  import logging
4
4
  import asyncio
5
5
  from abc import ABC, abstractmethod
6
- from enum import Enum, auto
7
- from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
8
- from datetime import datetime
6
+ from typing import Any, AsyncGenerator, Dict, List, Optional
9
7
 
10
8
  from computer import Computer
11
- from .experiment import ExperimentManager
12
9
  from .messages import StandardMessageManager, ImageRetentionConfig
13
10
  from .types import AgentResponse
11
+ from .experiment import ExperimentManager
14
12
 
15
13
  logger = logging.getLogger(__name__)
16
14
 
17
15
 
18
- class AgentLoop(Enum):
19
- """Enumeration of available loop types."""
20
-
21
- ANTHROPIC = auto() # Anthropic implementation
22
- OMNI = auto() # OmniLoop implementation
23
- # Add more loop types as needed
24
-
25
-
26
16
  class BaseLoop(ABC):
27
17
  """Base class for agent loops that handle message processing and tool execution."""
28
18
 
29
- ###########################################
30
- # INITIALIZATION AND CONFIGURATION
31
- ###########################################
32
-
33
19
  def __init__(
34
20
  self,
35
21
  computer: Computer,
@@ -68,6 +54,11 @@ class BaseLoop(ABC):
68
54
  self.only_n_most_recent_images = only_n_most_recent_images
69
55
  self._kwargs = kwargs
70
56
 
57
+ # Initialize message manager
58
+ self.message_manager = StandardMessageManager(
59
+ config=ImageRetentionConfig(num_images_to_keep=only_n_most_recent_images)
60
+ )
61
+
71
62
  # Initialize experiment manager
72
63
  if self.save_trajectory and self.base_dir:
73
64
  self.experiment_manager = ExperimentManager(
@@ -110,8 +101,7 @@ class BaseLoop(ABC):
110
101
  )
111
102
  raise RuntimeError(f"Failed to initialize: {str(e)}")
112
103
 
113
- ###########################################
114
-
104
+ ###########################################
115
105
  # ABSTRACT METHODS TO BE IMPLEMENTED BY SUBCLASSES
116
106
  ###########################################
117
107
 
@@ -125,17 +115,14 @@ class BaseLoop(ABC):
125
115
  raise NotImplementedError
126
116
 
127
117
  @abstractmethod
128
- async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]:
118
+ def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]:
129
119
  """Run the agent loop with provided messages.
130
120
 
131
- This method handles the main agent loop including message processing,
132
- API calls, response handling, and action execution.
133
-
134
121
  Args:
135
122
  messages: List of message objects
136
123
 
137
- Yields:
138
- Agent response format
124
+ Returns:
125
+ An async generator that yields agent responses
139
126
  """
140
127
  raise NotImplementedError
141
128
 
agent/core/factory.py ADDED
@@ -0,0 +1,104 @@
1
+ """Base agent loop implementation."""
2
+
3
+ import logging
4
+ import importlib.util
5
+ from typing import Dict, Optional, Type, TYPE_CHECKING, Any, cast, Callable, Awaitable
6
+
7
+ from computer import Computer
8
+ from .types import AgentLoop
9
+ from .base import BaseLoop
10
+
11
+ # For type checking only
12
+ if TYPE_CHECKING:
13
+ from ..providers.omni.types import LLMProvider
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ class LoopFactory:
19
+ """Factory class for creating agent loops."""
20
+
21
+ # Registry to store loop implementations
22
+ _loop_registry: Dict[AgentLoop, Type[BaseLoop]] = {}
23
+
24
+ @classmethod
25
+ def create_loop(
26
+ cls,
27
+ loop_type: AgentLoop,
28
+ api_key: str,
29
+ model_name: str,
30
+ computer: Computer,
31
+ provider: Any = None,
32
+ save_trajectory: bool = True,
33
+ trajectory_dir: str = "trajectories",
34
+ only_n_most_recent_images: Optional[int] = None,
35
+ acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
36
+ ) -> BaseLoop:
37
+ """Create and return an appropriate loop instance based on type."""
38
+ if loop_type == AgentLoop.ANTHROPIC:
39
+ # Lazy import AnthropicLoop only when needed
40
+ try:
41
+ from ..providers.anthropic.loop import AnthropicLoop
42
+ except ImportError:
43
+ raise ImportError(
44
+ "The 'anthropic' provider is not installed. "
45
+ "Install it with 'pip install cua-agent[anthropic]'"
46
+ )
47
+
48
+ return AnthropicLoop(
49
+ api_key=api_key,
50
+ model=model_name,
51
+ computer=computer,
52
+ save_trajectory=save_trajectory,
53
+ base_dir=trajectory_dir,
54
+ only_n_most_recent_images=only_n_most_recent_images,
55
+ )
56
+ elif loop_type == AgentLoop.OPENAI:
57
+ # Lazy import OpenAILoop only when needed
58
+ try:
59
+ from ..providers.openai.loop import OpenAILoop
60
+ except ImportError:
61
+ raise ImportError(
62
+ "The 'openai' provider is not installed. "
63
+ "Install it with 'pip install cua-agent[openai]'"
64
+ )
65
+
66
+ return OpenAILoop(
67
+ api_key=api_key,
68
+ model=model_name,
69
+ computer=computer,
70
+ save_trajectory=save_trajectory,
71
+ base_dir=trajectory_dir,
72
+ only_n_most_recent_images=only_n_most_recent_images,
73
+ acknowledge_safety_check_callback=acknowledge_safety_check_callback,
74
+ )
75
+ elif loop_type == AgentLoop.OMNI:
76
+ # Lazy import OmniLoop and related classes only when needed
77
+ try:
78
+ from ..providers.omni.loop import OmniLoop
79
+ from ..providers.omni.parser import OmniParser
80
+ from ..providers.omni.types import LLMProvider
81
+ except ImportError:
82
+ raise ImportError(
83
+ "The 'omni' provider is not installed. "
84
+ "Install it with 'pip install cua-agent[all]'"
85
+ )
86
+
87
+ if provider is None:
88
+ raise ValueError("Provider is required for OMNI loop type")
89
+
90
+ # We know provider is the correct type at this point, so cast it
91
+ provider_instance = cast(LLMProvider, provider)
92
+
93
+ return OmniLoop(
94
+ provider=provider_instance,
95
+ api_key=api_key,
96
+ model=model_name,
97
+ computer=computer,
98
+ save_trajectory=save_trajectory,
99
+ base_dir=trajectory_dir,
100
+ only_n_most_recent_images=only_n_most_recent_images,
101
+ parser=OmniParser(),
102
+ )
103
+ else:
104
+ raise ValueError(f"Unsupported loop type: {loop_type}")
@@ -0,0 +1,15 @@
1
+ """Provider-specific configurations and constants."""
2
+
3
+ from ..providers.omni.types import LLMProvider
4
+
5
+ # Default models for different providers
6
+ DEFAULT_MODELS = {
7
+ LLMProvider.OPENAI: "gpt-4o",
8
+ LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
9
+ }
10
+
11
+ # Map providers to their environment variable names
12
+ ENV_VARS = {
13
+ LLMProvider.OPENAI: "OPENAI_API_KEY",
14
+ LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
15
+ }
agent/core/types.py CHANGED
@@ -1,6 +1,16 @@
1
1
  """Core type definitions."""
2
2
 
3
3
  from typing import Any, Dict, List, Optional, TypedDict, Union
4
+ from enum import Enum, auto
5
+
6
+
7
+ class AgentLoop(Enum):
8
+ """Enumeration of available loop types."""
9
+
10
+ ANTHROPIC = auto() # Anthropic implementation
11
+ OMNI = auto() # OmniLoop implementation
12
+ OPENAI = auto() # OpenAI implementation
13
+ # Add more loop types as needed
4
14
 
5
15
 
6
16
  class AgentResponse(TypedDict, total=False):
@@ -16,7 +16,7 @@ from datetime import datetime
16
16
  from computer import Computer
17
17
 
18
18
  # Base imports
19
- from ...core.loop import BaseLoop
19
+ from ...core.base import BaseLoop
20
20
  from ...core.messages import StandardMessageManager, ImageRetentionConfig
21
21
  from ...core.types import AgentResponse
22
22
 
@@ -1,14 +1,11 @@
1
1
  """Response and tool handling for Anthropic provider."""
2
2
 
3
3
  import logging
4
- from typing import Any, Dict, List, Optional, Tuple, cast
4
+ from typing import Any, Dict, List, Tuple, cast
5
5
 
6
6
  from anthropic.types.beta import (
7
7
  BetaMessage,
8
- BetaMessageParam,
9
8
  BetaTextBlock,
10
- BetaTextBlockParam,
11
- BetaToolUseBlockParam,
12
9
  BetaContentBlockParam,
13
10
  )
14
11
 
@@ -1,14 +1,12 @@
1
1
  """Utility functions for Anthropic message handling."""
2
2
 
3
- import time
4
3
  import logging
5
4
  import re
6
5
  from typing import Any, Dict, List, Optional, Tuple, cast
7
- from anthropic.types.beta import BetaMessage, BetaMessageParam, BetaTextBlock
6
+ from anthropic.types.beta import BetaMessage
8
7
  from ..omni.parser import ParseResult
9
8
  from ...core.types import AgentResponse
10
9
  from datetime import datetime
11
- import json
12
10
 
13
11
  # Configure module logger
14
12
  logger = logging.getLogger(__name__)
@@ -10,7 +10,7 @@ from httpx import ConnectError, ReadTimeout
10
10
  from typing import cast
11
11
 
12
12
  from .parser import OmniParser, ParseResult
13
- from ...core.loop import BaseLoop
13
+ from ...core.base import BaseLoop
14
14
  from ...core.visualization import VisualizationHelper
15
15
  from ...core.messages import StandardMessageManager, ImageRetentionConfig
16
16
  from .utils import to_openai_agent_response_format
@@ -9,8 +9,10 @@ class LLMProvider(StrEnum):
9
9
  """Supported LLM providers."""
10
10
 
11
11
  ANTHROPIC = "anthropic"
12
+ OMNI = "omni"
12
13
  OPENAI = "openai"
13
14
 
15
+
14
16
  @dataclass
15
17
  class LLM:
16
18
  """Configuration for LLM model and provider."""
@@ -0,0 +1,6 @@
1
+ """OpenAI Agent Response API provider for computer control."""
2
+
3
+ from .types import LLMProvider
4
+ from .loop import OpenAILoop
5
+
6
+ __all__ = ["OpenAILoop", "LLMProvider"]