cua-agent 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/__init__.py +2 -2
- agent/core/__init__.py +1 -1
- agent/core/{computer_agent.py → agent.py} +15 -53
- agent/core/{loop.py → base.py} +12 -25
- agent/core/factory.py +104 -0
- agent/core/provider_config.py +15 -0
- agent/core/types.py +10 -0
- agent/providers/anthropic/loop.py +1 -1
- agent/providers/anthropic/response_handler.py +1 -4
- agent/providers/anthropic/utils.py +1 -3
- agent/providers/omni/loop.py +1 -1
- agent/providers/omni/types.py +2 -0
- agent/providers/openai/__init__.py +6 -0
- agent/providers/openai/api_handler.py +453 -0
- agent/providers/openai/loop.py +440 -0
- agent/providers/openai/response_handler.py +205 -0
- agent/providers/openai/tools/__init__.py +15 -0
- agent/providers/openai/tools/base.py +79 -0
- agent/providers/openai/tools/computer.py +319 -0
- agent/providers/openai/tools/manager.py +106 -0
- agent/providers/openai/types.py +36 -0
- agent/providers/openai/utils.py +98 -0
- cua_agent-0.1.19.dist-info/METADATA +165 -0
- {cua_agent-0.1.17.dist-info → cua_agent-0.1.19.dist-info}/RECORD +26 -16
- agent/README.md +0 -63
- agent/providers/anthropic/messages/manager.py +0 -112
- cua_agent-0.1.17.dist-info/METADATA +0 -90
- {cua_agent-0.1.17.dist-info → cua_agent-0.1.19.dist-info}/WHEEL +0 -0
- {cua_agent-0.1.17.dist-info → cua_agent-0.1.19.dist-info}/entry_points.txt +0 -0
agent/__init__.py
CHANGED
|
@@ -49,7 +49,7 @@ except Exception as e:
|
|
|
49
49
|
logger.warning(f"Error initializing telemetry: {e}")
|
|
50
50
|
|
|
51
51
|
from .providers.omni.types import LLMProvider, LLM
|
|
52
|
-
from .core.loop import AgentLoop
|
|
53
|
-
from .core.computer_agent import ComputerAgent
|
|
52
|
+
from .core.factory import AgentLoop
|
|
53
|
+
from .core.agent import ComputerAgent
|
|
54
54
|
|
|
55
55
|
__all__ = ["AgentLoop", "LLMProvider", "LLM", "ComputerAgent"]
|
agent/core/__init__.py
CHANGED
agent/core/{computer_agent.py → agent.py}
RENAMED
|
@@ -3,32 +3,18 @@
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import AsyncGenerator, Optional
|
|
7
7
|
|
|
8
8
|
from computer import Computer
|
|
9
|
-
from ..providers.
|
|
10
|
-
from ..providers.omni.loop import OmniLoop
|
|
11
|
-
from ..providers.omni.parser import OmniParser
|
|
12
|
-
from ..providers.omni.types import LLMProvider, LLM
|
|
9
|
+
from ..providers.omni.types import LLM
|
|
13
10
|
from .. import AgentLoop
|
|
14
|
-
from .messages import StandardMessageManager, ImageRetentionConfig
|
|
15
11
|
from .types import AgentResponse
|
|
12
|
+
from .factory import LoopFactory
|
|
13
|
+
from .provider_config import DEFAULT_MODELS, ENV_VARS
|
|
16
14
|
|
|
17
15
|
logging.basicConfig(level=logging.INFO)
|
|
18
16
|
logger = logging.getLogger(__name__)
|
|
19
17
|
|
|
20
|
-
# Default models for different providers
|
|
21
|
-
DEFAULT_MODELS = {
|
|
22
|
-
LLMProvider.OPENAI: "gpt-4o",
|
|
23
|
-
LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
# Map providers to their environment variable names
|
|
27
|
-
ENV_VARS = {
|
|
28
|
-
LLMProvider.OPENAI: "OPENAI_API_KEY",
|
|
29
|
-
LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
|
|
30
|
-
}
|
|
31
|
-
|
|
32
18
|
|
|
33
19
|
class ComputerAgent:
|
|
34
20
|
"""A computer agent that can perform automated tasks using natural language instructions."""
|
|
@@ -98,35 +84,27 @@ class ComputerAgent:
|
|
|
98
84
|
f"No model specified for provider {self.provider} and no default found"
|
|
99
85
|
)
|
|
100
86
|
|
|
101
|
-
# Ensure computer is properly cast for typing purposes
|
|
102
|
-
computer_instance = self.computer
|
|
103
|
-
|
|
104
87
|
# Get API key from environment if not provided
|
|
105
88
|
actual_api_key = api_key or os.environ.get(ENV_VARS[self.provider], "")
|
|
106
89
|
if not actual_api_key:
|
|
107
90
|
raise ValueError(f"No API key provided for {self.provider}")
|
|
108
91
|
|
|
109
|
-
#
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
computer=computer_instance,
|
|
115
|
-
save_trajectory=save_trajectory,
|
|
116
|
-
base_dir=trajectory_dir,
|
|
117
|
-
only_n_most_recent_images=only_n_most_recent_images,
|
|
118
|
-
)
|
|
119
|
-
else:
|
|
120
|
-
self._loop = OmniLoop(
|
|
92
|
+
# Create the appropriate loop using the factory
|
|
93
|
+
try:
|
|
94
|
+
# Let the factory create the appropriate loop with needed components
|
|
95
|
+
self._loop = LoopFactory.create_loop(
|
|
96
|
+
loop_type=loop,
|
|
121
97
|
provider=self.provider,
|
|
98
|
+
computer=self.computer,
|
|
99
|
+
model_name=actual_model_name,
|
|
122
100
|
api_key=actual_api_key,
|
|
123
|
-
model=actual_model_name,
|
|
124
|
-
computer=computer_instance,
|
|
125
101
|
save_trajectory=save_trajectory,
|
|
126
|
-
|
|
102
|
+
trajectory_dir=trajectory_dir,
|
|
127
103
|
only_n_most_recent_images=only_n_most_recent_images,
|
|
128
|
-
parser=OmniParser(),
|
|
129
104
|
)
|
|
105
|
+
except ValueError as e:
|
|
106
|
+
logger.error(f"Failed to create loop: {str(e)}")
|
|
107
|
+
raise
|
|
130
108
|
|
|
131
109
|
# Initialize the message manager from the loop
|
|
132
110
|
self.message_manager = self._loop.message_manager
|
|
@@ -152,21 +130,6 @@ class ComputerAgent:
|
|
|
152
130
|
else:
|
|
153
131
|
logger.info("Computer already initialized, skipping initialization")
|
|
154
132
|
|
|
155
|
-
# Take a test screenshot to verify the computer is working
|
|
156
|
-
logger.info("Testing computer with a screenshot...")
|
|
157
|
-
try:
|
|
158
|
-
test_screenshot = await self.computer.interface.screenshot()
|
|
159
|
-
# Determine the screenshot size based on its type
|
|
160
|
-
if isinstance(test_screenshot, (bytes, bytearray, memoryview)):
|
|
161
|
-
size = len(test_screenshot)
|
|
162
|
-
elif hasattr(test_screenshot, "base64_image"):
|
|
163
|
-
size = len(test_screenshot.base64_image)
|
|
164
|
-
else:
|
|
165
|
-
size = "unknown"
|
|
166
|
-
logger.info(f"Screenshot test successful, size: {size}")
|
|
167
|
-
except Exception as e:
|
|
168
|
-
logger.error(f"Screenshot test failed: {str(e)}")
|
|
169
|
-
# Even though screenshot failed, we continue since some tests might not need it
|
|
170
133
|
except Exception as e:
|
|
171
134
|
logger.error(f"Error initializing computer in __aenter__: {str(e)}")
|
|
172
135
|
raise
|
|
@@ -232,7 +195,6 @@ class ComputerAgent:
|
|
|
232
195
|
|
|
233
196
|
# Execute the task and yield results
|
|
234
197
|
async for result in self._loop.run(self.message_manager.messages):
|
|
235
|
-
# Yield the result to the caller
|
|
236
198
|
yield result
|
|
237
199
|
|
|
238
200
|
except Exception as e:
|
agent/core/{loop.py → base.py}
RENAMED
|
@@ -1,35 +1,21 @@
|
|
|
1
|
-
"""Base
|
|
1
|
+
"""Base loop definitions."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import asyncio
|
|
5
5
|
from abc import ABC, abstractmethod
|
|
6
|
-
from enum import Enum, auto
|
|
7
|
-
from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
|
|
8
|
-
from datetime import datetime
|
|
6
|
+
from typing import Any, AsyncGenerator, Dict, List, Optional
|
|
9
7
|
|
|
10
8
|
from computer import Computer
|
|
11
|
-
from .experiment import ExperimentManager
|
|
12
9
|
from .messages import StandardMessageManager, ImageRetentionConfig
|
|
13
10
|
from .types import AgentResponse
|
|
11
|
+
from .experiment import ExperimentManager
|
|
14
12
|
|
|
15
13
|
logger = logging.getLogger(__name__)
|
|
16
14
|
|
|
17
15
|
|
|
18
|
-
class AgentLoop(Enum):
|
|
19
|
-
"""Enumeration of available loop types."""
|
|
20
|
-
|
|
21
|
-
ANTHROPIC = auto() # Anthropic implementation
|
|
22
|
-
OMNI = auto() # OmniLoop implementation
|
|
23
|
-
# Add more loop types as needed
|
|
24
|
-
|
|
25
|
-
|
|
26
16
|
class BaseLoop(ABC):
|
|
27
17
|
"""Base class for agent loops that handle message processing and tool execution."""
|
|
28
18
|
|
|
29
|
-
###########################################
|
|
30
|
-
# INITIALIZATION AND CONFIGURATION
|
|
31
|
-
###########################################
|
|
32
|
-
|
|
33
19
|
def __init__(
|
|
34
20
|
self,
|
|
35
21
|
computer: Computer,
|
|
@@ -68,6 +54,11 @@ class BaseLoop(ABC):
|
|
|
68
54
|
self.only_n_most_recent_images = only_n_most_recent_images
|
|
69
55
|
self._kwargs = kwargs
|
|
70
56
|
|
|
57
|
+
# Initialize message manager
|
|
58
|
+
self.message_manager = StandardMessageManager(
|
|
59
|
+
config=ImageRetentionConfig(num_images_to_keep=only_n_most_recent_images)
|
|
60
|
+
)
|
|
61
|
+
|
|
71
62
|
# Initialize experiment manager
|
|
72
63
|
if self.save_trajectory and self.base_dir:
|
|
73
64
|
self.experiment_manager = ExperimentManager(
|
|
@@ -110,8 +101,7 @@ class BaseLoop(ABC):
|
|
|
110
101
|
)
|
|
111
102
|
raise RuntimeError(f"Failed to initialize: {str(e)}")
|
|
112
103
|
|
|
113
|
-
|
|
114
|
-
|
|
104
|
+
###########################################
|
|
115
105
|
# ABSTRACT METHODS TO BE IMPLEMENTED BY SUBCLASSES
|
|
116
106
|
###########################################
|
|
117
107
|
|
|
@@ -125,17 +115,14 @@ class BaseLoop(ABC):
|
|
|
125
115
|
raise NotImplementedError
|
|
126
116
|
|
|
127
117
|
@abstractmethod
|
|
128
|
-
|
|
118
|
+
def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]:
|
|
129
119
|
"""Run the agent loop with provided messages.
|
|
130
120
|
|
|
131
|
-
This method handles the main agent loop including message processing,
|
|
132
|
-
API calls, response handling, and action execution.
|
|
133
|
-
|
|
134
121
|
Args:
|
|
135
122
|
messages: List of message objects
|
|
136
123
|
|
|
137
|
-
|
|
138
|
-
|
|
124
|
+
Returns:
|
|
125
|
+
An async generator that yields agent responses
|
|
139
126
|
"""
|
|
140
127
|
raise NotImplementedError
|
|
141
128
|
|
agent/core/factory.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Base agent loop implementation."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import importlib.util
|
|
5
|
+
from typing import Dict, Optional, Type, TYPE_CHECKING, Any, cast, Callable, Awaitable
|
|
6
|
+
|
|
7
|
+
from computer import Computer
|
|
8
|
+
from .types import AgentLoop
|
|
9
|
+
from .base import BaseLoop
|
|
10
|
+
|
|
11
|
+
# For type checking only
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from ..providers.omni.types import LLMProvider
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class LoopFactory:
|
|
19
|
+
"""Factory class for creating agent loops."""
|
|
20
|
+
|
|
21
|
+
# Registry to store loop implementations
|
|
22
|
+
_loop_registry: Dict[AgentLoop, Type[BaseLoop]] = {}
|
|
23
|
+
|
|
24
|
+
@classmethod
|
|
25
|
+
def create_loop(
|
|
26
|
+
cls,
|
|
27
|
+
loop_type: AgentLoop,
|
|
28
|
+
api_key: str,
|
|
29
|
+
model_name: str,
|
|
30
|
+
computer: Computer,
|
|
31
|
+
provider: Any = None,
|
|
32
|
+
save_trajectory: bool = True,
|
|
33
|
+
trajectory_dir: str = "trajectories",
|
|
34
|
+
only_n_most_recent_images: Optional[int] = None,
|
|
35
|
+
acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
|
|
36
|
+
) -> BaseLoop:
|
|
37
|
+
"""Create and return an appropriate loop instance based on type."""
|
|
38
|
+
if loop_type == AgentLoop.ANTHROPIC:
|
|
39
|
+
# Lazy import AnthropicLoop only when needed
|
|
40
|
+
try:
|
|
41
|
+
from ..providers.anthropic.loop import AnthropicLoop
|
|
42
|
+
except ImportError:
|
|
43
|
+
raise ImportError(
|
|
44
|
+
"The 'anthropic' provider is not installed. "
|
|
45
|
+
"Install it with 'pip install cua-agent[anthropic]'"
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
return AnthropicLoop(
|
|
49
|
+
api_key=api_key,
|
|
50
|
+
model=model_name,
|
|
51
|
+
computer=computer,
|
|
52
|
+
save_trajectory=save_trajectory,
|
|
53
|
+
base_dir=trajectory_dir,
|
|
54
|
+
only_n_most_recent_images=only_n_most_recent_images,
|
|
55
|
+
)
|
|
56
|
+
elif loop_type == AgentLoop.OPENAI:
|
|
57
|
+
# Lazy import OpenAILoop only when needed
|
|
58
|
+
try:
|
|
59
|
+
from ..providers.openai.loop import OpenAILoop
|
|
60
|
+
except ImportError:
|
|
61
|
+
raise ImportError(
|
|
62
|
+
"The 'openai' provider is not installed. "
|
|
63
|
+
"Install it with 'pip install cua-agent[openai]'"
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
return OpenAILoop(
|
|
67
|
+
api_key=api_key,
|
|
68
|
+
model=model_name,
|
|
69
|
+
computer=computer,
|
|
70
|
+
save_trajectory=save_trajectory,
|
|
71
|
+
base_dir=trajectory_dir,
|
|
72
|
+
only_n_most_recent_images=only_n_most_recent_images,
|
|
73
|
+
acknowledge_safety_check_callback=acknowledge_safety_check_callback,
|
|
74
|
+
)
|
|
75
|
+
elif loop_type == AgentLoop.OMNI:
|
|
76
|
+
# Lazy import OmniLoop and related classes only when needed
|
|
77
|
+
try:
|
|
78
|
+
from ..providers.omni.loop import OmniLoop
|
|
79
|
+
from ..providers.omni.parser import OmniParser
|
|
80
|
+
from ..providers.omni.types import LLMProvider
|
|
81
|
+
except ImportError:
|
|
82
|
+
raise ImportError(
|
|
83
|
+
"The 'omni' provider is not installed. "
|
|
84
|
+
"Install it with 'pip install cua-agent[all]'"
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
if provider is None:
|
|
88
|
+
raise ValueError("Provider is required for OMNI loop type")
|
|
89
|
+
|
|
90
|
+
# We know provider is the correct type at this point, so cast it
|
|
91
|
+
provider_instance = cast(LLMProvider, provider)
|
|
92
|
+
|
|
93
|
+
return OmniLoop(
|
|
94
|
+
provider=provider_instance,
|
|
95
|
+
api_key=api_key,
|
|
96
|
+
model=model_name,
|
|
97
|
+
computer=computer,
|
|
98
|
+
save_trajectory=save_trajectory,
|
|
99
|
+
base_dir=trajectory_dir,
|
|
100
|
+
only_n_most_recent_images=only_n_most_recent_images,
|
|
101
|
+
parser=OmniParser(),
|
|
102
|
+
)
|
|
103
|
+
else:
|
|
104
|
+
raise ValueError(f"Unsupported loop type: {loop_type}")
|
agent/core/provider_config.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Provider-specific configurations and constants."""
|
|
2
|
+
|
|
3
|
+
from ..providers.omni.types import LLMProvider
|
|
4
|
+
|
|
5
|
+
# Default models for different providers
|
|
6
|
+
DEFAULT_MODELS = {
|
|
7
|
+
LLMProvider.OPENAI: "gpt-4o",
|
|
8
|
+
LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
# Map providers to their environment variable names
|
|
12
|
+
ENV_VARS = {
|
|
13
|
+
LLMProvider.OPENAI: "OPENAI_API_KEY",
|
|
14
|
+
LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
|
|
15
|
+
}
|
agent/core/types.py
CHANGED
|
@@ -1,6 +1,16 @@
|
|
|
1
1
|
"""Core type definitions."""
|
|
2
2
|
|
|
3
3
|
from typing import Any, Dict, List, Optional, TypedDict, Union
|
|
4
|
+
from enum import Enum, auto
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class AgentLoop(Enum):
|
|
8
|
+
"""Enumeration of available loop types."""
|
|
9
|
+
|
|
10
|
+
ANTHROPIC = auto() # Anthropic implementation
|
|
11
|
+
OMNI = auto() # OmniLoop implementation
|
|
12
|
+
OPENAI = auto() # OpenAI implementation
|
|
13
|
+
# Add more loop types as needed
|
|
4
14
|
|
|
5
15
|
|
|
6
16
|
class AgentResponse(TypedDict, total=False):
|
agent/providers/anthropic/loop.py
CHANGED
|
@@ -16,7 +16,7 @@ from datetime import datetime
|
|
|
16
16
|
from computer import Computer
|
|
17
17
|
|
|
18
18
|
# Base imports
|
|
19
|
-
from ...core.loop import BaseLoop
|
|
19
|
+
from ...core.base import BaseLoop
|
|
20
20
|
from ...core.messages import StandardMessageManager, ImageRetentionConfig
|
|
21
21
|
from ...core.types import AgentResponse
|
|
22
22
|
|
agent/providers/anthropic/response_handler.py
CHANGED
|
@@ -1,14 +1,11 @@
|
|
|
1
1
|
"""Response and tool handling for Anthropic provider."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import Any, Dict, List,
|
|
4
|
+
from typing import Any, Dict, List, Tuple, cast
|
|
5
5
|
|
|
6
6
|
from anthropic.types.beta import (
|
|
7
7
|
BetaMessage,
|
|
8
|
-
BetaMessageParam,
|
|
9
8
|
BetaTextBlock,
|
|
10
|
-
BetaTextBlockParam,
|
|
11
|
-
BetaToolUseBlockParam,
|
|
12
9
|
BetaContentBlockParam,
|
|
13
10
|
)
|
|
14
11
|
|
agent/providers/anthropic/utils.py
CHANGED
|
@@ -1,14 +1,12 @@
|
|
|
1
1
|
"""Utility functions for Anthropic message handling."""
|
|
2
2
|
|
|
3
|
-
import time
|
|
4
3
|
import logging
|
|
5
4
|
import re
|
|
6
5
|
from typing import Any, Dict, List, Optional, Tuple, cast
|
|
7
|
-
from anthropic.types.beta import BetaMessage
|
|
6
|
+
from anthropic.types.beta import BetaMessage
|
|
8
7
|
from ..omni.parser import ParseResult
|
|
9
8
|
from ...core.types import AgentResponse
|
|
10
9
|
from datetime import datetime
|
|
11
|
-
import json
|
|
12
10
|
|
|
13
11
|
# Configure module logger
|
|
14
12
|
logger = logging.getLogger(__name__)
|
agent/providers/omni/loop.py
CHANGED
|
@@ -10,7 +10,7 @@ from httpx import ConnectError, ReadTimeout
|
|
|
10
10
|
from typing import cast
|
|
11
11
|
|
|
12
12
|
from .parser import OmniParser, ParseResult
|
|
13
|
-
from ...core.loop import BaseLoop
|
|
13
|
+
from ...core.base import BaseLoop
|
|
14
14
|
from ...core.visualization import VisualizationHelper
|
|
15
15
|
from ...core.messages import StandardMessageManager, ImageRetentionConfig
|
|
16
16
|
from .utils import to_openai_agent_response_format
|
agent/providers/omni/types.py
CHANGED