cua-agent 0.1.6__py3-none-any.whl → 0.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/__init__.py +3 -2
- agent/core/__init__.py +1 -6
- agent/core/{computer_agent.py → agent.py} +31 -76
- agent/core/{loop.py → base.py} +68 -127
- agent/core/factory.py +104 -0
- agent/core/messages.py +279 -125
- agent/core/provider_config.py +15 -0
- agent/core/types.py +45 -0
- agent/core/visualization.py +197 -0
- agent/providers/anthropic/api/client.py +142 -1
- agent/providers/anthropic/api_handler.py +140 -0
- agent/providers/anthropic/callbacks/__init__.py +5 -0
- agent/providers/anthropic/loop.py +207 -221
- agent/providers/anthropic/response_handler.py +226 -0
- agent/providers/anthropic/tools/bash.py +0 -97
- agent/providers/anthropic/utils.py +368 -0
- agent/providers/omni/__init__.py +1 -20
- agent/providers/omni/api_handler.py +42 -0
- agent/providers/omni/clients/anthropic.py +4 -0
- agent/providers/omni/image_utils.py +0 -72
- agent/providers/omni/loop.py +491 -607
- agent/providers/omni/parser.py +58 -4
- agent/providers/omni/tools/__init__.py +25 -7
- agent/providers/omni/tools/base.py +29 -0
- agent/providers/omni/tools/bash.py +43 -38
- agent/providers/omni/tools/computer.py +144 -182
- agent/providers/omni/tools/manager.py +25 -45
- agent/providers/omni/types.py +1 -3
- agent/providers/omni/utils.py +224 -145
- agent/providers/openai/__init__.py +6 -0
- agent/providers/openai/api_handler.py +453 -0
- agent/providers/openai/loop.py +440 -0
- agent/providers/openai/response_handler.py +205 -0
- agent/providers/openai/tools/__init__.py +15 -0
- agent/providers/openai/tools/base.py +79 -0
- agent/providers/openai/tools/computer.py +319 -0
- agent/providers/openai/tools/manager.py +106 -0
- agent/providers/openai/types.py +36 -0
- agent/providers/openai/utils.py +98 -0
- cua_agent-0.1.18.dist-info/METADATA +165 -0
- cua_agent-0.1.18.dist-info/RECORD +73 -0
- agent/README.md +0 -63
- agent/providers/anthropic/messages/manager.py +0 -112
- agent/providers/omni/callbacks.py +0 -78
- agent/providers/omni/clients/groq.py +0 -101
- agent/providers/omni/experiment.py +0 -276
- agent/providers/omni/messages.py +0 -171
- agent/providers/omni/tool_manager.py +0 -91
- agent/providers/omni/visualization.py +0 -130
- agent/types/__init__.py +0 -23
- agent/types/base.py +0 -41
- agent/types/messages.py +0 -36
- cua_agent-0.1.6.dist-info/METADATA +0 -120
- cua_agent-0.1.6.dist-info/RECORD +0 -64
- /agent/{types → core}/tools.py +0 -0
- {cua_agent-0.1.6.dist-info → cua_agent-0.1.18.dist-info}/WHEEL +0 -0
- {cua_agent-0.1.6.dist-info → cua_agent-0.1.18.dist-info}/entry_points.txt +0 -0
agent/__init__.py
CHANGED
|
@@ -49,6 +49,7 @@ except Exception as e:
|
|
|
49
49
|
logger.warning(f"Error initializing telemetry: {e}")
|
|
50
50
|
|
|
51
51
|
from .providers.omni.types import LLMProvider, LLM
|
|
52
|
-
from .
|
|
52
|
+
from .core.factory import AgentLoop
|
|
53
|
+
from .core.agent import ComputerAgent
|
|
53
54
|
|
|
54
|
-
__all__ = ["AgentLoop", "LLMProvider", "LLM"]
|
|
55
|
+
__all__ = ["AgentLoop", "LLMProvider", "LLM", "ComputerAgent"]
|
agent/core/__init__.py
CHANGED
|
@@ -1,12 +1,7 @@
|
|
|
1
1
|
"""Core agent components."""
|
|
2
2
|
|
|
3
|
-
from .
|
|
3
|
+
from .factory import BaseLoop
|
|
4
4
|
from .messages import (
|
|
5
|
-
create_user_message,
|
|
6
|
-
create_assistant_message,
|
|
7
|
-
create_system_message,
|
|
8
|
-
create_image_message,
|
|
9
|
-
create_screen_message,
|
|
10
5
|
BaseMessageManager,
|
|
11
6
|
ImageRetentionConfig,
|
|
12
7
|
)
|
|
@@ -3,31 +3,18 @@
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
|
-
from typing import
|
|
7
|
-
from dataclasses import dataclass
|
|
6
|
+
from typing import AsyncGenerator, Optional
|
|
8
7
|
|
|
9
8
|
from computer import Computer
|
|
10
|
-
from ..providers.
|
|
11
|
-
from ..providers.omni.loop import OmniLoop
|
|
12
|
-
from ..providers.omni.parser import OmniParser
|
|
13
|
-
from ..providers.omni.types import LLMProvider, LLM
|
|
9
|
+
from ..providers.omni.types import LLM
|
|
14
10
|
from .. import AgentLoop
|
|
11
|
+
from .types import AgentResponse
|
|
12
|
+
from .factory import LoopFactory
|
|
13
|
+
from .provider_config import DEFAULT_MODELS, ENV_VARS
|
|
15
14
|
|
|
16
15
|
logging.basicConfig(level=logging.INFO)
|
|
17
16
|
logger = logging.getLogger(__name__)
|
|
18
17
|
|
|
19
|
-
# Default models for different providers
|
|
20
|
-
DEFAULT_MODELS = {
|
|
21
|
-
LLMProvider.OPENAI: "gpt-4o",
|
|
22
|
-
LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
# Map providers to their environment variable names
|
|
26
|
-
ENV_VARS = {
|
|
27
|
-
LLMProvider.OPENAI: "OPENAI_API_KEY",
|
|
28
|
-
LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
|
|
29
|
-
}
|
|
30
|
-
|
|
31
18
|
|
|
32
19
|
class ComputerAgent:
|
|
33
20
|
"""A computer agent that can perform automated tasks using natural language instructions."""
|
|
@@ -44,7 +31,6 @@ class ComputerAgent:
|
|
|
44
31
|
save_trajectory: bool = True,
|
|
45
32
|
trajectory_dir: str = "trajectories",
|
|
46
33
|
only_n_most_recent_images: Optional[int] = None,
|
|
47
|
-
parser: Optional[OmniParser] = None,
|
|
48
34
|
verbosity: int = logging.INFO,
|
|
49
35
|
):
|
|
50
36
|
"""Initialize the ComputerAgent.
|
|
@@ -61,12 +47,11 @@ class ComputerAgent:
|
|
|
61
47
|
save_trajectory: Whether to save the trajectory.
|
|
62
48
|
trajectory_dir: Directory to save the trajectory.
|
|
63
49
|
only_n_most_recent_images: Maximum number of recent screenshots to include in API requests.
|
|
64
|
-
parser: Parser instance for the OmniLoop. Only used if provider is not ANTHROPIC.
|
|
65
50
|
verbosity: Logging level.
|
|
66
51
|
"""
|
|
67
52
|
# Basic agent configuration
|
|
68
53
|
self.max_retries = max_retries
|
|
69
|
-
self.computer = computer
|
|
54
|
+
self.computer = computer
|
|
70
55
|
self.queue = asyncio.Queue()
|
|
71
56
|
self.screenshot_dir = screenshot_dir
|
|
72
57
|
self.log_dir = log_dir
|
|
@@ -99,39 +84,30 @@ class ComputerAgent:
|
|
|
99
84
|
f"No model specified for provider {self.provider} and no default found"
|
|
100
85
|
)
|
|
101
86
|
|
|
102
|
-
# Ensure computer is properly cast for typing purposes
|
|
103
|
-
computer_instance = cast(Computer, self.computer)
|
|
104
|
-
|
|
105
87
|
# Get API key from environment if not provided
|
|
106
88
|
actual_api_key = api_key or os.environ.get(ENV_VARS[self.provider], "")
|
|
107
89
|
if not actual_api_key:
|
|
108
90
|
raise ValueError(f"No API key provided for {self.provider}")
|
|
109
91
|
|
|
110
|
-
#
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
computer=computer_instance,
|
|
116
|
-
save_trajectory=save_trajectory,
|
|
117
|
-
base_dir=trajectory_dir,
|
|
118
|
-
only_n_most_recent_images=only_n_most_recent_images,
|
|
119
|
-
)
|
|
120
|
-
else:
|
|
121
|
-
# Default to OmniLoop for other loop types
|
|
122
|
-
# Initialize parser if not provided
|
|
123
|
-
actual_parser = parser or OmniParser()
|
|
124
|
-
|
|
125
|
-
self._loop = OmniLoop(
|
|
92
|
+
# Create the appropriate loop using the factory
|
|
93
|
+
try:
|
|
94
|
+
# Let the factory create the appropriate loop with needed components
|
|
95
|
+
self._loop = LoopFactory.create_loop(
|
|
96
|
+
loop_type=loop,
|
|
126
97
|
provider=self.provider,
|
|
98
|
+
computer=self.computer,
|
|
99
|
+
model_name=actual_model_name,
|
|
127
100
|
api_key=actual_api_key,
|
|
128
|
-
model=actual_model_name,
|
|
129
|
-
computer=computer_instance,
|
|
130
101
|
save_trajectory=save_trajectory,
|
|
131
|
-
|
|
102
|
+
trajectory_dir=trajectory_dir,
|
|
132
103
|
only_n_most_recent_images=only_n_most_recent_images,
|
|
133
|
-
parser=actual_parser,
|
|
134
104
|
)
|
|
105
|
+
except ValueError as e:
|
|
106
|
+
logger.error(f"Failed to create loop: {str(e)}")
|
|
107
|
+
raise
|
|
108
|
+
|
|
109
|
+
# Initialize the message manager from the loop
|
|
110
|
+
self.message_manager = self._loop.message_manager
|
|
135
111
|
|
|
136
112
|
logger.info(
|
|
137
113
|
f"ComputerAgent initialized with provider: {self.provider}, model: {actual_model_name}"
|
|
@@ -154,21 +130,6 @@ class ComputerAgent:
|
|
|
154
130
|
else:
|
|
155
131
|
logger.info("Computer already initialized, skipping initialization")
|
|
156
132
|
|
|
157
|
-
# Take a test screenshot to verify the computer is working
|
|
158
|
-
logger.info("Testing computer with a screenshot...")
|
|
159
|
-
try:
|
|
160
|
-
test_screenshot = await self.computer.interface.screenshot()
|
|
161
|
-
# Determine the screenshot size based on its type
|
|
162
|
-
if isinstance(test_screenshot, (bytes, bytearray, memoryview)):
|
|
163
|
-
size = len(test_screenshot)
|
|
164
|
-
elif hasattr(test_screenshot, "base64_image"):
|
|
165
|
-
size = len(test_screenshot.base64_image)
|
|
166
|
-
else:
|
|
167
|
-
size = "unknown"
|
|
168
|
-
logger.info(f"Screenshot test successful, size: {size}")
|
|
169
|
-
except Exception as e:
|
|
170
|
-
logger.error(f"Screenshot test failed: {str(e)}")
|
|
171
|
-
# Even though screenshot failed, we continue since some tests might not need it
|
|
172
133
|
except Exception as e:
|
|
173
134
|
logger.error(f"Error initializing computer in __aenter__: {str(e)}")
|
|
174
135
|
raise
|
|
@@ -201,36 +162,30 @@ class ComputerAgent:
|
|
|
201
162
|
await self.computer.run()
|
|
202
163
|
self._initialized = True
|
|
203
164
|
|
|
204
|
-
async def
|
|
205
|
-
"""Initialize the computer interface if it hasn't been initialized yet."""
|
|
206
|
-
if not self.computer._initialized:
|
|
207
|
-
logger.info("Computer not initialized, initializing now...")
|
|
208
|
-
try:
|
|
209
|
-
# Call run directly
|
|
210
|
-
await self.computer.run()
|
|
211
|
-
logger.info("Computer interface initialized successfully")
|
|
212
|
-
except Exception as e:
|
|
213
|
-
logger.error(f"Error initializing computer interface: {str(e)}")
|
|
214
|
-
raise
|
|
215
|
-
|
|
216
|
-
async def run(self, task: str) -> AsyncGenerator[Dict[str, Any], None]:
|
|
165
|
+
async def run(self, task: str) -> AsyncGenerator[AgentResponse, None]:
|
|
217
166
|
"""Run a task using the computer agent.
|
|
218
167
|
|
|
219
168
|
Args:
|
|
220
169
|
task: Task description
|
|
221
170
|
|
|
222
171
|
Yields:
|
|
223
|
-
|
|
172
|
+
Agent response format
|
|
224
173
|
"""
|
|
225
174
|
try:
|
|
226
175
|
logger.info(f"Running task: {task}")
|
|
176
|
+
logger.info(
|
|
177
|
+
f"Message history before task has {len(self.message_manager.messages)} messages"
|
|
178
|
+
)
|
|
227
179
|
|
|
228
180
|
# Initialize the computer if needed
|
|
229
181
|
if not self._initialized:
|
|
230
182
|
await self.initialize()
|
|
231
183
|
|
|
232
|
-
#
|
|
233
|
-
|
|
184
|
+
# Add task as a user message using the message manager
|
|
185
|
+
self.message_manager.add_user_message([{"type": "text", "text": task}])
|
|
186
|
+
logger.info(
|
|
187
|
+
f"Added task message. Message history now has {len(self.message_manager.messages)} messages"
|
|
188
|
+
)
|
|
234
189
|
|
|
235
190
|
# Pass properly formatted messages to the loop
|
|
236
191
|
if self._loop is None:
|
|
@@ -239,7 +194,7 @@ class ComputerAgent:
|
|
|
239
194
|
return
|
|
240
195
|
|
|
241
196
|
# Execute the task and yield results
|
|
242
|
-
async for result in self._loop.run(messages):
|
|
197
|
+
async for result in self._loop.run(self.message_manager.messages):
|
|
243
198
|
yield result
|
|
244
199
|
|
|
245
200
|
except Exception as e:
|
agent/core/{loop.py → base.py}
RENAMED
|
@@ -1,15 +1,13 @@
|
|
|
1
|
-
"""Base
|
|
1
|
+
"""Base loop definitions."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import asyncio
|
|
5
|
-
import json
|
|
6
|
-
import os
|
|
7
5
|
from abc import ABC, abstractmethod
|
|
8
|
-
from typing import Any, AsyncGenerator, Dict, List, Optional
|
|
9
|
-
from datetime import datetime
|
|
10
|
-
import base64
|
|
6
|
+
from typing import Any, AsyncGenerator, Dict, List, Optional
|
|
11
7
|
|
|
12
8
|
from computer import Computer
|
|
9
|
+
from .messages import StandardMessageManager, ImageRetentionConfig
|
|
10
|
+
from .types import AgentResponse
|
|
13
11
|
from .experiment import ExperimentManager
|
|
14
12
|
|
|
15
13
|
logger = logging.getLogger(__name__)
|
|
@@ -55,8 +53,11 @@ class BaseLoop(ABC):
|
|
|
55
53
|
self.save_trajectory = save_trajectory
|
|
56
54
|
self.only_n_most_recent_images = only_n_most_recent_images
|
|
57
55
|
self._kwargs = kwargs
|
|
58
|
-
|
|
59
|
-
#
|
|
56
|
+
|
|
57
|
+
# Initialize message manager
|
|
58
|
+
self.message_manager = StandardMessageManager(
|
|
59
|
+
config=ImageRetentionConfig(num_images_to_keep=only_n_most_recent_images)
|
|
60
|
+
)
|
|
60
61
|
|
|
61
62
|
# Initialize experiment manager
|
|
62
63
|
if self.save_trajectory and self.base_dir:
|
|
@@ -75,6 +76,60 @@ class BaseLoop(ABC):
|
|
|
75
76
|
# Initialize basic tracking
|
|
76
77
|
self.turn_count = 0
|
|
77
78
|
|
|
79
|
+
async def initialize(self) -> None:
|
|
80
|
+
"""Initialize both the API client and computer interface with retries."""
|
|
81
|
+
for attempt in range(self.max_retries):
|
|
82
|
+
try:
|
|
83
|
+
logger.info(
|
|
84
|
+
f"Starting initialization (attempt {attempt + 1}/{self.max_retries})..."
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# Initialize API client
|
|
88
|
+
await self.initialize_client()
|
|
89
|
+
|
|
90
|
+
logger.info("Initialization complete.")
|
|
91
|
+
return
|
|
92
|
+
except Exception as e:
|
|
93
|
+
if attempt < self.max_retries - 1:
|
|
94
|
+
logger.warning(
|
|
95
|
+
f"Initialization failed (attempt {attempt + 1}/{self.max_retries}): {str(e)}. Retrying..."
|
|
96
|
+
)
|
|
97
|
+
await asyncio.sleep(self.retry_delay)
|
|
98
|
+
else:
|
|
99
|
+
logger.error(
|
|
100
|
+
f"Initialization failed after {self.max_retries} attempts: {str(e)}"
|
|
101
|
+
)
|
|
102
|
+
raise RuntimeError(f"Failed to initialize: {str(e)}")
|
|
103
|
+
|
|
104
|
+
###########################################
|
|
105
|
+
# ABSTRACT METHODS TO BE IMPLEMENTED BY SUBCLASSES
|
|
106
|
+
###########################################
|
|
107
|
+
|
|
108
|
+
@abstractmethod
|
|
109
|
+
async def initialize_client(self) -> None:
|
|
110
|
+
"""Initialize the API client and any provider-specific components.
|
|
111
|
+
|
|
112
|
+
This method must be implemented by subclasses to set up
|
|
113
|
+
provider-specific clients and tools.
|
|
114
|
+
"""
|
|
115
|
+
raise NotImplementedError
|
|
116
|
+
|
|
117
|
+
@abstractmethod
|
|
118
|
+
def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]:
|
|
119
|
+
"""Run the agent loop with provided messages.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
messages: List of message objects
|
|
123
|
+
|
|
124
|
+
Returns:
|
|
125
|
+
An async generator that yields agent responses
|
|
126
|
+
"""
|
|
127
|
+
raise NotImplementedError
|
|
128
|
+
|
|
129
|
+
###########################################
|
|
130
|
+
# EXPERIMENT AND TRAJECTORY MANAGEMENT
|
|
131
|
+
###########################################
|
|
132
|
+
|
|
78
133
|
def _setup_experiment_dirs(self) -> None:
|
|
79
134
|
"""Setup the experiment directory structure."""
|
|
80
135
|
if self.experiment_manager:
|
|
@@ -100,10 +155,13 @@ class BaseLoop(ABC):
|
|
|
100
155
|
) -> None:
|
|
101
156
|
"""Log API call details to file.
|
|
102
157
|
|
|
158
|
+
Preserves provider-specific formats for requests and responses to ensure
|
|
159
|
+
accurate logging for debugging and analysis purposes.
|
|
160
|
+
|
|
103
161
|
Args:
|
|
104
162
|
call_type: Type of API call (e.g., 'request', 'response', 'error')
|
|
105
|
-
request: The API request data
|
|
106
|
-
response: Optional API response data
|
|
163
|
+
request: The API request data in provider-specific format
|
|
164
|
+
response: Optional API response data in provider-specific format
|
|
107
165
|
error: Optional error information
|
|
108
166
|
"""
|
|
109
167
|
if self.experiment_manager:
|
|
@@ -129,120 +187,3 @@ class BaseLoop(ABC):
|
|
|
129
187
|
"""
|
|
130
188
|
if self.experiment_manager:
|
|
131
189
|
self.experiment_manager.save_screenshot(img_base64, action_type)
|
|
132
|
-
|
|
133
|
-
async def initialize(self) -> None:
|
|
134
|
-
"""Initialize both the API client and computer interface with retries."""
|
|
135
|
-
for attempt in range(self.max_retries):
|
|
136
|
-
try:
|
|
137
|
-
logger.info(
|
|
138
|
-
f"Starting initialization (attempt {attempt + 1}/{self.max_retries})..."
|
|
139
|
-
)
|
|
140
|
-
|
|
141
|
-
# Initialize API client
|
|
142
|
-
await self.initialize_client()
|
|
143
|
-
|
|
144
|
-
logger.info("Initialization complete.")
|
|
145
|
-
return
|
|
146
|
-
except Exception as e:
|
|
147
|
-
if attempt < self.max_retries - 1:
|
|
148
|
-
logger.warning(
|
|
149
|
-
f"Initialization failed (attempt {attempt + 1}/{self.max_retries}): {str(e)}. Retrying..."
|
|
150
|
-
)
|
|
151
|
-
await asyncio.sleep(self.retry_delay)
|
|
152
|
-
else:
|
|
153
|
-
logger.error(
|
|
154
|
-
f"Initialization failed after {self.max_retries} attempts: {str(e)}"
|
|
155
|
-
)
|
|
156
|
-
raise RuntimeError(f"Failed to initialize: {str(e)}")
|
|
157
|
-
|
|
158
|
-
async def _get_parsed_screen_som(self) -> Dict[str, Any]:
|
|
159
|
-
"""Get parsed screen information.
|
|
160
|
-
|
|
161
|
-
Returns:
|
|
162
|
-
Dict containing screen information
|
|
163
|
-
"""
|
|
164
|
-
try:
|
|
165
|
-
# Take screenshot
|
|
166
|
-
screenshot = await self.computer.interface.screenshot()
|
|
167
|
-
|
|
168
|
-
# Initialize with default values
|
|
169
|
-
width, height = 1024, 768
|
|
170
|
-
base64_image = ""
|
|
171
|
-
|
|
172
|
-
# Handle different types of screenshot returns
|
|
173
|
-
if isinstance(screenshot, (bytes, bytearray, memoryview)):
|
|
174
|
-
# Raw bytes screenshot
|
|
175
|
-
base64_image = base64.b64encode(screenshot).decode("utf-8")
|
|
176
|
-
elif hasattr(screenshot, "base64_image"):
|
|
177
|
-
# Object-style screenshot with attributes
|
|
178
|
-
# Type checking can't infer these attributes, but they exist at runtime
|
|
179
|
-
# on certain screenshot return types
|
|
180
|
-
base64_image = getattr(screenshot, "base64_image")
|
|
181
|
-
width = (
|
|
182
|
-
getattr(screenshot, "width", width) if hasattr(screenshot, "width") else width
|
|
183
|
-
)
|
|
184
|
-
height = (
|
|
185
|
-
getattr(screenshot, "height", height)
|
|
186
|
-
if hasattr(screenshot, "height")
|
|
187
|
-
else height
|
|
188
|
-
)
|
|
189
|
-
|
|
190
|
-
# Create parsed screen data
|
|
191
|
-
parsed_screen = {
|
|
192
|
-
"width": width,
|
|
193
|
-
"height": height,
|
|
194
|
-
"parsed_content_list": [],
|
|
195
|
-
"timestamp": datetime.now().isoformat(),
|
|
196
|
-
"screenshot_base64": base64_image,
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
# Save screenshot if requested
|
|
200
|
-
if self.save_trajectory and self.experiment_manager:
|
|
201
|
-
try:
|
|
202
|
-
img_data = base64_image
|
|
203
|
-
if "," in img_data:
|
|
204
|
-
img_data = img_data.split(",")[1]
|
|
205
|
-
self._save_screenshot(img_data, action_type="state")
|
|
206
|
-
except Exception as e:
|
|
207
|
-
logger.error(f"Error saving screenshot: {str(e)}")
|
|
208
|
-
|
|
209
|
-
return parsed_screen
|
|
210
|
-
except Exception as e:
|
|
211
|
-
logger.error(f"Error taking screenshot: {str(e)}")
|
|
212
|
-
return {
|
|
213
|
-
"width": 1024,
|
|
214
|
-
"height": 768,
|
|
215
|
-
"parsed_content_list": [],
|
|
216
|
-
"timestamp": datetime.now().isoformat(),
|
|
217
|
-
"error": f"Error taking screenshot: {str(e)}",
|
|
218
|
-
"screenshot_base64": "",
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
@abstractmethod
|
|
222
|
-
async def initialize_client(self) -> None:
|
|
223
|
-
"""Initialize the API client and any provider-specific components."""
|
|
224
|
-
raise NotImplementedError
|
|
225
|
-
|
|
226
|
-
@abstractmethod
|
|
227
|
-
async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[Dict[str, Any], None]:
|
|
228
|
-
"""Run the agent loop with provided messages.
|
|
229
|
-
|
|
230
|
-
Args:
|
|
231
|
-
messages: List of message objects
|
|
232
|
-
|
|
233
|
-
Yields:
|
|
234
|
-
Dict containing response data
|
|
235
|
-
"""
|
|
236
|
-
raise NotImplementedError
|
|
237
|
-
|
|
238
|
-
@abstractmethod
|
|
239
|
-
async def _process_screen(
|
|
240
|
-
self, parsed_screen: Dict[str, Any], messages: List[Dict[str, Any]]
|
|
241
|
-
) -> None:
|
|
242
|
-
"""Process screen information and add to messages.
|
|
243
|
-
|
|
244
|
-
Args:
|
|
245
|
-
parsed_screen: Dictionary containing parsed screen info
|
|
246
|
-
messages: List of messages to update
|
|
247
|
-
"""
|
|
248
|
-
raise NotImplementedError
|
agent/core/factory.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Base agent loop implementation."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import importlib.util
|
|
5
|
+
from typing import Dict, Optional, Type, TYPE_CHECKING, Any, cast, Callable, Awaitable
|
|
6
|
+
|
|
7
|
+
from computer import Computer
|
|
8
|
+
from .types import AgentLoop
|
|
9
|
+
from .base import BaseLoop
|
|
10
|
+
|
|
11
|
+
# For type checking only
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from ..providers.omni.types import LLMProvider
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class LoopFactory:
|
|
19
|
+
"""Factory class for creating agent loops."""
|
|
20
|
+
|
|
21
|
+
# Registry to store loop implementations
|
|
22
|
+
_loop_registry: Dict[AgentLoop, Type[BaseLoop]] = {}
|
|
23
|
+
|
|
24
|
+
@classmethod
|
|
25
|
+
def create_loop(
|
|
26
|
+
cls,
|
|
27
|
+
loop_type: AgentLoop,
|
|
28
|
+
api_key: str,
|
|
29
|
+
model_name: str,
|
|
30
|
+
computer: Computer,
|
|
31
|
+
provider: Any = None,
|
|
32
|
+
save_trajectory: bool = True,
|
|
33
|
+
trajectory_dir: str = "trajectories",
|
|
34
|
+
only_n_most_recent_images: Optional[int] = None,
|
|
35
|
+
acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
|
|
36
|
+
) -> BaseLoop:
|
|
37
|
+
"""Create and return an appropriate loop instance based on type."""
|
|
38
|
+
if loop_type == AgentLoop.ANTHROPIC:
|
|
39
|
+
# Lazy import AnthropicLoop only when needed
|
|
40
|
+
try:
|
|
41
|
+
from ..providers.anthropic.loop import AnthropicLoop
|
|
42
|
+
except ImportError:
|
|
43
|
+
raise ImportError(
|
|
44
|
+
"The 'anthropic' provider is not installed. "
|
|
45
|
+
"Install it with 'pip install cua-agent[anthropic]'"
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
return AnthropicLoop(
|
|
49
|
+
api_key=api_key,
|
|
50
|
+
model=model_name,
|
|
51
|
+
computer=computer,
|
|
52
|
+
save_trajectory=save_trajectory,
|
|
53
|
+
base_dir=trajectory_dir,
|
|
54
|
+
only_n_most_recent_images=only_n_most_recent_images,
|
|
55
|
+
)
|
|
56
|
+
elif loop_type == AgentLoop.OPENAI:
|
|
57
|
+
# Lazy import OpenAILoop only when needed
|
|
58
|
+
try:
|
|
59
|
+
from ..providers.openai.loop import OpenAILoop
|
|
60
|
+
except ImportError:
|
|
61
|
+
raise ImportError(
|
|
62
|
+
"The 'openai' provider is not installed. "
|
|
63
|
+
"Install it with 'pip install cua-agent[openai]'"
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
return OpenAILoop(
|
|
67
|
+
api_key=api_key,
|
|
68
|
+
model=model_name,
|
|
69
|
+
computer=computer,
|
|
70
|
+
save_trajectory=save_trajectory,
|
|
71
|
+
base_dir=trajectory_dir,
|
|
72
|
+
only_n_most_recent_images=only_n_most_recent_images,
|
|
73
|
+
acknowledge_safety_check_callback=acknowledge_safety_check_callback,
|
|
74
|
+
)
|
|
75
|
+
elif loop_type == AgentLoop.OMNI:
|
|
76
|
+
# Lazy import OmniLoop and related classes only when needed
|
|
77
|
+
try:
|
|
78
|
+
from ..providers.omni.loop import OmniLoop
|
|
79
|
+
from ..providers.omni.parser import OmniParser
|
|
80
|
+
from ..providers.omni.types import LLMProvider
|
|
81
|
+
except ImportError:
|
|
82
|
+
raise ImportError(
|
|
83
|
+
"The 'omni' provider is not installed. "
|
|
84
|
+
"Install it with 'pip install cua-agent[all]'"
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
if provider is None:
|
|
88
|
+
raise ValueError("Provider is required for OMNI loop type")
|
|
89
|
+
|
|
90
|
+
# We know provider is the correct type at this point, so cast it
|
|
91
|
+
provider_instance = cast(LLMProvider, provider)
|
|
92
|
+
|
|
93
|
+
return OmniLoop(
|
|
94
|
+
provider=provider_instance,
|
|
95
|
+
api_key=api_key,
|
|
96
|
+
model=model_name,
|
|
97
|
+
computer=computer,
|
|
98
|
+
save_trajectory=save_trajectory,
|
|
99
|
+
base_dir=trajectory_dir,
|
|
100
|
+
only_n_most_recent_images=only_n_most_recent_images,
|
|
101
|
+
parser=OmniParser(),
|
|
102
|
+
)
|
|
103
|
+
else:
|
|
104
|
+
raise ValueError(f"Unsupported loop type: {loop_type}")
|