cua-agent 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

@@ -1,16 +1,13 @@
1
1
  agent/README.md,sha256=8EFnLrKejthEcL9bZflQSbvA-KwpiPanBz8TEEwRub8,2153
2
- agent/__init__.py,sha256=sxUp_I8cI4NVa2DY8g-tsJe5_XJJe5VdeS_VLgA98EM,1569
2
+ agent/__init__.py,sha256=Pil-INEbTU7iOlZXP3QN-kV_IHtW9uk7PuJCON319Ws,1437
3
3
  agent/core/README.md,sha256=VOXNVbR0ugxf9gCXYmZtUU2kngZhfi29haT_oSxK0Lk,3559
4
- agent/core/__init__.py,sha256=0htZ-VfsH9ixHB8j_SXu_uv6r3XXsq5TrghFNd-yRNE,709
5
- agent/core/agent.py,sha256=A07a7mRtKqpX2AHCP1i8KesOqoOETfh23CyTTQth6vI,9327
6
- agent/core/base_agent.py,sha256=te9rk2tJZpEhDUEB1xSaFqe1zeOjmzMdHF5LaUDP2K0,6276
4
+ agent/core/__init__.py,sha256=bds3kSkCILroIxxqU4scCPPCr-pooIkF2S4oU3OgsGY,638
7
5
  agent/core/callbacks.py,sha256=VbGIf5QkHh3Q0KsLM6wv7hRdIA5WExTVYLm64bckyUA,4306
8
- agent/core/computer_agent.py,sha256=JGLMl_PwImUttmQh2amdLlXHS9CUyZ9MW20J1Xid7dM,2417
9
- agent/core/experiment.py,sha256=FKmSDyA2YFSrO3q-91ZT29Jm1lm24YCuK59wQ6z-6IM,7930
10
- agent/core/factory.py,sha256=WraOEHWPXBSN4R3DO7M2ctyadodeA8tzHM3dUjdQ_3A,3441
11
- agent/core/loop.py,sha256=vhdlSy_hIY3-a92uTGdF3oYE5Qcq0U2hyTJNmXunnfc,9009
6
+ agent/core/computer_agent.py,sha256=7JOwAEeB1CL8Sw_1WAE3_a9tswZyCaBdXWDCfD3r7M4,9976
7
+ agent/core/experiment.py,sha256=Ywj6q3JZFDKicfPuQsDl0vSN55HS7-Cnk3u3EcUCKe8,8866
8
+ agent/core/loop.py,sha256=j4zI7h6mifQ5kMn9y0NcjAi1ZpvSBHoPL7Lk2e1OGsQ,9255
12
9
  agent/core/messages.py,sha256=N8pV8Eh-AJpMuDPRI5OGWUIOU6DRr-pQjK9XU0go9Hk,7637
13
- agent/core/telemetry.py,sha256=cCnITdDxOSHM0qKV7Fe5sV2gD6B_InRxMVFm-EgKF7M,4083
10
+ agent/core/telemetry.py,sha256=HElPd32k_w2SJ6t-Cc3j_2-AKdLbFwh2YlM8QViDgRw,4790
14
11
  agent/core/tools/__init__.py,sha256=xZen-PqUp2dUaMEHJowXCQm33_5Sxhsx9PSoD0rq6tI,489
15
12
  agent/core/tools/base.py,sha256=CdzRFNuOjNfzgyTUN4ZoCGkUDR5HI0ECQVpvrUdEij8,2295
16
13
  agent/core/tools/bash.py,sha256=jnJKVlHn8np8e0gWd8EO0_qqjMkfQzutSugA_Iol4jE,1585
@@ -23,16 +20,16 @@ agent/providers/anthropic/__init__.py,sha256=Mj11IZnVshZ2iHkvg4Z5-jrQIaD1WvzDz2Z
23
20
  agent/providers/anthropic/api/client.py,sha256=Y_g4Xg8Ko4tCqjipVm0GBMw-86vw0KQVXS5aWzJinzw,7038
24
21
  agent/providers/anthropic/api/logging.py,sha256=vHpwkIyOZdkSTVIH4ycbBPd4a_rzhP7Osu1I-Ayouwc,5154
25
22
  agent/providers/anthropic/callbacks/manager.py,sha256=dRKN7MuBze2dLal0iHDxCKYqMdh_KShSphuwn7zC-c4,1878
26
- agent/providers/anthropic/loop.py,sha256=-g-OUpdVPSTO5kFJSZ5AmnjoWSEs2niHZFSR6B_KKvU,17904
27
- agent/providers/anthropic/messages/manager.py,sha256=atD41v6bjC1STxRB-jLBty9wHlMwacH9cwsL4tBz3uo,4891
23
+ agent/providers/anthropic/loop.py,sha256=uPjgXoGRdJb5bsJchUh_0aUuyRBm-HSp7jaM2cKg61I,19466
24
+ agent/providers/anthropic/messages/manager.py,sha256=6FobzAHh5-7dxaxbUdG1--1UY4w-mh3MFytX6ONrK3c,4972
28
25
  agent/providers/anthropic/prompts.py,sha256=nHFfgPrfvnWrEdVP7EUBGUHAI85D2X9HeZirk9EwncU,1941
29
26
  agent/providers/anthropic/tools/__init__.py,sha256=JyZwuVtPUnZwRSZBSCdQv9yxbLCsygm3l8Ywjjt9qTQ,661
30
- agent/providers/anthropic/tools/base.py,sha256=B1oKv9syFv_JNuCybpllf1PxO8D7ZVtt6C-uoP-GYgw,2799
27
+ agent/providers/anthropic/tools/base.py,sha256=WnRDbqO25tQzLpS2RU2ZXTLF5wd5IqU7SiyRAglQat4,2752
31
28
  agent/providers/anthropic/tools/bash.py,sha256=CIh4pO0jEdSZApnjpmFhrQbTTiwxivuOgv1-QLN0Ydw,5740
32
- agent/providers/anthropic/tools/collection.py,sha256=8RzHLobL44_Jjt8ltXS6I8XJlEAQOfc75dmnDUaHE-8,922
33
- agent/providers/anthropic/tools/computer.py,sha256=WnQS2rIIDz1juwoQMun2ODJjOV134tiZRKOyFzLmshk,24900
29
+ agent/providers/anthropic/tools/collection.py,sha256=RBK_6hxfHExR-EOxadiLl0OznmFj07nyIUjFgaYZ6Eo,960
30
+ agent/providers/anthropic/tools/computer.py,sha256=vYni1jDOOgzSSBOJxHcEKxvKUYRp5_nQ-9dmpGdLwm4,25858
34
31
  agent/providers/anthropic/tools/edit.py,sha256=EGRP61MDA4Oue1D7Q-_vLpd6LdGbdBA1Z4HSZ66DbmI,13465
35
- agent/providers/anthropic/tools/manager.py,sha256=zW-biqO_MV3fb1nDEOl3EmCXD1leoglFj6LDRSM3djs,1982
32
+ agent/providers/anthropic/tools/manager.py,sha256=yNvgTkfEqnOz5isDF0RxvmBMZB0uh2PipFEH-PUXpoY,2020
36
33
  agent/providers/anthropic/tools/run.py,sha256=xhXdnBK1di9muaO44CEirL9hpGy3NmKbjfMpyeVmn8Y,1595
37
34
  agent/providers/anthropic/types.py,sha256=SF00kOMC1ui8j9Ah56KaeiR2cL394qCHjFIsBpXxt5w,421
38
35
  agent/providers/omni/__init__.py,sha256=eTUh4Pmh4zO-RLnP-wAFm8EkJBMImT-G2xnVIYWRti0,744
@@ -42,26 +39,26 @@ agent/providers/omni/clients/base.py,sha256=zAAgPi0jl3SWPC730R9l79E8bfYPSo39UtCS
42
39
  agent/providers/omni/clients/groq.py,sha256=HEinpE0_Cp_-geMyjJ8qaTPl0regPtETPkem4U13qG4,3599
43
40
  agent/providers/omni/clients/openai.py,sha256=E4TAXMUFoYTunJETCWCNx5XAc6xutiN4rB6PlVpzC5s,5972
44
41
  agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
45
- agent/providers/omni/experiment.py,sha256=JGAdHi7Nf73I48c9k3TY1Xpr_i6D2VG1wurOzw5cNGk,9888
42
+ agent/providers/omni/experiment.py,sha256=ZZ45U5NEkpzMNeMO9hJfpRf3iPNFaSZVwChcfqOgbI0,10002
46
43
  agent/providers/omni/image_utils.py,sha256=qIFuNi5cIMVwrqYBXG1T6PxUlbxz7gIngFFP39bZIlU,2782
47
- agent/providers/omni/loop.py,sha256=72o7q92nO7i0EUrVhEPCEHprRKdBYsg5iLTLfLHXAsw,43847
44
+ agent/providers/omni/loop.py,sha256=10GxyZFG8wAYObaaInWSZDRVwWNnZk_qhqdGr3PIPe0,44022
48
45
  agent/providers/omni/messages.py,sha256=zdjQCAMH-hOyrQQesHhTiIsQbw43KqVSmVIzS8JOIFA,6134
49
- agent/providers/omni/parser.py,sha256=lTAoSMSf2zpwqR_8W0SXG3cYIFeUiZa5vXdpjqZwEHY,9161
46
+ agent/providers/omni/parser.py,sha256=4n1rzaD-mHi7sMfeqChgOyrJuciwzL95x32BGI6GATM,9194
50
47
  agent/providers/omni/prompts.py,sha256=Mupjy0bUwBjcAeLXpE1r1jisYPSlhwsp-IXJKEKrEtw,3779
51
48
  agent/providers/omni/tool_manager.py,sha256=O6DxyEI-Vg6jt99phh011o4q4me_vNhH2YffIxkO4GM,2585
52
- agent/providers/omni/tools/__init__.py,sha256=l636hx9Q5z9eaFdPanPwPENUE-w-Xm8kAZhPUq0ZQF4,309
49
+ agent/providers/omni/tools/__init__.py,sha256=RkxsPTow3jpOKuXJ1ZKb-KBi6lbxGWfjC9gaV6hSZIs,278
53
50
  agent/providers/omni/tools/bash.py,sha256=y_ibfP9iRcbiU_E0faAoa4DCP_BlkMlKOOURdBBIGZE,2030
54
- agent/providers/omni/tools/computer.py,sha256=xkMmAR0e_kbf0Zs2mggCDyWrQOJZyXOKPFjkutaQb94,9108
55
- agent/providers/omni/tools/manager.py,sha256=V_tav2yU92PyQnFlxNXG1wvNEaJoEYudtKx5sRjj06Q,2619
51
+ agent/providers/omni/tools/computer.py,sha256=s8WVA_xGROEfdmCYjEqr563ySp4DRMlsLVuu54nH0Ww,9129
52
+ agent/providers/omni/tools/manager.py,sha256=EyNABQeJc8bEcJ4hFsRodKyBNkZpThfOHk1Ku3Pzsg8,2519
56
53
  agent/providers/omni/types.py,sha256=rpr7-mH9VK1R-nJ6tVu1gKp427j-hw1DpHc197b44nU,1017
57
- agent/providers/omni/utils.py,sha256=JqSye1bEp4wxhUgmaMyZi172fTlgXtygJ7XlnvKdUtE,6337
54
+ agent/providers/omni/utils.py,sha256=X2IBki6yJQFaEz7PDjkx8CqQq2R1v7nldRcOT5j7YcA,6381
58
55
  agent/providers/omni/visualization.py,sha256=N3qVQLxYmia3iSVC5oCt5YRlMPuVfylCOyB99R33u8U,3924
59
56
  agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
60
- agent/types/__init__.py,sha256=61UFJT-w0CT4YRn0LiTx4A7fsMdVQjlXO9vnmbI1A7Y,604
61
- agent/types/base.py,sha256=Iy_Q2DIBMLtwWdLyfvHw_6E2ltYu3bIv8GUNy3LYkGs,1133
57
+ agent/types/__init__.py,sha256=Xi6nNSsfbsGxs3We8gbdEY0ew0Jf0A0Prs5393Tvveg,568
58
+ agent/types/base.py,sha256=_5LNleRTqoL55VHEEZntL8x-OQom2A3FiTf2ttdM_HQ,857
62
59
  agent/types/messages.py,sha256=4-hwtxeAhto90_EZpHFducddtsHUsHauvXzYrpKG4RE,953
63
60
  agent/types/tools.py,sha256=Jes2CFCFqC727WWHbO-sG7V03rBHnQe5X7Oi9ZkuScI,877
64
- cua_agent-0.1.5.dist-info/METADATA,sha256=yyoO57p8w39kdnzJY1bT6YelaaZRxzokxQUNMFyJQZY,4528
65
- cua_agent-0.1.5.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
66
- cua_agent-0.1.5.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
67
- cua_agent-0.1.5.dist-info/RECORD,,
61
+ cua_agent-0.1.6.dist-info/METADATA,sha256=rD_j8q7aC5wkNQtpbgRLyo3-5z_zCDivJE8MwyPpz6I,4528
62
+ cua_agent-0.1.6.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
63
+ cua_agent-0.1.6.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
64
+ cua_agent-0.1.6.dist-info/RECORD,,
agent/core/agent.py DELETED
@@ -1,252 +0,0 @@
1
- """Unified computer agent implementation that supports multiple loops."""
2
-
3
- import os
4
- import logging
5
- import asyncio
6
- import time
7
- import uuid
8
- from typing import Any, AsyncGenerator, Dict, List, Optional, TYPE_CHECKING, Union, cast
9
- from datetime import datetime
10
- from enum import Enum
11
-
12
- from computer import Computer
13
-
14
- from ..types.base import Provider, AgentLoop
15
- from .base_agent import BaseComputerAgent
16
- from ..core.telemetry import record_agent_initialization
17
-
18
- # Only import types for type checking to avoid circular imports
19
- if TYPE_CHECKING:
20
- from ..providers.anthropic.loop import AnthropicLoop
21
- from ..providers.omni.loop import OmniLoop
22
- from ..providers.omni.parser import OmniParser
23
-
24
- # Import the provider types
25
- from ..providers.omni.types import LLMProvider, LLM, Model, LLMModel
26
-
27
- logger = logging.getLogger(__name__)
28
-
29
- # Default models for different providers
30
- DEFAULT_MODELS = {
31
- LLMProvider.OPENAI: "gpt-4o",
32
- LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
33
- }
34
-
35
- # Map providers to their environment variable names
36
- ENV_VARS = {
37
- LLMProvider.OPENAI: "OPENAI_API_KEY",
38
- LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
39
- }
40
-
41
-
42
- class ComputerAgent(BaseComputerAgent):
43
- """Unified implementation of the computer agent supporting multiple loop types.
44
-
45
- This class consolidates the previous AnthropicComputerAgent and OmniComputerAgent
46
- into a single implementation with configurable loop type.
47
- """
48
-
49
- def __init__(
50
- self,
51
- computer: Computer,
52
- loop: AgentLoop = AgentLoop.OMNI,
53
- model: Optional[Union[LLM, Dict[str, str], str]] = None,
54
- api_key: Optional[str] = None,
55
- save_trajectory: bool = True,
56
- trajectory_dir: Optional[str] = "trajectories",
57
- only_n_most_recent_images: Optional[int] = None,
58
- max_retries: int = 3,
59
- verbosity: int = logging.INFO,
60
- telemetry_enabled: bool = True,
61
- **kwargs,
62
- ):
63
- """Initialize a ComputerAgent instance.
64
-
65
- Args:
66
- computer: The Computer instance to control
67
- loop: The agent loop to use: ANTHROPIC or OMNI
68
- model: The model to use. Can be a string, dict or LLM object.
69
- Defaults to LLM for the loop type.
70
- api_key: The API key to use. If None, will use environment variables.
71
- save_trajectory: Whether to save the trajectory.
72
- trajectory_dir: The directory to save trajectories to.
73
- only_n_most_recent_images: Only keep this many most recent images.
74
- max_retries: Maximum number of retries for failed requests.
75
- verbosity: Logging level (standard Python logging levels).
76
- telemetry_enabled: Whether to enable telemetry tracking. Defaults to True.
77
- **kwargs: Additional keyword arguments to pass to the loop.
78
- """
79
- super().__init__(computer)
80
- self._configure_logging(verbosity)
81
- logger.info(f"Initializing ComputerAgent with {loop} loop")
82
-
83
- # Store telemetry preference
84
- self.telemetry_enabled = telemetry_enabled
85
-
86
- # Process the model configuration
87
- self.model = self._process_model_config(model, loop)
88
- self.loop_type = loop
89
- self.api_key = api_key
90
-
91
- # Store computer
92
- self.computer = computer
93
-
94
- # Save trajectory settings
95
- self.save_trajectory = save_trajectory
96
- self.trajectory_dir = trajectory_dir
97
- self.only_n_most_recent_images = only_n_most_recent_images
98
-
99
- # Store the max retries setting
100
- self.max_retries = max_retries
101
-
102
- # Initialize message history
103
- self.messages = []
104
-
105
- # Extra kwargs for the loop
106
- self.loop_kwargs = kwargs
107
-
108
- # Initialize the actual loop implementation
109
- self.loop = self._init_loop()
110
-
111
- # Record initialization in telemetry if enabled
112
- if telemetry_enabled:
113
- record_agent_initialization()
114
-
115
- def _process_model_config(
116
- self, model_input: Optional[Union[LLM, Dict[str, str], str]], loop: AgentLoop
117
- ) -> LLM:
118
- """Process and normalize model configuration.
119
-
120
- Args:
121
- model_input: Input model configuration (LLM, dict, string, or None)
122
- loop: The loop type being used
123
-
124
- Returns:
125
- Normalized LLM instance
126
- """
127
- # Handle case where model_input is None
128
- if model_input is None:
129
- # Use Anthropic for Anthropic loop, OpenAI for Omni loop
130
- default_provider = (
131
- LLMProvider.ANTHROPIC if loop == AgentLoop.ANTHROPIC else LLMProvider.OPENAI
132
- )
133
- return LLM(provider=default_provider)
134
-
135
- # Handle case where model_input is already a LLM or one of its aliases
136
- if isinstance(model_input, (LLM, Model, LLMModel)):
137
- return model_input
138
-
139
- # Handle case where model_input is a dict
140
- if isinstance(model_input, dict):
141
- provider = model_input.get("provider", LLMProvider.OPENAI)
142
- if isinstance(provider, str):
143
- provider = LLMProvider(provider)
144
- return LLM(provider=provider, name=model_input.get("name"))
145
-
146
- # Handle case where model_input is a string (model name)
147
- if isinstance(model_input, str):
148
- default_provider = (
149
- LLMProvider.ANTHROPIC if loop == AgentLoop.ANTHROPIC else LLMProvider.OPENAI
150
- )
151
- return LLM(provider=default_provider, name=model_input)
152
-
153
- raise ValueError(f"Unsupported model configuration: {model_input}")
154
-
155
- def _configure_logging(self, verbosity: int):
156
- """Configure logging based on verbosity level."""
157
- # Use the logging level directly without mapping
158
- logger.setLevel(verbosity)
159
- logging.getLogger("agent").setLevel(verbosity)
160
-
161
- # Log the verbosity level that was set
162
- if verbosity <= logging.DEBUG:
163
- logger.info("Agent logging set to DEBUG level (full debug information)")
164
- elif verbosity <= logging.INFO:
165
- logger.info("Agent logging set to INFO level (standard output)")
166
- elif verbosity <= logging.WARNING:
167
- logger.warning("Agent logging set to WARNING level (warnings and errors only)")
168
- elif verbosity <= logging.ERROR:
169
- logger.warning("Agent logging set to ERROR level (errors only)")
170
- elif verbosity <= logging.CRITICAL:
171
- logger.warning("Agent logging set to CRITICAL level (critical errors only)")
172
-
173
- def _init_loop(self) -> Any:
174
- """Initialize the loop based on the loop_type.
175
-
176
- Returns:
177
- Initialized loop instance
178
- """
179
- # Lazy import OmniLoop and OmniParser to avoid circular imports
180
- from ..providers.omni.loop import OmniLoop
181
- from ..providers.omni.parser import OmniParser
182
-
183
- if self.loop_type == AgentLoop.ANTHROPIC:
184
- from ..providers.anthropic.loop import AnthropicLoop
185
-
186
- # Ensure we always have a valid model name
187
- model_name = self.model.name or DEFAULT_MODELS[LLMProvider.ANTHROPIC]
188
-
189
- return AnthropicLoop(
190
- api_key=self.api_key,
191
- model=model_name,
192
- computer=self.computer,
193
- save_trajectory=self.save_trajectory,
194
- base_dir=self.trajectory_dir,
195
- only_n_most_recent_images=self.only_n_most_recent_images,
196
- **self.loop_kwargs,
197
- )
198
-
199
- # Initialize parser for OmniLoop with appropriate device
200
- if "parser" not in self.loop_kwargs:
201
- self.loop_kwargs["parser"] = OmniParser()
202
-
203
- # Ensure we always have a valid model name
204
- model_name = self.model.name or DEFAULT_MODELS[self.model.provider]
205
-
206
- return OmniLoop(
207
- provider=self.model.provider,
208
- api_key=self.api_key,
209
- model=model_name,
210
- computer=self.computer,
211
- save_trajectory=self.save_trajectory,
212
- base_dir=self.trajectory_dir,
213
- only_n_most_recent_images=self.only_n_most_recent_images,
214
- **self.loop_kwargs,
215
- )
216
-
217
- async def _execute_task(self, task: str) -> AsyncGenerator[Dict[str, Any], None]:
218
- """Execute a task using the appropriate agent loop.
219
-
220
- Args:
221
- task: The task to execute
222
-
223
- Returns:
224
- AsyncGenerator yielding task outputs
225
- """
226
- logger.info(f"Executing task: {task}")
227
-
228
- try:
229
- # Create a message from the task
230
- task_message = {"role": "user", "content": task}
231
- messages_with_task = self.messages + [task_message]
232
-
233
- # Use the run method of the loop
234
- async for output in self.loop.run(messages_with_task):
235
- yield output
236
- except Exception as e:
237
- logger.error(f"Error executing task: {e}")
238
- raise
239
- finally:
240
- pass
241
-
242
- async def _execute_action(self, action_type: str, **action_params) -> Any:
243
- """Execute an action with telemetry tracking."""
244
- try:
245
- # Execute the action
246
- result = await super()._execute_action(action_type, **action_params)
247
- return result
248
- except Exception as e:
249
- logger.exception(f"Error executing action {action_type}: {e}")
250
- raise
251
- finally:
252
- pass
agent/core/base_agent.py DELETED
@@ -1,164 +0,0 @@
1
- """Base computer agent implementation."""
2
-
3
- import asyncio
4
- import logging
5
- import os
6
- from abc import ABC, abstractmethod
7
- from typing import Any, AsyncGenerator, Dict, Optional
8
-
9
- from computer import Computer
10
-
11
- from ..types.base import Provider
12
-
13
- logger = logging.getLogger(__name__)
14
-
15
-
16
- class BaseComputerAgent(ABC):
17
- """Base class for computer agents."""
18
-
19
- def __init__(
20
- self,
21
- max_retries: int = 3,
22
- computer: Optional[Computer] = None,
23
- screenshot_dir: Optional[str] = None,
24
- log_dir: Optional[str] = None,
25
- **kwargs,
26
- ):
27
- """Initialize the base computer agent.
28
-
29
- Args:
30
- max_retries: Maximum number of retry attempts
31
- computer: Optional Computer instance
32
- screenshot_dir: Directory to save screenshots
33
- log_dir: Directory to save logs (set to None to disable logging to files)
34
- **kwargs: Additional provider-specific arguments
35
- """
36
- self.max_retries = max_retries
37
- self.computer = computer or Computer()
38
- self.queue = asyncio.Queue()
39
- self.screenshot_dir = screenshot_dir
40
- self.log_dir = log_dir
41
- self._retry_count = 0
42
- self.provider = Provider.UNKNOWN
43
-
44
- # Setup logging
45
- if self.log_dir:
46
- os.makedirs(self.log_dir, exist_ok=True)
47
- logger.info(f"Created logs directory: {self.log_dir}")
48
-
49
- # Setup screenshots directory
50
- if self.screenshot_dir:
51
- os.makedirs(self.screenshot_dir, exist_ok=True)
52
- logger.info(f"Created screenshots directory: {self.screenshot_dir}")
53
-
54
- logger.info("BaseComputerAgent initialized")
55
-
56
- async def run(self, task: str) -> AsyncGenerator[Dict[str, Any], None]:
57
- """Run a task using the computer agent.
58
-
59
- Args:
60
- task: Task description
61
-
62
- Yields:
63
- Task execution updates
64
- """
65
- try:
66
- logger.info(f"Running task: {task}")
67
-
68
- # Initialize the computer if needed
69
- await self._init_if_needed()
70
-
71
- # Execute the task and yield results
72
- # The _execute_task method should be implemented to yield results
73
- async for result in self._execute_task(task):
74
- yield result
75
-
76
- except Exception as e:
77
- logger.error(f"Error in agent run method: {str(e)}")
78
- yield {
79
- "role": "assistant",
80
- "content": f"Error: {str(e)}",
81
- "metadata": {"title": "❌ Error"},
82
- }
83
-
84
- async def _init_if_needed(self):
85
- """Initialize the computer interface if it hasn't been initialized yet."""
86
- if not self.computer._initialized:
87
- logger.info("Computer not initialized, initializing now...")
88
- try:
89
- # Call run directly without setting the flag first
90
- await self.computer.run()
91
- logger.info("Computer interface initialized successfully")
92
- except Exception as e:
93
- logger.error(f"Error initializing computer interface: {str(e)}")
94
- raise
95
-
96
- async def __aenter__(self):
97
- """Initialize the agent when used as a context manager."""
98
- logger.info("Entering BaseComputerAgent context")
99
-
100
- # In case the computer wasn't initialized
101
- try:
102
- # Initialize the computer only if not already initialized
103
- logger.info("Checking if computer is already initialized...")
104
- if not self.computer._initialized:
105
- logger.info("Initializing computer in __aenter__...")
106
- # Use the computer's __aenter__ directly instead of calling run()
107
- # This avoids the circular dependency
108
- await self.computer.__aenter__()
109
- logger.info("Computer initialized in __aenter__")
110
- else:
111
- logger.info("Computer already initialized, skipping initialization")
112
-
113
- # Take a test screenshot to verify the computer is working
114
- logger.info("Testing computer with a screenshot...")
115
- try:
116
- test_screenshot = await self.computer.interface.screenshot()
117
- # Determine the screenshot size based on its type
118
- if isinstance(test_screenshot, bytes):
119
- size = len(test_screenshot)
120
- else:
121
- # Assume it's an object with base64_image attribute
122
- try:
123
- size = len(test_screenshot.base64_image)
124
- except AttributeError:
125
- size = "unknown"
126
- logger.info(f"Screenshot test successful, size: {size}")
127
- except Exception as e:
128
- logger.error(f"Screenshot test failed: {str(e)}")
129
- # Even though screenshot failed, we continue since some tests might not need it
130
- except Exception as e:
131
- logger.error(f"Error initializing computer in __aenter__: {str(e)}")
132
- raise
133
-
134
- return self
135
-
136
- async def __aexit__(self, exc_type, exc_val, exc_tb):
137
- """Cleanup computer resources if needed."""
138
- logger.info("Cleaning up agent resources")
139
-
140
- # Do any necessary cleanup
141
- # We're not shutting down the computer here as it might be shared
142
- # Just log that we're exiting
143
- if exc_type:
144
- logger.error(f"Exiting agent context with error: {exc_type.__name__}: {exc_val}")
145
- else:
146
- logger.info("Exiting agent context normally")
147
-
148
- # If we have a queue, make sure to signal it's done
149
- if hasattr(self, "queue") and self.queue:
150
- await self.queue.put(None) # Signal that we're done
151
-
152
- @abstractmethod
153
- async def _execute_task(self, task: str) -> AsyncGenerator[Dict[str, Any], None]:
154
- """Execute a task. Must be implemented by subclasses.
155
-
156
- This is an async method that returns an AsyncGenerator. Implementations
157
- should use 'yield' statements to produce results asynchronously.
158
- """
159
- yield {
160
- "role": "assistant",
161
- "content": "Base class method called",
162
- "metadata": {"title": "Error"},
163
- }
164
- raise NotImplementedError("Subclasses must implement _execute_task")
agent/core/factory.py DELETED
@@ -1,102 +0,0 @@
1
- """Factory for creating provider-specific agents."""
2
-
3
- from typing import Optional, Dict, Any, List
4
-
5
- from computer import Computer
6
- from ..types.base import Provider
7
- from .base_agent import BaseComputerAgent
8
-
9
- # Import provider-specific implementations
10
- _ANTHROPIC_AVAILABLE = False
11
- _OPENAI_AVAILABLE = False
12
- _OLLAMA_AVAILABLE = False
13
- _OMNI_AVAILABLE = False
14
-
15
- # Try importing providers
16
- try:
17
- import anthropic
18
- from ..providers.anthropic.agent import AnthropicComputerAgent
19
-
20
- _ANTHROPIC_AVAILABLE = True
21
- except ImportError:
22
- pass
23
-
24
- try:
25
- import openai
26
-
27
- _OPENAI_AVAILABLE = True
28
- except ImportError:
29
- pass
30
-
31
- try:
32
- from ..providers.omni.agent import OmniComputerAgent
33
-
34
- _OMNI_AVAILABLE = True
35
- except ImportError:
36
- pass
37
-
38
-
39
- class AgentFactory:
40
- """Factory for creating provider-specific agent implementations."""
41
-
42
- @staticmethod
43
- def create(
44
- provider: Provider, computer: Optional[Computer] = None, **kwargs: Any
45
- ) -> BaseComputerAgent:
46
- """Create an agent based on the specified provider.
47
-
48
- Args:
49
- provider: The AI provider to use
50
- computer: Optional Computer instance
51
- **kwargs: Additional provider-specific arguments
52
-
53
- Returns:
54
- A provider-specific agent implementation
55
-
56
- Raises:
57
- ImportError: If provider dependencies are not installed
58
- ValueError: If provider is not supported
59
- """
60
- # Create a Computer instance if none is provided
61
- if computer is None:
62
- computer = Computer()
63
-
64
- if provider == Provider.ANTHROPIC:
65
- if not _ANTHROPIC_AVAILABLE:
66
- raise ImportError(
67
- "Anthropic provider requires additional dependencies. "
68
- "Install them with: pip install cua-agent[anthropic]"
69
- )
70
- return AnthropicComputerAgent(max_retries=3, computer=computer, **kwargs)
71
- elif provider == Provider.OPENAI:
72
- if not _OPENAI_AVAILABLE:
73
- raise ImportError(
74
- "OpenAI provider requires additional dependencies. "
75
- "Install them with: pip install cua-agent[openai]"
76
- )
77
- raise NotImplementedError("OpenAI provider not yet implemented")
78
- elif provider == Provider.OLLAMA:
79
- if not _OLLAMA_AVAILABLE:
80
- raise ImportError(
81
- "Ollama provider requires additional dependencies. "
82
- "Install them with: pip install cua-agent[ollama]"
83
- )
84
- # Only import ollama when actually creating an Ollama agent
85
- try:
86
- import ollama
87
- from ..providers.ollama.agent import OllamaComputerAgent
88
-
89
- return OllamaComputerAgent(max_retries=3, computer=computer, **kwargs)
90
- except ImportError:
91
- raise ImportError(
92
- "Failed to import ollama package. " "Install it with: pip install ollama"
93
- )
94
- elif provider == Provider.OMNI:
95
- if not _OMNI_AVAILABLE:
96
- raise ImportError(
97
- "Omni provider requires additional dependencies. "
98
- "Install them with: pip install cua-agent[omni]"
99
- )
100
- return OmniComputerAgent(max_retries=3, computer=computer, **kwargs)
101
- else:
102
- raise ValueError(f"Unsupported provider: {provider}")