cua-agent 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/__init__.py +2 -4
- agent/core/__init__.py +3 -5
- agent/core/computer_agent.py +213 -31
- agent/core/experiment.py +20 -3
- agent/core/loop.py +12 -8
- agent/core/telemetry.py +44 -32
- agent/providers/anthropic/loop.py +44 -15
- agent/providers/anthropic/messages/manager.py +3 -1
- agent/providers/anthropic/tools/base.py +1 -1
- agent/providers/anthropic/tools/collection.py +2 -2
- agent/providers/anthropic/tools/computer.py +34 -24
- agent/providers/anthropic/tools/manager.py +2 -2
- agent/providers/omni/experiment.py +5 -2
- agent/providers/omni/loop.py +12 -6
- agent/providers/omni/parser.py +2 -1
- agent/providers/omni/tools/__init__.py +0 -1
- agent/providers/omni/tools/computer.py +3 -2
- agent/providers/omni/tools/manager.py +1 -3
- agent/providers/omni/utils.py +4 -2
- agent/types/__init__.py +1 -4
- agent/types/base.py +0 -12
- {cua_agent-0.1.5.dist-info → cua_agent-0.1.6.dist-info}/METADATA +1 -1
- {cua_agent-0.1.5.dist-info → cua_agent-0.1.6.dist-info}/RECORD +25 -28
- agent/core/agent.py +0 -252
- agent/core/base_agent.py +0 -164
- agent/core/factory.py +0 -102
- {cua_agent-0.1.5.dist-info → cua_agent-0.1.6.dist-info}/WHEEL +0 -0
- {cua_agent-0.1.5.dist-info → cua_agent-0.1.6.dist-info}/entry_points.txt +0 -0
|
@@ -1,16 +1,13 @@
|
|
|
1
1
|
agent/README.md,sha256=8EFnLrKejthEcL9bZflQSbvA-KwpiPanBz8TEEwRub8,2153
|
|
2
|
-
agent/__init__.py,sha256=
|
|
2
|
+
agent/__init__.py,sha256=Pil-INEbTU7iOlZXP3QN-kV_IHtW9uk7PuJCON319Ws,1437
|
|
3
3
|
agent/core/README.md,sha256=VOXNVbR0ugxf9gCXYmZtUU2kngZhfi29haT_oSxK0Lk,3559
|
|
4
|
-
agent/core/__init__.py,sha256=
|
|
5
|
-
agent/core/agent.py,sha256=A07a7mRtKqpX2AHCP1i8KesOqoOETfh23CyTTQth6vI,9327
|
|
6
|
-
agent/core/base_agent.py,sha256=te9rk2tJZpEhDUEB1xSaFqe1zeOjmzMdHF5LaUDP2K0,6276
|
|
4
|
+
agent/core/__init__.py,sha256=bds3kSkCILroIxxqU4scCPPCr-pooIkF2S4oU3OgsGY,638
|
|
7
5
|
agent/core/callbacks.py,sha256=VbGIf5QkHh3Q0KsLM6wv7hRdIA5WExTVYLm64bckyUA,4306
|
|
8
|
-
agent/core/computer_agent.py,sha256=
|
|
9
|
-
agent/core/experiment.py,sha256=
|
|
10
|
-
agent/core/
|
|
11
|
-
agent/core/loop.py,sha256=vhdlSy_hIY3-a92uTGdF3oYE5Qcq0U2hyTJNmXunnfc,9009
|
|
6
|
+
agent/core/computer_agent.py,sha256=7JOwAEeB1CL8Sw_1WAE3_a9tswZyCaBdXWDCfD3r7M4,9976
|
|
7
|
+
agent/core/experiment.py,sha256=Ywj6q3JZFDKicfPuQsDl0vSN55HS7-Cnk3u3EcUCKe8,8866
|
|
8
|
+
agent/core/loop.py,sha256=j4zI7h6mifQ5kMn9y0NcjAi1ZpvSBHoPL7Lk2e1OGsQ,9255
|
|
12
9
|
agent/core/messages.py,sha256=N8pV8Eh-AJpMuDPRI5OGWUIOU6DRr-pQjK9XU0go9Hk,7637
|
|
13
|
-
agent/core/telemetry.py,sha256=
|
|
10
|
+
agent/core/telemetry.py,sha256=HElPd32k_w2SJ6t-Cc3j_2-AKdLbFwh2YlM8QViDgRw,4790
|
|
14
11
|
agent/core/tools/__init__.py,sha256=xZen-PqUp2dUaMEHJowXCQm33_5Sxhsx9PSoD0rq6tI,489
|
|
15
12
|
agent/core/tools/base.py,sha256=CdzRFNuOjNfzgyTUN4ZoCGkUDR5HI0ECQVpvrUdEij8,2295
|
|
16
13
|
agent/core/tools/bash.py,sha256=jnJKVlHn8np8e0gWd8EO0_qqjMkfQzutSugA_Iol4jE,1585
|
|
@@ -23,16 +20,16 @@ agent/providers/anthropic/__init__.py,sha256=Mj11IZnVshZ2iHkvg4Z5-jrQIaD1WvzDz2Z
|
|
|
23
20
|
agent/providers/anthropic/api/client.py,sha256=Y_g4Xg8Ko4tCqjipVm0GBMw-86vw0KQVXS5aWzJinzw,7038
|
|
24
21
|
agent/providers/anthropic/api/logging.py,sha256=vHpwkIyOZdkSTVIH4ycbBPd4a_rzhP7Osu1I-Ayouwc,5154
|
|
25
22
|
agent/providers/anthropic/callbacks/manager.py,sha256=dRKN7MuBze2dLal0iHDxCKYqMdh_KShSphuwn7zC-c4,1878
|
|
26
|
-
agent/providers/anthropic/loop.py,sha256
|
|
27
|
-
agent/providers/anthropic/messages/manager.py,sha256=
|
|
23
|
+
agent/providers/anthropic/loop.py,sha256=uPjgXoGRdJb5bsJchUh_0aUuyRBm-HSp7jaM2cKg61I,19466
|
|
24
|
+
agent/providers/anthropic/messages/manager.py,sha256=6FobzAHh5-7dxaxbUdG1--1UY4w-mh3MFytX6ONrK3c,4972
|
|
28
25
|
agent/providers/anthropic/prompts.py,sha256=nHFfgPrfvnWrEdVP7EUBGUHAI85D2X9HeZirk9EwncU,1941
|
|
29
26
|
agent/providers/anthropic/tools/__init__.py,sha256=JyZwuVtPUnZwRSZBSCdQv9yxbLCsygm3l8Ywjjt9qTQ,661
|
|
30
|
-
agent/providers/anthropic/tools/base.py,sha256=
|
|
27
|
+
agent/providers/anthropic/tools/base.py,sha256=WnRDbqO25tQzLpS2RU2ZXTLF5wd5IqU7SiyRAglQat4,2752
|
|
31
28
|
agent/providers/anthropic/tools/bash.py,sha256=CIh4pO0jEdSZApnjpmFhrQbTTiwxivuOgv1-QLN0Ydw,5740
|
|
32
|
-
agent/providers/anthropic/tools/collection.py,sha256=
|
|
33
|
-
agent/providers/anthropic/tools/computer.py,sha256=
|
|
29
|
+
agent/providers/anthropic/tools/collection.py,sha256=RBK_6hxfHExR-EOxadiLl0OznmFj07nyIUjFgaYZ6Eo,960
|
|
30
|
+
agent/providers/anthropic/tools/computer.py,sha256=vYni1jDOOgzSSBOJxHcEKxvKUYRp5_nQ-9dmpGdLwm4,25858
|
|
34
31
|
agent/providers/anthropic/tools/edit.py,sha256=EGRP61MDA4Oue1D7Q-_vLpd6LdGbdBA1Z4HSZ66DbmI,13465
|
|
35
|
-
agent/providers/anthropic/tools/manager.py,sha256=
|
|
32
|
+
agent/providers/anthropic/tools/manager.py,sha256=yNvgTkfEqnOz5isDF0RxvmBMZB0uh2PipFEH-PUXpoY,2020
|
|
36
33
|
agent/providers/anthropic/tools/run.py,sha256=xhXdnBK1di9muaO44CEirL9hpGy3NmKbjfMpyeVmn8Y,1595
|
|
37
34
|
agent/providers/anthropic/types.py,sha256=SF00kOMC1ui8j9Ah56KaeiR2cL394qCHjFIsBpXxt5w,421
|
|
38
35
|
agent/providers/omni/__init__.py,sha256=eTUh4Pmh4zO-RLnP-wAFm8EkJBMImT-G2xnVIYWRti0,744
|
|
@@ -42,26 +39,26 @@ agent/providers/omni/clients/base.py,sha256=zAAgPi0jl3SWPC730R9l79E8bfYPSo39UtCS
|
|
|
42
39
|
agent/providers/omni/clients/groq.py,sha256=HEinpE0_Cp_-geMyjJ8qaTPl0regPtETPkem4U13qG4,3599
|
|
43
40
|
agent/providers/omni/clients/openai.py,sha256=E4TAXMUFoYTunJETCWCNx5XAc6xutiN4rB6PlVpzC5s,5972
|
|
44
41
|
agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
|
|
45
|
-
agent/providers/omni/experiment.py,sha256=
|
|
42
|
+
agent/providers/omni/experiment.py,sha256=ZZ45U5NEkpzMNeMO9hJfpRf3iPNFaSZVwChcfqOgbI0,10002
|
|
46
43
|
agent/providers/omni/image_utils.py,sha256=qIFuNi5cIMVwrqYBXG1T6PxUlbxz7gIngFFP39bZIlU,2782
|
|
47
|
-
agent/providers/omni/loop.py,sha256=
|
|
44
|
+
agent/providers/omni/loop.py,sha256=10GxyZFG8wAYObaaInWSZDRVwWNnZk_qhqdGr3PIPe0,44022
|
|
48
45
|
agent/providers/omni/messages.py,sha256=zdjQCAMH-hOyrQQesHhTiIsQbw43KqVSmVIzS8JOIFA,6134
|
|
49
|
-
agent/providers/omni/parser.py,sha256=
|
|
46
|
+
agent/providers/omni/parser.py,sha256=4n1rzaD-mHi7sMfeqChgOyrJuciwzL95x32BGI6GATM,9194
|
|
50
47
|
agent/providers/omni/prompts.py,sha256=Mupjy0bUwBjcAeLXpE1r1jisYPSlhwsp-IXJKEKrEtw,3779
|
|
51
48
|
agent/providers/omni/tool_manager.py,sha256=O6DxyEI-Vg6jt99phh011o4q4me_vNhH2YffIxkO4GM,2585
|
|
52
|
-
agent/providers/omni/tools/__init__.py,sha256=
|
|
49
|
+
agent/providers/omni/tools/__init__.py,sha256=RkxsPTow3jpOKuXJ1ZKb-KBi6lbxGWfjC9gaV6hSZIs,278
|
|
53
50
|
agent/providers/omni/tools/bash.py,sha256=y_ibfP9iRcbiU_E0faAoa4DCP_BlkMlKOOURdBBIGZE,2030
|
|
54
|
-
agent/providers/omni/tools/computer.py,sha256=
|
|
55
|
-
agent/providers/omni/tools/manager.py,sha256=
|
|
51
|
+
agent/providers/omni/tools/computer.py,sha256=s8WVA_xGROEfdmCYjEqr563ySp4DRMlsLVuu54nH0Ww,9129
|
|
52
|
+
agent/providers/omni/tools/manager.py,sha256=EyNABQeJc8bEcJ4hFsRodKyBNkZpThfOHk1Ku3Pzsg8,2519
|
|
56
53
|
agent/providers/omni/types.py,sha256=rpr7-mH9VK1R-nJ6tVu1gKp427j-hw1DpHc197b44nU,1017
|
|
57
|
-
agent/providers/omni/utils.py,sha256=
|
|
54
|
+
agent/providers/omni/utils.py,sha256=X2IBki6yJQFaEz7PDjkx8CqQq2R1v7nldRcOT5j7YcA,6381
|
|
58
55
|
agent/providers/omni/visualization.py,sha256=N3qVQLxYmia3iSVC5oCt5YRlMPuVfylCOyB99R33u8U,3924
|
|
59
56
|
agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
|
|
60
|
-
agent/types/__init__.py,sha256=
|
|
61
|
-
agent/types/base.py,sha256=
|
|
57
|
+
agent/types/__init__.py,sha256=Xi6nNSsfbsGxs3We8gbdEY0ew0Jf0A0Prs5393Tvveg,568
|
|
58
|
+
agent/types/base.py,sha256=_5LNleRTqoL55VHEEZntL8x-OQom2A3FiTf2ttdM_HQ,857
|
|
62
59
|
agent/types/messages.py,sha256=4-hwtxeAhto90_EZpHFducddtsHUsHauvXzYrpKG4RE,953
|
|
63
60
|
agent/types/tools.py,sha256=Jes2CFCFqC727WWHbO-sG7V03rBHnQe5X7Oi9ZkuScI,877
|
|
64
|
-
cua_agent-0.1.
|
|
65
|
-
cua_agent-0.1.
|
|
66
|
-
cua_agent-0.1.
|
|
67
|
-
cua_agent-0.1.
|
|
61
|
+
cua_agent-0.1.6.dist-info/METADATA,sha256=rD_j8q7aC5wkNQtpbgRLyo3-5z_zCDivJE8MwyPpz6I,4528
|
|
62
|
+
cua_agent-0.1.6.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
|
|
63
|
+
cua_agent-0.1.6.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
64
|
+
cua_agent-0.1.6.dist-info/RECORD,,
|
agent/core/agent.py
DELETED
|
@@ -1,252 +0,0 @@
|
|
|
1
|
-
"""Unified computer agent implementation that supports multiple loops."""
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
import logging
|
|
5
|
-
import asyncio
|
|
6
|
-
import time
|
|
7
|
-
import uuid
|
|
8
|
-
from typing import Any, AsyncGenerator, Dict, List, Optional, TYPE_CHECKING, Union, cast
|
|
9
|
-
from datetime import datetime
|
|
10
|
-
from enum import Enum
|
|
11
|
-
|
|
12
|
-
from computer import Computer
|
|
13
|
-
|
|
14
|
-
from ..types.base import Provider, AgentLoop
|
|
15
|
-
from .base_agent import BaseComputerAgent
|
|
16
|
-
from ..core.telemetry import record_agent_initialization
|
|
17
|
-
|
|
18
|
-
# Only import types for type checking to avoid circular imports
|
|
19
|
-
if TYPE_CHECKING:
|
|
20
|
-
from ..providers.anthropic.loop import AnthropicLoop
|
|
21
|
-
from ..providers.omni.loop import OmniLoop
|
|
22
|
-
from ..providers.omni.parser import OmniParser
|
|
23
|
-
|
|
24
|
-
# Import the provider types
|
|
25
|
-
from ..providers.omni.types import LLMProvider, LLM, Model, LLMModel
|
|
26
|
-
|
|
27
|
-
logger = logging.getLogger(__name__)
|
|
28
|
-
|
|
29
|
-
# Default models for different providers
|
|
30
|
-
DEFAULT_MODELS = {
|
|
31
|
-
LLMProvider.OPENAI: "gpt-4o",
|
|
32
|
-
LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
# Map providers to their environment variable names
|
|
36
|
-
ENV_VARS = {
|
|
37
|
-
LLMProvider.OPENAI: "OPENAI_API_KEY",
|
|
38
|
-
LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
class ComputerAgent(BaseComputerAgent):
|
|
43
|
-
"""Unified implementation of the computer agent supporting multiple loop types.
|
|
44
|
-
|
|
45
|
-
This class consolidates the previous AnthropicComputerAgent and OmniComputerAgent
|
|
46
|
-
into a single implementation with configurable loop type.
|
|
47
|
-
"""
|
|
48
|
-
|
|
49
|
-
def __init__(
|
|
50
|
-
self,
|
|
51
|
-
computer: Computer,
|
|
52
|
-
loop: AgentLoop = AgentLoop.OMNI,
|
|
53
|
-
model: Optional[Union[LLM, Dict[str, str], str]] = None,
|
|
54
|
-
api_key: Optional[str] = None,
|
|
55
|
-
save_trajectory: bool = True,
|
|
56
|
-
trajectory_dir: Optional[str] = "trajectories",
|
|
57
|
-
only_n_most_recent_images: Optional[int] = None,
|
|
58
|
-
max_retries: int = 3,
|
|
59
|
-
verbosity: int = logging.INFO,
|
|
60
|
-
telemetry_enabled: bool = True,
|
|
61
|
-
**kwargs,
|
|
62
|
-
):
|
|
63
|
-
"""Initialize a ComputerAgent instance.
|
|
64
|
-
|
|
65
|
-
Args:
|
|
66
|
-
computer: The Computer instance to control
|
|
67
|
-
loop: The agent loop to use: ANTHROPIC or OMNI
|
|
68
|
-
model: The model to use. Can be a string, dict or LLM object.
|
|
69
|
-
Defaults to LLM for the loop type.
|
|
70
|
-
api_key: The API key to use. If None, will use environment variables.
|
|
71
|
-
save_trajectory: Whether to save the trajectory.
|
|
72
|
-
trajectory_dir: The directory to save trajectories to.
|
|
73
|
-
only_n_most_recent_images: Only keep this many most recent images.
|
|
74
|
-
max_retries: Maximum number of retries for failed requests.
|
|
75
|
-
verbosity: Logging level (standard Python logging levels).
|
|
76
|
-
telemetry_enabled: Whether to enable telemetry tracking. Defaults to True.
|
|
77
|
-
**kwargs: Additional keyword arguments to pass to the loop.
|
|
78
|
-
"""
|
|
79
|
-
super().__init__(computer)
|
|
80
|
-
self._configure_logging(verbosity)
|
|
81
|
-
logger.info(f"Initializing ComputerAgent with {loop} loop")
|
|
82
|
-
|
|
83
|
-
# Store telemetry preference
|
|
84
|
-
self.telemetry_enabled = telemetry_enabled
|
|
85
|
-
|
|
86
|
-
# Process the model configuration
|
|
87
|
-
self.model = self._process_model_config(model, loop)
|
|
88
|
-
self.loop_type = loop
|
|
89
|
-
self.api_key = api_key
|
|
90
|
-
|
|
91
|
-
# Store computer
|
|
92
|
-
self.computer = computer
|
|
93
|
-
|
|
94
|
-
# Save trajectory settings
|
|
95
|
-
self.save_trajectory = save_trajectory
|
|
96
|
-
self.trajectory_dir = trajectory_dir
|
|
97
|
-
self.only_n_most_recent_images = only_n_most_recent_images
|
|
98
|
-
|
|
99
|
-
# Store the max retries setting
|
|
100
|
-
self.max_retries = max_retries
|
|
101
|
-
|
|
102
|
-
# Initialize message history
|
|
103
|
-
self.messages = []
|
|
104
|
-
|
|
105
|
-
# Extra kwargs for the loop
|
|
106
|
-
self.loop_kwargs = kwargs
|
|
107
|
-
|
|
108
|
-
# Initialize the actual loop implementation
|
|
109
|
-
self.loop = self._init_loop()
|
|
110
|
-
|
|
111
|
-
# Record initialization in telemetry if enabled
|
|
112
|
-
if telemetry_enabled:
|
|
113
|
-
record_agent_initialization()
|
|
114
|
-
|
|
115
|
-
def _process_model_config(
|
|
116
|
-
self, model_input: Optional[Union[LLM, Dict[str, str], str]], loop: AgentLoop
|
|
117
|
-
) -> LLM:
|
|
118
|
-
"""Process and normalize model configuration.
|
|
119
|
-
|
|
120
|
-
Args:
|
|
121
|
-
model_input: Input model configuration (LLM, dict, string, or None)
|
|
122
|
-
loop: The loop type being used
|
|
123
|
-
|
|
124
|
-
Returns:
|
|
125
|
-
Normalized LLM instance
|
|
126
|
-
"""
|
|
127
|
-
# Handle case where model_input is None
|
|
128
|
-
if model_input is None:
|
|
129
|
-
# Use Anthropic for Anthropic loop, OpenAI for Omni loop
|
|
130
|
-
default_provider = (
|
|
131
|
-
LLMProvider.ANTHROPIC if loop == AgentLoop.ANTHROPIC else LLMProvider.OPENAI
|
|
132
|
-
)
|
|
133
|
-
return LLM(provider=default_provider)
|
|
134
|
-
|
|
135
|
-
# Handle case where model_input is already a LLM or one of its aliases
|
|
136
|
-
if isinstance(model_input, (LLM, Model, LLMModel)):
|
|
137
|
-
return model_input
|
|
138
|
-
|
|
139
|
-
# Handle case where model_input is a dict
|
|
140
|
-
if isinstance(model_input, dict):
|
|
141
|
-
provider = model_input.get("provider", LLMProvider.OPENAI)
|
|
142
|
-
if isinstance(provider, str):
|
|
143
|
-
provider = LLMProvider(provider)
|
|
144
|
-
return LLM(provider=provider, name=model_input.get("name"))
|
|
145
|
-
|
|
146
|
-
# Handle case where model_input is a string (model name)
|
|
147
|
-
if isinstance(model_input, str):
|
|
148
|
-
default_provider = (
|
|
149
|
-
LLMProvider.ANTHROPIC if loop == AgentLoop.ANTHROPIC else LLMProvider.OPENAI
|
|
150
|
-
)
|
|
151
|
-
return LLM(provider=default_provider, name=model_input)
|
|
152
|
-
|
|
153
|
-
raise ValueError(f"Unsupported model configuration: {model_input}")
|
|
154
|
-
|
|
155
|
-
def _configure_logging(self, verbosity: int):
|
|
156
|
-
"""Configure logging based on verbosity level."""
|
|
157
|
-
# Use the logging level directly without mapping
|
|
158
|
-
logger.setLevel(verbosity)
|
|
159
|
-
logging.getLogger("agent").setLevel(verbosity)
|
|
160
|
-
|
|
161
|
-
# Log the verbosity level that was set
|
|
162
|
-
if verbosity <= logging.DEBUG:
|
|
163
|
-
logger.info("Agent logging set to DEBUG level (full debug information)")
|
|
164
|
-
elif verbosity <= logging.INFO:
|
|
165
|
-
logger.info("Agent logging set to INFO level (standard output)")
|
|
166
|
-
elif verbosity <= logging.WARNING:
|
|
167
|
-
logger.warning("Agent logging set to WARNING level (warnings and errors only)")
|
|
168
|
-
elif verbosity <= logging.ERROR:
|
|
169
|
-
logger.warning("Agent logging set to ERROR level (errors only)")
|
|
170
|
-
elif verbosity <= logging.CRITICAL:
|
|
171
|
-
logger.warning("Agent logging set to CRITICAL level (critical errors only)")
|
|
172
|
-
|
|
173
|
-
def _init_loop(self) -> Any:
|
|
174
|
-
"""Initialize the loop based on the loop_type.
|
|
175
|
-
|
|
176
|
-
Returns:
|
|
177
|
-
Initialized loop instance
|
|
178
|
-
"""
|
|
179
|
-
# Lazy import OmniLoop and OmniParser to avoid circular imports
|
|
180
|
-
from ..providers.omni.loop import OmniLoop
|
|
181
|
-
from ..providers.omni.parser import OmniParser
|
|
182
|
-
|
|
183
|
-
if self.loop_type == AgentLoop.ANTHROPIC:
|
|
184
|
-
from ..providers.anthropic.loop import AnthropicLoop
|
|
185
|
-
|
|
186
|
-
# Ensure we always have a valid model name
|
|
187
|
-
model_name = self.model.name or DEFAULT_MODELS[LLMProvider.ANTHROPIC]
|
|
188
|
-
|
|
189
|
-
return AnthropicLoop(
|
|
190
|
-
api_key=self.api_key,
|
|
191
|
-
model=model_name,
|
|
192
|
-
computer=self.computer,
|
|
193
|
-
save_trajectory=self.save_trajectory,
|
|
194
|
-
base_dir=self.trajectory_dir,
|
|
195
|
-
only_n_most_recent_images=self.only_n_most_recent_images,
|
|
196
|
-
**self.loop_kwargs,
|
|
197
|
-
)
|
|
198
|
-
|
|
199
|
-
# Initialize parser for OmniLoop with appropriate device
|
|
200
|
-
if "parser" not in self.loop_kwargs:
|
|
201
|
-
self.loop_kwargs["parser"] = OmniParser()
|
|
202
|
-
|
|
203
|
-
# Ensure we always have a valid model name
|
|
204
|
-
model_name = self.model.name or DEFAULT_MODELS[self.model.provider]
|
|
205
|
-
|
|
206
|
-
return OmniLoop(
|
|
207
|
-
provider=self.model.provider,
|
|
208
|
-
api_key=self.api_key,
|
|
209
|
-
model=model_name,
|
|
210
|
-
computer=self.computer,
|
|
211
|
-
save_trajectory=self.save_trajectory,
|
|
212
|
-
base_dir=self.trajectory_dir,
|
|
213
|
-
only_n_most_recent_images=self.only_n_most_recent_images,
|
|
214
|
-
**self.loop_kwargs,
|
|
215
|
-
)
|
|
216
|
-
|
|
217
|
-
async def _execute_task(self, task: str) -> AsyncGenerator[Dict[str, Any], None]:
|
|
218
|
-
"""Execute a task using the appropriate agent loop.
|
|
219
|
-
|
|
220
|
-
Args:
|
|
221
|
-
task: The task to execute
|
|
222
|
-
|
|
223
|
-
Returns:
|
|
224
|
-
AsyncGenerator yielding task outputs
|
|
225
|
-
"""
|
|
226
|
-
logger.info(f"Executing task: {task}")
|
|
227
|
-
|
|
228
|
-
try:
|
|
229
|
-
# Create a message from the task
|
|
230
|
-
task_message = {"role": "user", "content": task}
|
|
231
|
-
messages_with_task = self.messages + [task_message]
|
|
232
|
-
|
|
233
|
-
# Use the run method of the loop
|
|
234
|
-
async for output in self.loop.run(messages_with_task):
|
|
235
|
-
yield output
|
|
236
|
-
except Exception as e:
|
|
237
|
-
logger.error(f"Error executing task: {e}")
|
|
238
|
-
raise
|
|
239
|
-
finally:
|
|
240
|
-
pass
|
|
241
|
-
|
|
242
|
-
async def _execute_action(self, action_type: str, **action_params) -> Any:
|
|
243
|
-
"""Execute an action with telemetry tracking."""
|
|
244
|
-
try:
|
|
245
|
-
# Execute the action
|
|
246
|
-
result = await super()._execute_action(action_type, **action_params)
|
|
247
|
-
return result
|
|
248
|
-
except Exception as e:
|
|
249
|
-
logger.exception(f"Error executing action {action_type}: {e}")
|
|
250
|
-
raise
|
|
251
|
-
finally:
|
|
252
|
-
pass
|
agent/core/base_agent.py
DELETED
|
@@ -1,164 +0,0 @@
|
|
|
1
|
-
"""Base computer agent implementation."""
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import logging
|
|
5
|
-
import os
|
|
6
|
-
from abc import ABC, abstractmethod
|
|
7
|
-
from typing import Any, AsyncGenerator, Dict, Optional
|
|
8
|
-
|
|
9
|
-
from computer import Computer
|
|
10
|
-
|
|
11
|
-
from ..types.base import Provider
|
|
12
|
-
|
|
13
|
-
logger = logging.getLogger(__name__)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class BaseComputerAgent(ABC):
|
|
17
|
-
"""Base class for computer agents."""
|
|
18
|
-
|
|
19
|
-
def __init__(
|
|
20
|
-
self,
|
|
21
|
-
max_retries: int = 3,
|
|
22
|
-
computer: Optional[Computer] = None,
|
|
23
|
-
screenshot_dir: Optional[str] = None,
|
|
24
|
-
log_dir: Optional[str] = None,
|
|
25
|
-
**kwargs,
|
|
26
|
-
):
|
|
27
|
-
"""Initialize the base computer agent.
|
|
28
|
-
|
|
29
|
-
Args:
|
|
30
|
-
max_retries: Maximum number of retry attempts
|
|
31
|
-
computer: Optional Computer instance
|
|
32
|
-
screenshot_dir: Directory to save screenshots
|
|
33
|
-
log_dir: Directory to save logs (set to None to disable logging to files)
|
|
34
|
-
**kwargs: Additional provider-specific arguments
|
|
35
|
-
"""
|
|
36
|
-
self.max_retries = max_retries
|
|
37
|
-
self.computer = computer or Computer()
|
|
38
|
-
self.queue = asyncio.Queue()
|
|
39
|
-
self.screenshot_dir = screenshot_dir
|
|
40
|
-
self.log_dir = log_dir
|
|
41
|
-
self._retry_count = 0
|
|
42
|
-
self.provider = Provider.UNKNOWN
|
|
43
|
-
|
|
44
|
-
# Setup logging
|
|
45
|
-
if self.log_dir:
|
|
46
|
-
os.makedirs(self.log_dir, exist_ok=True)
|
|
47
|
-
logger.info(f"Created logs directory: {self.log_dir}")
|
|
48
|
-
|
|
49
|
-
# Setup screenshots directory
|
|
50
|
-
if self.screenshot_dir:
|
|
51
|
-
os.makedirs(self.screenshot_dir, exist_ok=True)
|
|
52
|
-
logger.info(f"Created screenshots directory: {self.screenshot_dir}")
|
|
53
|
-
|
|
54
|
-
logger.info("BaseComputerAgent initialized")
|
|
55
|
-
|
|
56
|
-
async def run(self, task: str) -> AsyncGenerator[Dict[str, Any], None]:
|
|
57
|
-
"""Run a task using the computer agent.
|
|
58
|
-
|
|
59
|
-
Args:
|
|
60
|
-
task: Task description
|
|
61
|
-
|
|
62
|
-
Yields:
|
|
63
|
-
Task execution updates
|
|
64
|
-
"""
|
|
65
|
-
try:
|
|
66
|
-
logger.info(f"Running task: {task}")
|
|
67
|
-
|
|
68
|
-
# Initialize the computer if needed
|
|
69
|
-
await self._init_if_needed()
|
|
70
|
-
|
|
71
|
-
# Execute the task and yield results
|
|
72
|
-
# The _execute_task method should be implemented to yield results
|
|
73
|
-
async for result in self._execute_task(task):
|
|
74
|
-
yield result
|
|
75
|
-
|
|
76
|
-
except Exception as e:
|
|
77
|
-
logger.error(f"Error in agent run method: {str(e)}")
|
|
78
|
-
yield {
|
|
79
|
-
"role": "assistant",
|
|
80
|
-
"content": f"Error: {str(e)}",
|
|
81
|
-
"metadata": {"title": "❌ Error"},
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
async def _init_if_needed(self):
|
|
85
|
-
"""Initialize the computer interface if it hasn't been initialized yet."""
|
|
86
|
-
if not self.computer._initialized:
|
|
87
|
-
logger.info("Computer not initialized, initializing now...")
|
|
88
|
-
try:
|
|
89
|
-
# Call run directly without setting the flag first
|
|
90
|
-
await self.computer.run()
|
|
91
|
-
logger.info("Computer interface initialized successfully")
|
|
92
|
-
except Exception as e:
|
|
93
|
-
logger.error(f"Error initializing computer interface: {str(e)}")
|
|
94
|
-
raise
|
|
95
|
-
|
|
96
|
-
async def __aenter__(self):
|
|
97
|
-
"""Initialize the agent when used as a context manager."""
|
|
98
|
-
logger.info("Entering BaseComputerAgent context")
|
|
99
|
-
|
|
100
|
-
# In case the computer wasn't initialized
|
|
101
|
-
try:
|
|
102
|
-
# Initialize the computer only if not already initialized
|
|
103
|
-
logger.info("Checking if computer is already initialized...")
|
|
104
|
-
if not self.computer._initialized:
|
|
105
|
-
logger.info("Initializing computer in __aenter__...")
|
|
106
|
-
# Use the computer's __aenter__ directly instead of calling run()
|
|
107
|
-
# This avoids the circular dependency
|
|
108
|
-
await self.computer.__aenter__()
|
|
109
|
-
logger.info("Computer initialized in __aenter__")
|
|
110
|
-
else:
|
|
111
|
-
logger.info("Computer already initialized, skipping initialization")
|
|
112
|
-
|
|
113
|
-
# Take a test screenshot to verify the computer is working
|
|
114
|
-
logger.info("Testing computer with a screenshot...")
|
|
115
|
-
try:
|
|
116
|
-
test_screenshot = await self.computer.interface.screenshot()
|
|
117
|
-
# Determine the screenshot size based on its type
|
|
118
|
-
if isinstance(test_screenshot, bytes):
|
|
119
|
-
size = len(test_screenshot)
|
|
120
|
-
else:
|
|
121
|
-
# Assume it's an object with base64_image attribute
|
|
122
|
-
try:
|
|
123
|
-
size = len(test_screenshot.base64_image)
|
|
124
|
-
except AttributeError:
|
|
125
|
-
size = "unknown"
|
|
126
|
-
logger.info(f"Screenshot test successful, size: {size}")
|
|
127
|
-
except Exception as e:
|
|
128
|
-
logger.error(f"Screenshot test failed: {str(e)}")
|
|
129
|
-
# Even though screenshot failed, we continue since some tests might not need it
|
|
130
|
-
except Exception as e:
|
|
131
|
-
logger.error(f"Error initializing computer in __aenter__: {str(e)}")
|
|
132
|
-
raise
|
|
133
|
-
|
|
134
|
-
return self
|
|
135
|
-
|
|
136
|
-
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
137
|
-
"""Cleanup computer resources if needed."""
|
|
138
|
-
logger.info("Cleaning up agent resources")
|
|
139
|
-
|
|
140
|
-
# Do any necessary cleanup
|
|
141
|
-
# We're not shutting down the computer here as it might be shared
|
|
142
|
-
# Just log that we're exiting
|
|
143
|
-
if exc_type:
|
|
144
|
-
logger.error(f"Exiting agent context with error: {exc_type.__name__}: {exc_val}")
|
|
145
|
-
else:
|
|
146
|
-
logger.info("Exiting agent context normally")
|
|
147
|
-
|
|
148
|
-
# If we have a queue, make sure to signal it's done
|
|
149
|
-
if hasattr(self, "queue") and self.queue:
|
|
150
|
-
await self.queue.put(None) # Signal that we're done
|
|
151
|
-
|
|
152
|
-
@abstractmethod
|
|
153
|
-
async def _execute_task(self, task: str) -> AsyncGenerator[Dict[str, Any], None]:
|
|
154
|
-
"""Execute a task. Must be implemented by subclasses.
|
|
155
|
-
|
|
156
|
-
This is an async method that returns an AsyncGenerator. Implementations
|
|
157
|
-
should use 'yield' statements to produce results asynchronously.
|
|
158
|
-
"""
|
|
159
|
-
yield {
|
|
160
|
-
"role": "assistant",
|
|
161
|
-
"content": "Base class method called",
|
|
162
|
-
"metadata": {"title": "Error"},
|
|
163
|
-
}
|
|
164
|
-
raise NotImplementedError("Subclasses must implement _execute_task")
|
agent/core/factory.py
DELETED
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
"""Factory for creating provider-specific agents."""
|
|
2
|
-
|
|
3
|
-
from typing import Optional, Dict, Any, List
|
|
4
|
-
|
|
5
|
-
from computer import Computer
|
|
6
|
-
from ..types.base import Provider
|
|
7
|
-
from .base_agent import BaseComputerAgent
|
|
8
|
-
|
|
9
|
-
# Import provider-specific implementations
|
|
10
|
-
_ANTHROPIC_AVAILABLE = False
|
|
11
|
-
_OPENAI_AVAILABLE = False
|
|
12
|
-
_OLLAMA_AVAILABLE = False
|
|
13
|
-
_OMNI_AVAILABLE = False
|
|
14
|
-
|
|
15
|
-
# Try importing providers
|
|
16
|
-
try:
|
|
17
|
-
import anthropic
|
|
18
|
-
from ..providers.anthropic.agent import AnthropicComputerAgent
|
|
19
|
-
|
|
20
|
-
_ANTHROPIC_AVAILABLE = True
|
|
21
|
-
except ImportError:
|
|
22
|
-
pass
|
|
23
|
-
|
|
24
|
-
try:
|
|
25
|
-
import openai
|
|
26
|
-
|
|
27
|
-
_OPENAI_AVAILABLE = True
|
|
28
|
-
except ImportError:
|
|
29
|
-
pass
|
|
30
|
-
|
|
31
|
-
try:
|
|
32
|
-
from ..providers.omni.agent import OmniComputerAgent
|
|
33
|
-
|
|
34
|
-
_OMNI_AVAILABLE = True
|
|
35
|
-
except ImportError:
|
|
36
|
-
pass
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
class AgentFactory:
|
|
40
|
-
"""Factory for creating provider-specific agent implementations."""
|
|
41
|
-
|
|
42
|
-
@staticmethod
|
|
43
|
-
def create(
|
|
44
|
-
provider: Provider, computer: Optional[Computer] = None, **kwargs: Any
|
|
45
|
-
) -> BaseComputerAgent:
|
|
46
|
-
"""Create an agent based on the specified provider.
|
|
47
|
-
|
|
48
|
-
Args:
|
|
49
|
-
provider: The AI provider to use
|
|
50
|
-
computer: Optional Computer instance
|
|
51
|
-
**kwargs: Additional provider-specific arguments
|
|
52
|
-
|
|
53
|
-
Returns:
|
|
54
|
-
A provider-specific agent implementation
|
|
55
|
-
|
|
56
|
-
Raises:
|
|
57
|
-
ImportError: If provider dependencies are not installed
|
|
58
|
-
ValueError: If provider is not supported
|
|
59
|
-
"""
|
|
60
|
-
# Create a Computer instance if none is provided
|
|
61
|
-
if computer is None:
|
|
62
|
-
computer = Computer()
|
|
63
|
-
|
|
64
|
-
if provider == Provider.ANTHROPIC:
|
|
65
|
-
if not _ANTHROPIC_AVAILABLE:
|
|
66
|
-
raise ImportError(
|
|
67
|
-
"Anthropic provider requires additional dependencies. "
|
|
68
|
-
"Install them with: pip install cua-agent[anthropic]"
|
|
69
|
-
)
|
|
70
|
-
return AnthropicComputerAgent(max_retries=3, computer=computer, **kwargs)
|
|
71
|
-
elif provider == Provider.OPENAI:
|
|
72
|
-
if not _OPENAI_AVAILABLE:
|
|
73
|
-
raise ImportError(
|
|
74
|
-
"OpenAI provider requires additional dependencies. "
|
|
75
|
-
"Install them with: pip install cua-agent[openai]"
|
|
76
|
-
)
|
|
77
|
-
raise NotImplementedError("OpenAI provider not yet implemented")
|
|
78
|
-
elif provider == Provider.OLLAMA:
|
|
79
|
-
if not _OLLAMA_AVAILABLE:
|
|
80
|
-
raise ImportError(
|
|
81
|
-
"Ollama provider requires additional dependencies. "
|
|
82
|
-
"Install them with: pip install cua-agent[ollama]"
|
|
83
|
-
)
|
|
84
|
-
# Only import ollama when actually creating an Ollama agent
|
|
85
|
-
try:
|
|
86
|
-
import ollama
|
|
87
|
-
from ..providers.ollama.agent import OllamaComputerAgent
|
|
88
|
-
|
|
89
|
-
return OllamaComputerAgent(max_retries=3, computer=computer, **kwargs)
|
|
90
|
-
except ImportError:
|
|
91
|
-
raise ImportError(
|
|
92
|
-
"Failed to import ollama package. " "Install it with: pip install ollama"
|
|
93
|
-
)
|
|
94
|
-
elif provider == Provider.OMNI:
|
|
95
|
-
if not _OMNI_AVAILABLE:
|
|
96
|
-
raise ImportError(
|
|
97
|
-
"Omni provider requires additional dependencies. "
|
|
98
|
-
"Install them with: pip install cua-agent[omni]"
|
|
99
|
-
)
|
|
100
|
-
return OmniComputerAgent(max_retries=3, computer=computer, **kwargs)
|
|
101
|
-
else:
|
|
102
|
-
raise ValueError(f"Unsupported provider: {provider}")
|
|
File without changes
|
|
File without changes
|