cua-agent 0.1.2__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cua-agent might be problematic.

Files changed (68)
  1. {cua_agent-0.1.2 → cua_agent-0.1.3}/PKG-INFO +2 -1
  2. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/__init__.py +12 -0
  3. cua_agent-0.1.3/agent/core/agent.py +259 -0
  4. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/base_agent.py +1 -1
  5. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/experiment.py +11 -1
  6. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/loop.py +1 -1
  7. cua_agent-0.1.3/agent/core/telemetry.py +138 -0
  8. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/loop.py +2 -2
  9. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/parser.py +1 -1
  10. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/types.py +0 -6
  11. cua_agent-0.1.3/agent/telemetry.py +21 -0
  12. {cua_agent-0.1.2 → cua_agent-0.1.3}/pyproject.toml +4 -3
  13. cua_agent-0.1.2/agent/core/agent.py +0 -327
  14. {cua_agent-0.1.2 → cua_agent-0.1.3}/README.md +0 -0
  15. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/README.md +0 -0
  16. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/README.md +0 -0
  17. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/__init__.py +0 -0
  18. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/callbacks.py +0 -0
  19. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/computer_agent.py +0 -0
  20. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/factory.py +0 -0
  21. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/messages.py +0 -0
  22. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/tools/__init__.py +0 -0
  23. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/tools/base.py +0 -0
  24. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/tools/bash.py +0 -0
  25. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/tools/collection.py +0 -0
  26. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/tools/computer.py +0 -0
  27. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/tools/edit.py +0 -0
  28. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/tools/manager.py +0 -0
  29. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/__init__.py +0 -0
  30. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/__init__.py +0 -0
  31. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/api/client.py +0 -0
  32. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/api/logging.py +0 -0
  33. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/callbacks/manager.py +0 -0
  34. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/loop.py +0 -0
  35. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/messages/manager.py +0 -0
  36. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/prompts.py +0 -0
  37. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/tools/__init__.py +0 -0
  38. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/tools/base.py +0 -0
  39. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/tools/bash.py +0 -0
  40. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/tools/collection.py +0 -0
  41. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/tools/computer.py +0 -0
  42. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/tools/edit.py +0 -0
  43. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/tools/manager.py +0 -0
  44. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/tools/run.py +0 -0
  45. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/anthropic/types.py +0 -0
  46. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/__init__.py +0 -0
  47. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/callbacks.py +0 -0
  48. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/clients/anthropic.py +0 -0
  49. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/clients/base.py +0 -0
  50. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/clients/groq.py +0 -0
  51. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/clients/openai.py +0 -0
  52. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/clients/utils.py +0 -0
  53. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/experiment.py +0 -0
  54. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/image_utils.py +0 -0
  55. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/messages.py +0 -0
  56. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/prompts.py +0 -0
  57. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/tool_manager.py +0 -0
  58. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/tools/__init__.py +0 -0
  59. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/tools/bash.py +0 -0
  60. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/tools/computer.py +0 -0
  61. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/tools/manager.py +0 -0
  62. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/utils.py +0 -0
  63. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/visualization.py +0 -0
  64. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/types/__init__.py +0 -0
  65. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/types/base.py +0 -0
  66. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/types/messages.py +0 -0
  67. {cua_agent-0.1.2 → cua_agent-0.1.3}/agent/types/tools.py +0 -0
  68. {cua_agent-0.1.2 → cua_agent-0.1.3}/tests/test_agent.py +0 -0
{cua_agent-0.1.2 → cua_agent-0.1.3}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cua-agent
-Version: 0.1.2
+Version: 0.1.3
 Summary: CUA (Computer Use) Agent for AI-driven computer interaction
 Author-Email: TryCua <gh@trycua.com>
 Requires-Python: <3.13,>=3.10
@@ -13,6 +13,7 @@ Requires-Dist: pydantic<3.0.0,>=2.6.4
 Requires-Dist: rich<14.0.0,>=13.7.1
 Requires-Dist: python-dotenv<2.0.0,>=1.0.1
 Requires-Dist: cua-computer<0.2.0,>=0.1.0
+Requires-Dist: cua-core<0.2.0,>=0.1.0
 Requires-Dist: certifi>=2024.2.2
 Provides-Extra: anthropic
 Requires-Dist: anthropic>=0.49.0; extra == "anthropic"
{cua_agent-0.1.2 → cua_agent-0.1.3}/agent/__init__.py
@@ -2,6 +2,18 @@
 
 __version__ = "0.1.0"
 
+# Initialize telemetry when the package is imported
+try:
+    from core.telemetry import enable_telemetry, set_dimension
+
+    # Enable telemetry by default
+    enable_telemetry()
+    # Set the package version as a dimension
+    set_dimension("agent_version", __version__)
+except ImportError:
+    # Core telemetry not available
+    pass
+
 from .core.factory import AgentFactory
 from .core.agent import ComputerAgent
 from .providers.omni.types import LLMProvider, LLM
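In practice this means that merely importing the package opts the process into telemetry when cua-core is installed, and silently does nothing otherwise. A minimal sketch of the resulting behaviour, assuming cua-agent 0.1.3 is installed:

import agent  # importing the package runs the try-block above: enable_telemetry() plus the agent_version dimension

from agent.core.telemetry import is_telemetry_enabled

# False when cua-core is missing or telemetry is globally disabled via environment variable
print(is_telemetry_enabled())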
cua_agent-0.1.3/agent/core/agent.py (new file)
@@ -0,0 +1,259 @@
+"""Unified computer agent implementation that supports multiple loops."""
+
+import os
+import logging
+import asyncio
+import time
+import uuid
+from typing import Any, AsyncGenerator, Dict, List, Optional, TYPE_CHECKING, Union, cast
+from datetime import datetime
+from enum import Enum
+
+from computer import Computer
+
+from ..types.base import Provider, AgentLoop
+from .base_agent import BaseComputerAgent
+from ..core.telemetry import record_agent_initialization
+
+# Only import types for type checking to avoid circular imports
+if TYPE_CHECKING:
+    from ..providers.anthropic.loop import AnthropicLoop
+    from ..providers.omni.loop import OmniLoop
+    from ..providers.omni.parser import OmniParser
+
+# Import the provider types
+from ..providers.omni.types import LLMProvider, LLM, Model, LLMModel
+
+logger = logging.getLogger(__name__)
+
+# Default models for different providers
+DEFAULT_MODELS = {
+    LLMProvider.OPENAI: "gpt-4o",
+    LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
+}
+
+# Map providers to their environment variable names
+ENV_VARS = {
+    LLMProvider.OPENAI: "OPENAI_API_KEY",
+    LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
+}
+
+
+class ComputerAgent(BaseComputerAgent):
+    """Unified implementation of the computer agent supporting multiple loop types.
+
+    This class consolidates the previous AnthropicComputerAgent and OmniComputerAgent
+    into a single implementation with configurable loop type.
+    """
+
+    def __init__(
+        self,
+        computer: Computer,
+        loop: AgentLoop = AgentLoop.OMNI,
+        model: Optional[Union[LLM, Dict[str, str], str]] = None,
+        api_key: Optional[str] = None,
+        save_trajectory: bool = True,
+        trajectory_dir: Optional[str] = "trajectories",
+        only_n_most_recent_images: Optional[int] = None,
+        max_retries: int = 3,
+        verbosity: int = logging.INFO,
+        telemetry_enabled: bool = True,
+        **kwargs,
+    ):
+        """Initialize a ComputerAgent instance.
+
+        Args:
+            computer: The Computer instance to control
+            loop: The agent loop to use: ANTHROPIC or OMNI
+            model: The model to use. Can be a string, dict or LLM object.
+                Defaults to LLM for the loop type.
+            api_key: The API key to use. If None, will use environment variables.
+            save_trajectory: Whether to save the trajectory.
+            trajectory_dir: The directory to save trajectories to.
+            only_n_most_recent_images: Only keep this many most recent images.
+            max_retries: Maximum number of retries for failed requests.
+            verbosity: Logging level (standard Python logging levels).
+            telemetry_enabled: Whether to enable telemetry tracking. Defaults to True.
+            **kwargs: Additional keyword arguments to pass to the loop.
+        """
+        super().__init__(computer)
+        self._configure_logging(verbosity)
+        logger.info(f"Initializing ComputerAgent with {loop} loop")
+
+        # Store telemetry preference
+        self.telemetry_enabled = telemetry_enabled
+
+        # Pass telemetry preference to computer if available
+        if hasattr(computer, "telemetry_enabled"):
+            # Computer doesn't have a setter for telemetry_enabled
+            # Use disable_telemetry() method if telemetry is disabled
+            if not telemetry_enabled and hasattr(computer, "disable_telemetry"):
+                computer.disable_telemetry()
+
+        # Process the model configuration
+        self.model = self._process_model_config(model, loop)
+        self.loop_type = loop
+        self.api_key = api_key
+
+        # Store computer
+        self.computer = computer
+
+        # Save trajectory settings
+        self.save_trajectory = save_trajectory
+        self.trajectory_dir = trajectory_dir
+        self.only_n_most_recent_images = only_n_most_recent_images
+
+        # Store the max retries setting
+        self.max_retries = max_retries
+
+        # Initialize message history
+        self.messages = []
+
+        # Extra kwargs for the loop
+        self.loop_kwargs = kwargs
+
+        # Initialize the actual loop implementation
+        self.loop = self._init_loop()
+
+        # Record initialization in telemetry if enabled
+        if telemetry_enabled:
+            record_agent_initialization()
+
+    def _process_model_config(
+        self, model_input: Optional[Union[LLM, Dict[str, str], str]], loop: AgentLoop
+    ) -> LLM:
+        """Process and normalize model configuration.
+
+        Args:
+            model_input: Input model configuration (LLM, dict, string, or None)
+            loop: The loop type being used
+
+        Returns:
+            Normalized LLM instance
+        """
+        # Handle case where model_input is None
+        if model_input is None:
+            # Use Anthropic for Anthropic loop, OpenAI for Omni loop
+            default_provider = (
+                LLMProvider.ANTHROPIC if loop == AgentLoop.ANTHROPIC else LLMProvider.OPENAI
+            )
+            return LLM(provider=default_provider)
+
+        # Handle case where model_input is already a LLM or one of its aliases
+        if isinstance(model_input, (LLM, Model, LLMModel)):
+            return model_input
+
+        # Handle case where model_input is a dict
+        if isinstance(model_input, dict):
+            provider = model_input.get("provider", LLMProvider.OPENAI)
+            if isinstance(provider, str):
+                provider = LLMProvider(provider)
+            return LLM(provider=provider, name=model_input.get("name"))
+
+        # Handle case where model_input is a string (model name)
+        if isinstance(model_input, str):
+            default_provider = (
+                LLMProvider.ANTHROPIC if loop == AgentLoop.ANTHROPIC else LLMProvider.OPENAI
+            )
+            return LLM(provider=default_provider, name=model_input)
+
+        raise ValueError(f"Unsupported model configuration: {model_input}")
+
+    def _configure_logging(self, verbosity: int):
+        """Configure logging based on verbosity level."""
+        # Use the logging level directly without mapping
+        logger.setLevel(verbosity)
+        logging.getLogger("agent").setLevel(verbosity)
+
+        # Log the verbosity level that was set
+        if verbosity <= logging.DEBUG:
+            logger.info("Agent logging set to DEBUG level (full debug information)")
+        elif verbosity <= logging.INFO:
+            logger.info("Agent logging set to INFO level (standard output)")
+        elif verbosity <= logging.WARNING:
+            logger.warning("Agent logging set to WARNING level (warnings and errors only)")
+        elif verbosity <= logging.ERROR:
+            logger.warning("Agent logging set to ERROR level (errors only)")
+        elif verbosity <= logging.CRITICAL:
+            logger.warning("Agent logging set to CRITICAL level (critical errors only)")
+
+    def _init_loop(self) -> Any:
+        """Initialize the loop based on the loop_type.
+
+        Returns:
+            Initialized loop instance
+        """
+        # Lazy import OmniLoop and OmniParser to avoid circular imports
+        from ..providers.omni.loop import OmniLoop
+        from ..providers.omni.parser import OmniParser
+
+        if self.loop_type == AgentLoop.ANTHROPIC:
+            from ..providers.anthropic.loop import AnthropicLoop
+
+            # Ensure we always have a valid model name
+            model_name = self.model.name or DEFAULT_MODELS[LLMProvider.ANTHROPIC]
+
+            return AnthropicLoop(
+                api_key=self.api_key,
+                model=model_name,
+                computer=self.computer,
+                save_trajectory=self.save_trajectory,
+                base_dir=self.trajectory_dir,
+                only_n_most_recent_images=self.only_n_most_recent_images,
+                **self.loop_kwargs,
+            )
+
+        # Initialize parser for OmniLoop with appropriate device
+        if "parser" not in self.loop_kwargs:
+            self.loop_kwargs["parser"] = OmniParser()
+
+        # Ensure we always have a valid model name
+        model_name = self.model.name or DEFAULT_MODELS[self.model.provider]
+
+        return OmniLoop(
+            provider=self.model.provider,
+            api_key=self.api_key,
+            model=model_name,
+            computer=self.computer,
+            save_trajectory=self.save_trajectory,
+            base_dir=self.trajectory_dir,
+            only_n_most_recent_images=self.only_n_most_recent_images,
+            **self.loop_kwargs,
+        )
+
+    async def _execute_task(self, task: str) -> AsyncGenerator[Dict[str, Any], None]:
+        """Execute a task using the appropriate agent loop.
+
+        Args:
+            task: The task to execute
+
+        Returns:
+            AsyncGenerator yielding task outputs
+        """
+        logger.info(f"Executing task: {task}")
+
+        try:
+            # Create a message from the task
+            task_message = {"role": "user", "content": task}
+            messages_with_task = self.messages + [task_message]
+
+            # Use the run method of the loop
+            async for output in self.loop.run(messages_with_task):
+                yield output
+        except Exception as e:
+            logger.error(f"Error executing task: {e}")
+            raise
+        finally:
+            pass
+
+    async def _execute_action(self, action_type: str, **action_params) -> Any:
+        """Execute an action with telemetry tracking."""
+        try:
+            # Execute the action
+            result = await super()._execute_action(action_type, **action_params)
+            return result
+        except Exception as e:
+            logger.exception(f"Error executing action {action_type}: {e}")
+            raise
+        finally:
+            pass
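For reference, a hedged usage sketch of the rewritten constructor. The Computer() construction details and the agent's public run entry point are not part of this diff and are treated as assumptions here; the only calls taken from the diff itself are the ComputerAgent keyword arguments, the LLM/LLMProvider types, and the internal _execute_task() generator.

import asyncio
import logging

from computer import Computer
from agent import ComputerAgent, LLM, LLMProvider

async def main() -> None:
    computer = Computer()  # assumption: default construction; see cua-computer for the real options
    agent = ComputerAgent(
        computer=computer,
        model=LLM(provider=LLMProvider.OPENAI, name="gpt-4o"),  # a plain string or a dict is also accepted
        save_trajectory=True,
        verbosity=logging.INFO,
        telemetry_enabled=False,  # new in 0.1.3
    )
    # The public run/execute entry point is outside this diff; driving the internal
    # _execute_task() generator shown above is what that entry point ultimately does.
    async for output in agent._execute_task("Open the browser and go to trycua.com"):
        print(output)

asyncio.run(main())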
{cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/base_agent.py
@@ -113,7 +113,7 @@ class BaseComputerAgent(ABC):
         # Take a test screenshot to verify the computer is working
         logger.info("Testing computer with a screenshot...")
         try:
-            test_screenshot = await self.computer.screenshot()
+            test_screenshot = await self.computer.interface.screenshot()
             # Determine the screenshot size based on its type
             if isinstance(test_screenshot, bytes):
                 size = len(test_screenshot)
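The same rename from computer.screenshot() to computer.interface.screenshot() recurs in loop.py, omni/loop.py, and omni/parser.py below, so any caller that invoked the computer object directly needs the interface-level call in 0.1.3. A minimal sketch (the bytes return type is an assumption based on the isinstance check above):

from computer import Computer

async def take_screenshot(computer: Computer) -> bytes:
    # 0.1.3 routes device actions through computer.interface; the bare
    # computer.screenshot() call from 0.1.2 is what these hunks replace.
    return await computer.interface.screenshot()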
{cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/experiment.py
@@ -8,6 +8,7 @@ from datetime import datetime
 from typing import Any, Dict, List, Optional
 from PIL import Image
 import json
+import re
 
 logger = logging.getLogger(__name__)
 
@@ -106,9 +107,18 @@ class ExperimentManager:
         # Increment screenshot counter
         self.screenshot_count += 1
 
+        # Sanitize action_type to ensure valid filename
+        # Replace characters that are not safe for filenames
+        sanitized_action = ""
+        if action_type:
+            # Replace invalid filename characters with underscores
+            sanitized_action = re.sub(r'[\\/*?:"<>|]', "_", action_type)
+            # Limit the length to avoid excessively long filenames
+            sanitized_action = sanitized_action[:50]
+
         # Create a descriptive filename
         timestamp = int(datetime.now().timestamp() * 1000)
-        action_suffix = f"_{action_type}" if action_type else ""
+        action_suffix = f"_{sanitized_action}" if sanitized_action else ""
         filename = f"screenshot_{self.screenshot_count:03d}{action_suffix}_{timestamp}.png"
 
         # Save directly to the turn directory
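The added sanitization can be reproduced in isolation; a small sketch mirroring the logic above (the helper name is illustrative, not part of the package):

import re

def sanitize_action(action_type: str) -> str:
    # Mirrors the 0.1.3 change: replace filename-unsafe characters, cap at 50 characters
    sanitized = re.sub(r'[\\/*?:"<>|]', "_", action_type)
    return sanitized[:50]

print(sanitize_action('hotkey_ctrl+shift+"t"'))  # -> hotkey_ctrl+shift+_t_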
{cua_agent-0.1.2 → cua_agent-0.1.3}/agent/core/loop.py
@@ -166,7 +166,7 @@ class BaseLoop(ABC):
         """
         try:
             # Take screenshot
-            screenshot = await self.computer.screenshot()
+            screenshot = await self.computer.interface.screenshot()
 
             # Initialize with default values
             width, height = 1024, 768
cua_agent-0.1.3/agent/core/telemetry.py (new file)
@@ -0,0 +1,138 @@
+"""Agent telemetry for tracking anonymous usage and feature usage."""
+
+import logging
+import os
+import platform
+import sys
+import time
+from typing import Dict, Any, Optional
+
+# Import the core telemetry module
+TELEMETRY_AVAILABLE = False
+
+try:
+    from core.telemetry import (
+        record_event,
+        increment,
+        get_telemetry_client,
+        flush,
+        is_telemetry_enabled,
+        is_telemetry_globally_disabled,
+    )
+
+    def increment_counter(counter_name: str, value: int = 1) -> None:
+        """Wrapper for increment to maintain backward compatibility."""
+        if is_telemetry_enabled():
+            increment(counter_name, value)
+
+    def set_dimension(name: str, value: Any) -> None:
+        """Set a dimension that will be attached to all events."""
+        logger = logging.getLogger("cua.agent.telemetry")
+        logger.debug(f"Setting dimension {name}={value}")
+
+    TELEMETRY_AVAILABLE = True
+    logger = logging.getLogger("cua.agent.telemetry")
+    logger.info("Successfully imported telemetry")
+except ImportError as e:
+    logger = logging.getLogger("cua.agent.telemetry")
+    logger.warning(f"Could not import telemetry: {e}")
+    TELEMETRY_AVAILABLE = False
+
+
+# Local fallbacks in case core telemetry isn't available
+def _noop(*args: Any, **kwargs: Any) -> None:
+    """No-op function for when telemetry is not available."""
+    pass
+
+
+logger = logging.getLogger("cua.agent.telemetry")
+
+# If telemetry isn't available, use no-op functions
+if not TELEMETRY_AVAILABLE:
+    logger.debug("Telemetry not available, using no-op functions")
+    record_event = _noop  # type: ignore
+    increment_counter = _noop  # type: ignore
+    set_dimension = _noop  # type: ignore
+    get_telemetry_client = lambda: None  # type: ignore
+    flush = _noop  # type: ignore
+    is_telemetry_enabled = lambda: False  # type: ignore
+    is_telemetry_globally_disabled = lambda: True  # type: ignore
+
+# Get system info once to use in telemetry
+SYSTEM_INFO = {
+    "os": platform.system().lower(),
+    "os_version": platform.release(),
+    "python_version": platform.python_version(),
+}
+
+
+def enable_telemetry() -> bool:
+    """Enable telemetry if available.
+
+    Returns:
+        bool: True if telemetry was successfully enabled, False otherwise
+    """
+    global TELEMETRY_AVAILABLE
+
+    # Check if globally disabled using core function
+    if TELEMETRY_AVAILABLE and is_telemetry_globally_disabled():
+        logger.info("Telemetry is globally disabled via environment variable - cannot enable")
+        return False
+
+    # Already enabled
+    if TELEMETRY_AVAILABLE:
+        return True
+
+    # Try to import and enable
+    try:
+        from core.telemetry import (
+            record_event,
+            increment,
+            get_telemetry_client,
+            flush,
+            is_telemetry_globally_disabled,
+        )
+
+        # Check again after import
+        if is_telemetry_globally_disabled():
+            logger.info("Telemetry is globally disabled via environment variable - cannot enable")
+            return False
+
+        TELEMETRY_AVAILABLE = True
+        logger.info("Telemetry successfully enabled")
+        return True
+    except ImportError as e:
+        logger.warning(f"Could not enable telemetry: {e}")
+        return False
+
+
+def disable_telemetry() -> None:
+    """Disable telemetry for this session."""
+    global TELEMETRY_AVAILABLE
+    TELEMETRY_AVAILABLE = False
+    logger.info("Telemetry disabled for this session")
+
+
+def is_telemetry_enabled() -> bool:
+    """Check if telemetry is enabled.
+
+    Returns:
+        bool: True if telemetry is enabled, False otherwise
+    """
+    # Use the core function if available, otherwise use our local flag
+    if TELEMETRY_AVAILABLE:
+        from core.telemetry import is_telemetry_enabled as core_is_enabled
+
+        return core_is_enabled()
+    return False
+
+
+def record_agent_initialization() -> None:
+    """Record when an agent instance is initialized."""
+    if TELEMETRY_AVAILABLE and is_telemetry_enabled():
+        record_event("agent_initialized", SYSTEM_INFO)
+
+        # Set dimensions that will be attached to all events
+        set_dimension("os", SYSTEM_INFO["os"])
+        set_dimension("os_version", SYSTEM_INFO["os_version"])
+        set_dimension("python_version", SYSTEM_INFO["python_version"])
{cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/loop.py
@@ -731,7 +731,7 @@ class OmniLoop(BaseLoop):
                 action_type = f"hotkey_{content['Value'].replace('+', '_')}"
                 logger.info(f"Preparing hotkey with keys: {keys}")
                 # Get the method but call it with *args instead of **kwargs
-                method = getattr(self.computer, action)
+                method = getattr(self.computer.interface, action)
                 await method(*keys)  # Unpack the keys list as positional arguments
                 logger.info(f"Tool execution completed successfully: {action}")
 
@@ -776,7 +776,7 @@ class OmniLoop(BaseLoop):
 
                 # Execute tool and handle result
                 try:
-                    method = getattr(self.computer, action)
+                    method = getattr(self.computer.interface, action)
                     logger.info(f"Found method for action '{action}': {method}")
                     await method(**kwargs)
                     logger.info(f"Tool execution completed successfully: {action}")
{cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/parser.py
@@ -79,7 +79,7 @@ class OmniParser:
         try:
             # Get screenshot from computer
             logger.info("Taking screenshot...")
-            screenshot = await computer.screenshot()
+            screenshot = await computer.interface.screenshot()
 
             # Log screenshot info
             logger.info(f"Screenshot type: {type(screenshot)}")
{cua_agent-0.1.2 → cua_agent-0.1.3}/agent/providers/omni/types.py
@@ -10,8 +10,6 @@ class LLMProvider(StrEnum):
 
     ANTHROPIC = "anthropic"
     OPENAI = "openai"
-    GROQ = "groq"
-    QWEN = "qwen"
 
 
 LLMProvider
@@ -39,14 +37,10 @@ Model = LLM
 PROVIDER_TO_DEFAULT_MODEL: Dict[LLMProvider, str] = {
     LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
     LLMProvider.OPENAI: "gpt-4o",
-    LLMProvider.GROQ: "deepseek-r1-distill-llama-70b",
-    LLMProvider.QWEN: "qwen2.5-vl-72b-instruct",
 }
 
 # Environment variable names for each provider
 PROVIDER_TO_ENV_VAR: Dict[LLMProvider, str] = {
     LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
     LLMProvider.OPENAI: "OPENAI_API_KEY",
-    LLMProvider.GROQ: "GROQ_API_KEY",
-    LLMProvider.QWEN: "QWEN_API_KEY",
 }
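With GROQ and QWEN removed, only the two remaining providers are valid enum members in 0.1.3. A brief sketch of what still constructs cleanly (note that the "gpt-4o" fallback when name is omitted comes from DEFAULT_MODELS in the new agent.py, not from the LLM class itself):

from agent.providers.omni.types import LLM, LLMProvider

LLM(provider=LLMProvider.OPENAI)  # name omitted; ComputerAgent later falls back to DEFAULT_MODELS
LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219")

try:
    LLMProvider("groq")
except ValueError:
    print("groq and qwen are no longer valid providers in 0.1.3")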
cua_agent-0.1.3/agent/telemetry.py (new file)
@@ -0,0 +1,21 @@
+"""Telemetry support for Agent class."""
+
+import os
+import platform
+import sys
+import time
+from typing import Any, Dict, Optional
+
+from core.telemetry import (
+    record_event,
+    is_telemetry_enabled,
+    flush,
+    get_telemetry_client,
+    increment,
+)
+
+# System information used for telemetry
+SYSTEM_INFO = {
+    "os": sys.platform,
+    "python_version": platform.python_version(),
+}
{cua_agent-0.1.2 → cua_agent-0.1.3}/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
 
 [project]
 name = "cua-agent"
-version = "0.1.2"
+version = "0.1.3"
 description = "CUA (Computer Use) Agent for AI-driven computer interaction"
 authors = [
     { name = "TryCua", email = "gh@trycua.com" },
@@ -21,6 +21,7 @@ dependencies = [
     "rich>=13.7.1,<14.0.0",
     "python-dotenv>=1.0.1,<2.0.0",
     "cua-computer>=0.1.0,<0.2.0",
+    "cua-core>=0.1.0,<0.2.0",
     "certifi>=2024.2.2",
 ]
 requires-python = ">=3.10,<3.13"
@@ -78,7 +79,7 @@ target-version = [
 
 [tool.ruff]
 line-length = 100
-target-version = "0.1.2"
+target-version = "0.1.3"
 select = [
     "E",
     "F",
@@ -92,7 +93,7 @@ docstring-code-format = true
 
 [tool.mypy]
 strict = true
-python_version = "0.1.2"
+python_version = "0.1.3"
 ignore_missing_imports = true
 disallow_untyped_defs = true
 check_untyped_defs = true
cua_agent-0.1.2/agent/core/agent.py (removed)
@@ -1,327 +0,0 @@
-"""Unified computer agent implementation that supports multiple loops."""
-
-import os
-import logging
-import asyncio
-from typing import Any, AsyncGenerator, Dict, List, Optional, TYPE_CHECKING, Union, cast
-from datetime import datetime
-
-from computer import Computer
-
-from ..types.base import Provider, AgentLoop
-from .base_agent import BaseComputerAgent
-
-# Only import types for type checking to avoid circular imports
-if TYPE_CHECKING:
-    from ..providers.anthropic.loop import AnthropicLoop
-    from ..providers.omni.loop import OmniLoop
-    from ..providers.omni.parser import OmniParser
-
-# Import the provider types
-from ..providers.omni.types import LLMProvider, LLM, Model, LLMModel
-
-logger = logging.getLogger(__name__)
-
-# Default models for different providers
-DEFAULT_MODELS = {
-    LLMProvider.OPENAI: "gpt-4o",
-    LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
-    LLMProvider.GROQ: "llama3-70b-8192",
-}
-
-# Map providers to their environment variable names
-ENV_VARS = {
-    LLMProvider.OPENAI: "OPENAI_API_KEY",
-    LLMProvider.GROQ: "GROQ_API_KEY",
-    LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
-}
-
-
-class ComputerAgent(BaseComputerAgent):
-    """Unified implementation of the computer agent supporting multiple loop types.
-
-    This class consolidates the previous AnthropicComputerAgent and OmniComputerAgent
-    into a single implementation with configurable loop type.
-    """
-
-    def __init__(
-        self,
-        computer: Computer,
-        loop: AgentLoop = AgentLoop.OMNI,
-        model: Optional[Union[LLM, Dict[str, str], str]] = None,
-        api_key: Optional[str] = None,
-        save_trajectory: bool = True,
-        trajectory_dir: Optional[str] = "trajectories",
-        only_n_most_recent_images: Optional[int] = None,
-        max_retries: int = 3,
-        verbosity: int = logging.INFO,
-        **kwargs,
-    ):
-        """Initialize the computer agent.
-
-        Args:
-            computer: Computer instance to control
-            loop: The type of loop to use (Anthropic or Omni)
-            model: LLM configuration. Can be:
-                - LLM object with provider and name
-                - Dict with 'provider' and 'name' keys
-                - String with model name (defaults to OpenAI provider)
-                - None (defaults based on loop)
-            api_key: Optional API key (will use environment variable if not provided)
-            save_trajectory: Whether to save screenshots and logs
-            trajectory_dir: Directory to save trajectories (defaults to "trajectories")
-            only_n_most_recent_images: Limit history to N most recent images
-            max_retries: Maximum number of retry attempts for failed operations
-            verbosity: Logging level (standard Python logging levels: logging.DEBUG, logging.INFO, etc.)
-            **kwargs: Additional keyword arguments to pass to the loop
-        """
-        # Set up trajectory directories based on save_trajectory
-        base_dir = trajectory_dir if save_trajectory else None
-        # Don't create a redundant screenshots directory - directly use the timestamp folder
-        screenshot_dir = None  # This was previously set to os.path.join(base_dir, "screenshots")
-        log_dir = None
-
-        super().__init__(
-            max_retries=max_retries,
-            computer=computer,
-            screenshot_dir=screenshot_dir,
-            log_dir=log_dir,
-            **kwargs,
-        )
-
-        self.loop_type = loop
-        self.save_trajectory = save_trajectory
-        self.trajectory_dir = trajectory_dir
-        self.only_n_most_recent_images = only_n_most_recent_images
-        self.verbosity = verbosity
-        self._kwargs = kwargs  # Keep this for loop initialization
-
-        # Configure logging based on verbosity
-        self._configure_logging(verbosity)
-
-        # Process model configuration
-        self.model_config = self._process_model_config(model, loop)
-
-        # Get API key from environment if not provided
-        if api_key is None:
-            env_var = (
-                ENV_VARS.get(self.model_config.provider)
-                if loop == AgentLoop.OMNI
-                else "ANTHROPIC_API_KEY"
-            )
-            if not env_var:
-                raise ValueError(
-                    f"Unsupported provider: {self.model_config.provider}. Please use one of: {list(ENV_VARS.keys())}"
-                )
-
-            api_key = os.environ.get(env_var)
-            if not api_key:
-                raise ValueError(
-                    f"No API key provided and {env_var} environment variable is not set.\n"
-                    f"Please set the {env_var} environment variable or pass the api_key directly:\n"
-                    f" - Export in terminal: export {env_var}=your_api_key_here\n"
-                    f" - Add to .env file: {env_var}=your_api_key_here\n"
-                    f" - Pass directly: api_key='your_api_key_here'"
-                )
-        self.api_key = api_key
-
-        # Initialize the appropriate loop based on loop_type
-        self.loop = self._init_loop()
-
-    def _process_model_config(
-        self, model_input: Optional[Union[LLM, Dict[str, str], str]], loop: AgentLoop
-    ) -> LLM:
-        """Process and normalize model configuration.
-
-        Args:
-            model_input: Input model configuration (LLM, dict, string, or None)
-            loop: The loop type being used
-
-        Returns:
-            Normalized LLM instance
-        """
-        # Handle case where model_input is None
-        if model_input is None:
-            # Use Anthropic for Anthropic loop, OpenAI for Omni loop
-            default_provider = (
-                LLMProvider.ANTHROPIC if loop == AgentLoop.ANTHROPIC else LLMProvider.OPENAI
-            )
-            return LLM(provider=default_provider)
-
-        # Handle case where model_input is already a LLM or one of its aliases
-        if isinstance(model_input, (LLM, Model, LLMModel)):
-            return model_input
-
-        # Handle case where model_input is a dict
-        if isinstance(model_input, dict):
-            provider = model_input.get("provider", LLMProvider.OPENAI)
-            if isinstance(provider, str):
-                provider = LLMProvider(provider)
-            return LLM(provider=provider, name=model_input.get("name"))
-
-        # Handle case where model_input is a string (model name)
-        if isinstance(model_input, str):
-            default_provider = (
-                LLMProvider.ANTHROPIC if loop == AgentLoop.ANTHROPIC else LLMProvider.OPENAI
-            )
-            return LLM(provider=default_provider, name=model_input)
-
-        raise ValueError(f"Unsupported model configuration: {model_input}")
-
-    def _configure_logging(self, verbosity: int):
-        """Configure logging based on verbosity level."""
-        # Use the logging level directly without mapping
-        logger.setLevel(verbosity)
-        logging.getLogger("agent").setLevel(verbosity)
-
-        # Log the verbosity level that was set
-        if verbosity <= logging.DEBUG:
-            logger.info("Agent logging set to DEBUG level (full debug information)")
-        elif verbosity <= logging.INFO:
-            logger.info("Agent logging set to INFO level (standard output)")
-        elif verbosity <= logging.WARNING:
-            logger.warning("Agent logging set to WARNING level (warnings and errors only)")
-        elif verbosity <= logging.ERROR:
-            logger.warning("Agent logging set to ERROR level (errors only)")
-        elif verbosity <= logging.CRITICAL:
-            logger.warning("Agent logging set to CRITICAL level (critical errors only)")
-
-    def _init_loop(self) -> Any:
-        """Initialize the loop based on the loop_type.
-
-        Returns:
-            Initialized loop instance
-        """
-        # Lazy import OmniLoop and OmniParser to avoid circular imports
-        from ..providers.omni.loop import OmniLoop
-        from ..providers.omni.parser import OmniParser
-
-        if self.loop_type == AgentLoop.ANTHROPIC:
-            from ..providers.anthropic.loop import AnthropicLoop
-
-            # Ensure we always have a valid model name
-            model_name = self.model_config.name or DEFAULT_MODELS[LLMProvider.ANTHROPIC]
-
-            return AnthropicLoop(
-                api_key=self.api_key,
-                model=model_name,
-                computer=self.computer,
-                save_trajectory=self.save_trajectory,
-                base_dir=self.trajectory_dir,
-                only_n_most_recent_images=self.only_n_most_recent_images,
-                **self._kwargs,
-            )
-
-        # Initialize parser for OmniLoop with appropriate device
-        if "parser" not in self._kwargs:
-            self._kwargs["parser"] = OmniParser()
-
-        # Ensure we always have a valid model name
-        model_name = self.model_config.name or DEFAULT_MODELS[self.model_config.provider]
-
-        return OmniLoop(
-            provider=self.model_config.provider,
-            api_key=self.api_key,
-            model=model_name,
-            computer=self.computer,
-            save_trajectory=self.save_trajectory,
-            base_dir=self.trajectory_dir,
-            only_n_most_recent_images=self.only_n_most_recent_images,
-            **self._kwargs,
-        )
-
-    async def _execute_task(self, task: str) -> AsyncGenerator[Dict[str, Any], None]:
-        """Execute a task using the appropriate loop.
-
-        Args:
-            task: Task description to execute
-
-        Yields:
-            Dict containing response content and metadata
-        """
-        try:
-            # Format the messages based on loop type
-            if self.loop_type == AgentLoop.ANTHROPIC:
-                # Anthropic format
-                messages = [{"role": "user", "content": [{"type": "text", "text": task}]}]
-            else:
-                # Cua format
-                messages = [{"role": "user", "content": task}]
-
-            # Run the loop
-            try:
-                async for result in self.loop.run(messages):
-                    if result is None:
-                        break
-
-                    # Handle error case
-                    if "error" in result:
-                        yield {
-                            "role": "assistant",
-                            "content": result["error"],
-                            "metadata": {"title": "❌ Error"},
-                        }
-                        continue
-
-                    # Extract content and metadata based on loop type
-                    if self.loop_type == AgentLoop.ANTHROPIC:
-                        # Handle Anthropic format
-                        if "content" in result:
-                            content_text = ""
-                            for content_block in result["content"]:
-                                try:
-                                    # Try to access the text attribute directly
-                                    content_text += content_block.text
-                                except (AttributeError, TypeError):
-                                    # If it's a dictionary instead of an object
-                                    if isinstance(content_block, dict) and "text" in content_block:
-                                        content_text += content_block["text"]
-
-                            yield {
-                                "role": "assistant",
-                                "content": content_text,
-                                "metadata": result.get("parsed_screen", {}),
-                            }
-                        else:
-                            yield {
-                                "role": "assistant",
-                                "content": str(result),
-                                "metadata": {"title": "Screen Analysis"},
-                            }
-                    else:
-                        # Handle Omni format
-                        content = ""
-                        metadata = {"title": "Screen Analysis"}
-
-                        # If result has content (normal case)
-                        if "content" in result:
-                            content = result["content"]
-
-                            # Ensure metadata has a title
-                            if isinstance(content, dict) and "metadata" in content:
-                                metadata = content["metadata"]
-                                if "title" not in metadata:
-                                    metadata["title"] = "Screen Analysis"
-
-                            # For string content, convert to proper format
-                            if isinstance(content, str):
-                                content = content
-                            elif isinstance(content, dict) and "content" in content:
-                                content = content.get("content", "")
-
-                        yield {"role": "assistant", "content": content, "metadata": metadata}
-            except Exception as e:
-                logger.error(f"Error running the loop: {str(e)}")
-                yield {
-                    "role": "assistant",
-                    "content": f"Error running the agent loop: {str(e)}",
-                    "metadata": {"title": "❌ Loop Error"},
-                }
-
-        except Exception as e:
-            logger.error(f"Error in _execute_task: {str(e)}")
-            yield {
-                "role": "assistant",
-                "content": f"Error: {str(e)}",
-                "metadata": {"title": "❌ Error"},
-            }