cua-agent 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

agent/__init__.py CHANGED
@@ -1,7 +1,53 @@
1
1
  """CUA (Computer Use) Agent for AI-driven computer interaction."""
2
2
 
3
+ import sys
4
+ import logging
5
+
3
6
  __version__ = "0.1.0"
4
7
 
8
+ # Initialize logging
9
+ logger = logging.getLogger("cua.agent")
10
+
11
+ # Initialize telemetry when the package is imported
12
+ try:
13
+ # Import from core telemetry for basic functions
14
+ from core.telemetry import (
15
+ is_telemetry_enabled,
16
+ flush,
17
+ record_event,
18
+ )
19
+
20
+ # Import set_dimension from our own telemetry module
21
+ from .core.telemetry import set_dimension
22
+
23
+ # Check if telemetry is enabled
24
+ if is_telemetry_enabled():
25
+ logger.info("Telemetry is enabled")
26
+
27
+ # Record package initialization
28
+ record_event(
29
+ "module_init",
30
+ {
31
+ "module": "agent",
32
+ "version": __version__,
33
+ "python_version": sys.version,
34
+ },
35
+ )
36
+
37
+ # Set the package version as a dimension
38
+ set_dimension("agent_version", __version__)
39
+
40
+ # Flush events to ensure they're sent
41
+ flush()
42
+ else:
43
+ logger.info("Telemetry is disabled")
44
+ except ImportError as e:
45
+ # Telemetry not available
46
+ logger.warning(f"Telemetry not available: {e}")
47
+ except Exception as e:
48
+ # Other issues with telemetry
49
+ logger.warning(f"Error initializing telemetry: {e}")
50
+
5
51
  from .core.factory import AgentFactory
6
52
  from .core.agent import ComputerAgent
7
53
  from .providers.omni.types import LLMProvider, LLM
agent/core/agent.py CHANGED
@@ -3,13 +3,17 @@
3
3
  import os
4
4
  import logging
5
5
  import asyncio
6
+ import time
7
+ import uuid
6
8
  from typing import Any, AsyncGenerator, Dict, List, Optional, TYPE_CHECKING, Union, cast
7
9
  from datetime import datetime
10
+ from enum import Enum
8
11
 
9
12
  from computer import Computer
10
13
 
11
14
  from ..types.base import Provider, AgentLoop
12
15
  from .base_agent import BaseComputerAgent
16
+ from ..core.telemetry import record_agent_initialization
13
17
 
14
18
  # Only import types for type checking to avoid circular imports
15
19
  if TYPE_CHECKING:
@@ -26,13 +30,11 @@ logger = logging.getLogger(__name__)
26
30
  DEFAULT_MODELS = {
27
31
  LLMProvider.OPENAI: "gpt-4o",
28
32
  LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
29
- LLMProvider.GROQ: "llama3-70b-8192",
30
33
  }
31
34
 
32
35
  # Map providers to their environment variable names
33
36
  ENV_VARS = {
34
37
  LLMProvider.OPENAI: "OPENAI_API_KEY",
35
- LLMProvider.GROQ: "GROQ_API_KEY",
36
38
  LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
37
39
  }
38
40
 
@@ -55,79 +57,61 @@ class ComputerAgent(BaseComputerAgent):
55
57
  only_n_most_recent_images: Optional[int] = None,
56
58
  max_retries: int = 3,
57
59
  verbosity: int = logging.INFO,
60
+ telemetry_enabled: bool = True,
58
61
  **kwargs,
59
62
  ):
60
- """Initialize the computer agent.
63
+ """Initialize a ComputerAgent instance.
61
64
 
62
65
  Args:
63
- computer: Computer instance to control
64
- loop: The type of loop to use (Anthropic or Omni)
65
- model: LLM configuration. Can be:
66
- - LLM object with provider and name
67
- - Dict with 'provider' and 'name' keys
68
- - String with model name (defaults to OpenAI provider)
69
- - None (defaults based on loop)
70
- api_key: Optional API key (will use environment variable if not provided)
71
- save_trajectory: Whether to save screenshots and logs
72
- trajectory_dir: Directory to save trajectories (defaults to "trajectories")
73
- only_n_most_recent_images: Limit history to N most recent images
74
- max_retries: Maximum number of retry attempts for failed operations
75
- verbosity: Logging level (standard Python logging levels: logging.DEBUG, logging.INFO, etc.)
76
- **kwargs: Additional keyword arguments to pass to the loop
66
+ computer: The Computer instance to control
67
+ loop: The agent loop to use: ANTHROPIC or OMNI
68
+ model: The model to use. Can be a string, dict or LLM object.
69
+ Defaults to LLM for the loop type.
70
+ api_key: The API key to use. If None, will use environment variables.
71
+ save_trajectory: Whether to save the trajectory.
72
+ trajectory_dir: The directory to save trajectories to.
73
+ only_n_most_recent_images: Only keep this many most recent images.
74
+ max_retries: Maximum number of retries for failed requests.
75
+ verbosity: Logging level (standard Python logging levels).
76
+ telemetry_enabled: Whether to enable telemetry tracking. Defaults to True.
77
+ **kwargs: Additional keyword arguments to pass to the loop.
77
78
  """
78
- # Set up trajectory directories based on save_trajectory
79
- base_dir = trajectory_dir if save_trajectory else None
80
- # Don't create a redundant screenshots directory - directly use the timestamp folder
81
- screenshot_dir = None # This was previously set to os.path.join(base_dir, "screenshots")
82
- log_dir = None
83
-
84
- super().__init__(
85
- max_retries=max_retries,
86
- computer=computer,
87
- screenshot_dir=screenshot_dir,
88
- log_dir=log_dir,
89
- **kwargs,
90
- )
79
+ super().__init__(computer)
80
+ self._configure_logging(verbosity)
81
+ logger.info(f"Initializing ComputerAgent with {loop} loop")
91
82
 
83
+ # Store telemetry preference
84
+ self.telemetry_enabled = telemetry_enabled
85
+
86
+ # Process the model configuration
87
+ self.model = self._process_model_config(model, loop)
92
88
  self.loop_type = loop
89
+ self.api_key = api_key
90
+
91
+ # Store computer
92
+ self.computer = computer
93
+
94
+ # Save trajectory settings
93
95
  self.save_trajectory = save_trajectory
94
96
  self.trajectory_dir = trajectory_dir
95
97
  self.only_n_most_recent_images = only_n_most_recent_images
96
- self.verbosity = verbosity
97
- self._kwargs = kwargs # Keep this for loop initialization
98
98
 
99
- # Configure logging based on verbosity
100
- self._configure_logging(verbosity)
99
+ # Store the max retries setting
100
+ self.max_retries = max_retries
101
101
 
102
- # Process model configuration
103
- self.model_config = self._process_model_config(model, loop)
102
+ # Initialize message history
103
+ self.messages = []
104
104
 
105
- # Get API key from environment if not provided
106
- if api_key is None:
107
- env_var = (
108
- ENV_VARS.get(self.model_config.provider)
109
- if loop == AgentLoop.OMNI
110
- else "ANTHROPIC_API_KEY"
111
- )
112
- if not env_var:
113
- raise ValueError(
114
- f"Unsupported provider: {self.model_config.provider}. Please use one of: {list(ENV_VARS.keys())}"
115
- )
116
-
117
- api_key = os.environ.get(env_var)
118
- if not api_key:
119
- raise ValueError(
120
- f"No API key provided and {env_var} environment variable is not set.\n"
121
- f"Please set the {env_var} environment variable or pass the api_key directly:\n"
122
- f" - Export in terminal: export {env_var}=your_api_key_here\n"
123
- f" - Add to .env file: {env_var}=your_api_key_here\n"
124
- f" - Pass directly: api_key='your_api_key_here'"
125
- )
126
- self.api_key = api_key
105
+ # Extra kwargs for the loop
106
+ self.loop_kwargs = kwargs
127
107
 
128
- # Initialize the appropriate loop based on loop_type
108
+ # Initialize the actual loop implementation
129
109
  self.loop = self._init_loop()
130
110
 
111
+ # Record initialization in telemetry if enabled
112
+ if telemetry_enabled:
113
+ record_agent_initialization()
114
+
131
115
  def _process_model_config(
132
116
  self, model_input: Optional[Union[LLM, Dict[str, str], str]], loop: AgentLoop
133
117
  ) -> LLM:
@@ -200,7 +184,7 @@ class ComputerAgent(BaseComputerAgent):
200
184
  from ..providers.anthropic.loop import AnthropicLoop
201
185
 
202
186
  # Ensure we always have a valid model name
203
- model_name = self.model_config.name or DEFAULT_MODELS[LLMProvider.ANTHROPIC]
187
+ model_name = self.model.name or DEFAULT_MODELS[LLMProvider.ANTHROPIC]
204
188
 
205
189
  return AnthropicLoop(
206
190
  api_key=self.api_key,
@@ -209,119 +193,60 @@ class ComputerAgent(BaseComputerAgent):
209
193
  save_trajectory=self.save_trajectory,
210
194
  base_dir=self.trajectory_dir,
211
195
  only_n_most_recent_images=self.only_n_most_recent_images,
212
- **self._kwargs,
196
+ **self.loop_kwargs,
213
197
  )
214
198
 
215
199
  # Initialize parser for OmniLoop with appropriate device
216
- if "parser" not in self._kwargs:
217
- self._kwargs["parser"] = OmniParser()
200
+ if "parser" not in self.loop_kwargs:
201
+ self.loop_kwargs["parser"] = OmniParser()
218
202
 
219
203
  # Ensure we always have a valid model name
220
- model_name = self.model_config.name or DEFAULT_MODELS[self.model_config.provider]
204
+ model_name = self.model.name or DEFAULT_MODELS[self.model.provider]
221
205
 
222
206
  return OmniLoop(
223
- provider=self.model_config.provider,
207
+ provider=self.model.provider,
224
208
  api_key=self.api_key,
225
209
  model=model_name,
226
210
  computer=self.computer,
227
211
  save_trajectory=self.save_trajectory,
228
212
  base_dir=self.trajectory_dir,
229
213
  only_n_most_recent_images=self.only_n_most_recent_images,
230
- **self._kwargs,
214
+ **self.loop_kwargs,
231
215
  )
232
216
 
233
217
  async def _execute_task(self, task: str) -> AsyncGenerator[Dict[str, Any], None]:
234
- """Execute a task using the appropriate loop.
218
+ """Execute a task using the appropriate agent loop.
235
219
 
236
220
  Args:
237
- task: Task description to execute
221
+ task: The task to execute
238
222
 
239
- Yields:
240
- Dict containing response content and metadata
223
+ Returns:
224
+ AsyncGenerator yielding task outputs
241
225
  """
226
+ logger.info(f"Executing task: {task}")
227
+
242
228
  try:
243
- # Format the messages based on loop type
244
- if self.loop_type == AgentLoop.ANTHROPIC:
245
- # Anthropic format
246
- messages = [{"role": "user", "content": [{"type": "text", "text": task}]}]
247
- else:
248
- # Cua format
249
- messages = [{"role": "user", "content": task}]
250
-
251
- # Run the loop
252
- try:
253
- async for result in self.loop.run(messages):
254
- if result is None:
255
- break
256
-
257
- # Handle error case
258
- if "error" in result:
259
- yield {
260
- "role": "assistant",
261
- "content": result["error"],
262
- "metadata": {"title": "❌ Error"},
263
- }
264
- continue
265
-
266
- # Extract content and metadata based on loop type
267
- if self.loop_type == AgentLoop.ANTHROPIC:
268
- # Handle Anthropic format
269
- if "content" in result:
270
- content_text = ""
271
- for content_block in result["content"]:
272
- try:
273
- # Try to access the text attribute directly
274
- content_text += content_block.text
275
- except (AttributeError, TypeError):
276
- # If it's a dictionary instead of an object
277
- if isinstance(content_block, dict) and "text" in content_block:
278
- content_text += content_block["text"]
279
-
280
- yield {
281
- "role": "assistant",
282
- "content": content_text,
283
- "metadata": result.get("parsed_screen", {}),
284
- }
285
- else:
286
- yield {
287
- "role": "assistant",
288
- "content": str(result),
289
- "metadata": {"title": "Screen Analysis"},
290
- }
291
- else:
292
- # Handle Omni format
293
- content = ""
294
- metadata = {"title": "Screen Analysis"}
295
-
296
- # If result has content (normal case)
297
- if "content" in result:
298
- content = result["content"]
299
-
300
- # Ensure metadata has a title
301
- if isinstance(content, dict) and "metadata" in content:
302
- metadata = content["metadata"]
303
- if "title" not in metadata:
304
- metadata["title"] = "Screen Analysis"
305
-
306
- # For string content, convert to proper format
307
- if isinstance(content, str):
308
- content = content
309
- elif isinstance(content, dict) and "content" in content:
310
- content = content.get("content", "")
311
-
312
- yield {"role": "assistant", "content": content, "metadata": metadata}
313
- except Exception as e:
314
- logger.error(f"Error running the loop: {str(e)}")
315
- yield {
316
- "role": "assistant",
317
- "content": f"Error running the agent loop: {str(e)}",
318
- "metadata": {"title": "❌ Loop Error"},
319
- }
229
+ # Create a message from the task
230
+ task_message = {"role": "user", "content": task}
231
+ messages_with_task = self.messages + [task_message]
320
232
 
233
+ # Use the run method of the loop
234
+ async for output in self.loop.run(messages_with_task):
235
+ yield output
236
+ except Exception as e:
237
+ logger.error(f"Error executing task: {e}")
238
+ raise
239
+ finally:
240
+ pass
241
+
242
+ async def _execute_action(self, action_type: str, **action_params) -> Any:
243
+ """Execute an action with telemetry tracking."""
244
+ try:
245
+ # Execute the action
246
+ result = await super()._execute_action(action_type, **action_params)
247
+ return result
321
248
  except Exception as e:
322
- logger.error(f"Error in _execute_task: {str(e)}")
323
- yield {
324
- "role": "assistant",
325
- "content": f"Error: {str(e)}",
326
- "metadata": {"title": "❌ Error"},
327
- }
249
+ logger.exception(f"Error executing action {action_type}: {e}")
250
+ raise
251
+ finally:
252
+ pass
agent/core/base_agent.py CHANGED
@@ -113,7 +113,7 @@ class BaseComputerAgent(ABC):
113
113
  # Take a test screenshot to verify the computer is working
114
114
  logger.info("Testing computer with a screenshot...")
115
115
  try:
116
- test_screenshot = await self.computer.screenshot()
116
+ test_screenshot = await self.computer.interface.screenshot()
117
117
  # Determine the screenshot size based on its type
118
118
  if isinstance(test_screenshot, bytes):
119
119
  size = len(test_screenshot)
agent/core/experiment.py CHANGED
@@ -8,6 +8,7 @@ from datetime import datetime
8
8
  from typing import Any, Dict, List, Optional
9
9
  from PIL import Image
10
10
  import json
11
+ import re
11
12
 
12
13
  logger = logging.getLogger(__name__)
13
14
 
@@ -106,9 +107,18 @@ class ExperimentManager:
106
107
  # Increment screenshot counter
107
108
  self.screenshot_count += 1
108
109
 
110
+ # Sanitize action_type to ensure valid filename
111
+ # Replace characters that are not safe for filenames
112
+ sanitized_action = ""
113
+ if action_type:
114
+ # Replace invalid filename characters with underscores
115
+ sanitized_action = re.sub(r'[\\/*?:"<>|]', "_", action_type)
116
+ # Limit the length to avoid excessively long filenames
117
+ sanitized_action = sanitized_action[:50]
118
+
109
119
  # Create a descriptive filename
110
120
  timestamp = int(datetime.now().timestamp() * 1000)
111
- action_suffix = f"_{action_type}" if action_type else ""
121
+ action_suffix = f"_{sanitized_action}" if sanitized_action else ""
112
122
  filename = f"screenshot_{self.screenshot_count:03d}{action_suffix}_{timestamp}.png"
113
123
 
114
124
  # Save directly to the turn directory
agent/core/loop.py CHANGED
@@ -166,7 +166,7 @@ class BaseLoop(ABC):
166
166
  """
167
167
  try:
168
168
  # Take screenshot
169
- screenshot = await self.computer.screenshot()
169
+ screenshot = await self.computer.interface.screenshot()
170
170
 
171
171
  # Initialize with default values
172
172
  width, height = 1024, 768
agent/core/telemetry.py ADDED
@@ -0,0 +1,130 @@
1
+ """Agent telemetry for tracking anonymous usage and feature usage."""
2
+
3
+ import logging
4
+ import os
5
+ import platform
6
+ import sys
7
+ from typing import Dict, Any
8
+
9
+ # Import the core telemetry module
10
+ TELEMETRY_AVAILABLE = False
11
+
12
+ try:
13
+ from core.telemetry import (
14
+ record_event,
15
+ increment,
16
+ get_telemetry_client,
17
+ flush,
18
+ is_telemetry_enabled,
19
+ is_telemetry_globally_disabled,
20
+ )
21
+
22
+ def increment_counter(counter_name: str, value: int = 1) -> None:
23
+ """Wrapper for increment to maintain backward compatibility."""
24
+ if is_telemetry_enabled():
25
+ increment(counter_name, value)
26
+
27
+ def set_dimension(name: str, value: Any) -> None:
28
+ """Set a dimension that will be attached to all events."""
29
+ logger = logging.getLogger("cua.agent.telemetry")
30
+ logger.debug(f"Setting dimension {name}={value}")
31
+
32
+ TELEMETRY_AVAILABLE = True
33
+ logger = logging.getLogger("cua.agent.telemetry")
34
+ logger.info("Successfully imported telemetry")
35
+ except ImportError as e:
36
+ logger = logging.getLogger("cua.agent.telemetry")
37
+ logger.warning(f"Could not import telemetry: {e}")
38
+ TELEMETRY_AVAILABLE = False
39
+
40
+
41
+ # Local fallbacks in case core telemetry isn't available
42
+ def _noop(*args: Any, **kwargs: Any) -> None:
43
+ """No-op function for when telemetry is not available."""
44
+ pass
45
+
46
+
47
+ logger = logging.getLogger("cua.agent.telemetry")
48
+
49
+ # If telemetry isn't available, use no-op functions
50
+ if not TELEMETRY_AVAILABLE:
51
+ logger.debug("Telemetry not available, using no-op functions")
52
+ record_event = _noop # type: ignore
53
+ increment_counter = _noop # type: ignore
54
+ set_dimension = _noop # type: ignore
55
+ get_telemetry_client = lambda: None # type: ignore
56
+ flush = _noop # type: ignore
57
+ is_telemetry_enabled = lambda: False # type: ignore
58
+ is_telemetry_globally_disabled = lambda: True # type: ignore
59
+
60
+ # Get system info once to use in telemetry
61
+ SYSTEM_INFO = {
62
+ "os": platform.system().lower(),
63
+ "os_version": platform.release(),
64
+ "python_version": platform.python_version(),
65
+ }
66
+
67
+
68
+ def enable_telemetry() -> bool:
69
+ """Enable telemetry if available.
70
+
71
+ Returns:
72
+ bool: True if telemetry was successfully enabled, False otherwise
73
+ """
74
+ global TELEMETRY_AVAILABLE
75
+
76
+ # Check if globally disabled using core function
77
+ if TELEMETRY_AVAILABLE and is_telemetry_globally_disabled():
78
+ logger.info("Telemetry is globally disabled via environment variable - cannot enable")
79
+ return False
80
+
81
+ # Already enabled
82
+ if TELEMETRY_AVAILABLE:
83
+ return True
84
+
85
+ # Try to import and enable
86
+ try:
87
+ from core.telemetry import (
88
+ record_event,
89
+ increment,
90
+ get_telemetry_client,
91
+ flush,
92
+ is_telemetry_globally_disabled,
93
+ )
94
+
95
+ # Check again after import
96
+ if is_telemetry_globally_disabled():
97
+ logger.info("Telemetry is globally disabled via environment variable - cannot enable")
98
+ return False
99
+
100
+ TELEMETRY_AVAILABLE = True
101
+ logger.info("Telemetry successfully enabled")
102
+ return True
103
+ except ImportError as e:
104
+ logger.warning(f"Could not enable telemetry: {e}")
105
+ return False
106
+
107
+
108
+ def is_telemetry_enabled() -> bool:
109
+ """Check if telemetry is enabled.
110
+
111
+ Returns:
112
+ bool: True if telemetry is enabled, False otherwise
113
+ """
114
+ # Use the core function if available, otherwise use our local flag
115
+ if TELEMETRY_AVAILABLE:
116
+ from core.telemetry import is_telemetry_enabled as core_is_enabled
117
+
118
+ return core_is_enabled()
119
+ return False
120
+
121
+
122
+ def record_agent_initialization() -> None:
123
+ """Record when an agent instance is initialized."""
124
+ if TELEMETRY_AVAILABLE and is_telemetry_enabled():
125
+ record_event("agent_initialized", SYSTEM_INFO)
126
+
127
+ # Set dimensions that will be attached to all events
128
+ set_dimension("os", SYSTEM_INFO["os"])
129
+ set_dimension("os_version", SYSTEM_INFO["os_version"])
130
+ set_dimension("python_version", SYSTEM_INFO["python_version"])
agent/providers/omni/loop.py CHANGED
@@ -731,7 +731,7 @@ class OmniLoop(BaseLoop):
731
731
  action_type = f"hotkey_{content['Value'].replace('+', '_')}"
732
732
  logger.info(f"Preparing hotkey with keys: {keys}")
733
733
  # Get the method but call it with *args instead of **kwargs
734
- method = getattr(self.computer, action)
734
+ method = getattr(self.computer.interface, action)
735
735
  await method(*keys) # Unpack the keys list as positional arguments
736
736
  logger.info(f"Tool execution completed successfully: {action}")
737
737
 
@@ -776,7 +776,7 @@ class OmniLoop(BaseLoop):
776
776
 
777
777
  # Execute tool and handle result
778
778
  try:
779
- method = getattr(self.computer, action)
779
+ method = getattr(self.computer.interface, action)
780
780
  logger.info(f"Found method for action '{action}': {method}")
781
781
  await method(**kwargs)
782
782
  logger.info(f"Tool execution completed successfully: {action}")
agent/providers/omni/parser.py CHANGED
@@ -79,7 +79,7 @@ class OmniParser:
79
79
  try:
80
80
  # Get screenshot from computer
81
81
  logger.info("Taking screenshot...")
82
- screenshot = await computer.screenshot()
82
+ screenshot = await computer.interface.screenshot()
83
83
 
84
84
  # Log screenshot info
85
85
  logger.info(f"Screenshot type: {type(screenshot)}")
agent/providers/omni/types.py CHANGED
@@ -10,8 +10,6 @@ class LLMProvider(StrEnum):
10
10
 
11
11
  ANTHROPIC = "anthropic"
12
12
  OPENAI = "openai"
13
- GROQ = "groq"
14
- QWEN = "qwen"
15
13
 
16
14
 
17
15
  LLMProvider
@@ -39,14 +37,10 @@ Model = LLM
39
37
  PROVIDER_TO_DEFAULT_MODEL: Dict[LLMProvider, str] = {
40
38
  LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
41
39
  LLMProvider.OPENAI: "gpt-4o",
42
- LLMProvider.GROQ: "deepseek-r1-distill-llama-70b",
43
- LLMProvider.QWEN: "qwen2.5-vl-72b-instruct",
44
40
  }
45
41
 
46
42
  # Environment variable names for each provider
47
43
  PROVIDER_TO_ENV_VAR: Dict[LLMProvider, str] = {
48
44
  LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
49
45
  LLMProvider.OPENAI: "OPENAI_API_KEY",
50
- LLMProvider.GROQ: "GROQ_API_KEY",
51
- LLMProvider.QWEN: "QWEN_API_KEY",
52
46
  }
agent/telemetry.py ADDED
@@ -0,0 +1,21 @@
1
+ """Telemetry support for Agent class."""
2
+
3
+ import os
4
+ import platform
5
+ import sys
6
+ import time
7
+ from typing import Any, Dict, Optional
8
+
9
+ from core.telemetry import (
10
+ record_event,
11
+ is_telemetry_enabled,
12
+ flush,
13
+ get_telemetry_client,
14
+ increment,
15
+ )
16
+
17
+ # System information used for telemetry
18
+ SYSTEM_INFO = {
19
+ "os": sys.platform,
20
+ "python_version": platform.python_version(),
21
+ }
cua_agent-0.1.4.dist-info/METADATA ADDED
@@ -0,0 +1,120 @@
1
+ Metadata-Version: 2.1
2
+ Name: cua-agent
3
+ Version: 0.1.4
4
+ Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
+ Author-Email: TryCua <gh@trycua.com>
6
+ Requires-Python: <3.13,>=3.10
7
+ Requires-Dist: httpx<0.29.0,>=0.27.0
8
+ Requires-Dist: aiohttp<4.0.0,>=3.9.3
9
+ Requires-Dist: asyncio
10
+ Requires-Dist: anyio<5.0.0,>=4.4.1
11
+ Requires-Dist: typing-extensions<5.0.0,>=4.12.2
12
+ Requires-Dist: pydantic<3.0.0,>=2.6.4
13
+ Requires-Dist: rich<14.0.0,>=13.7.1
14
+ Requires-Dist: python-dotenv<2.0.0,>=1.0.1
15
+ Requires-Dist: cua-computer<0.2.0,>=0.1.0
16
+ Requires-Dist: cua-core<0.2.0,>=0.1.0
17
+ Requires-Dist: certifi>=2024.2.2
18
+ Provides-Extra: anthropic
19
+ Requires-Dist: anthropic>=0.49.0; extra == "anthropic"
20
+ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "anthropic"
21
+ Provides-Extra: som
22
+ Requires-Dist: torch>=2.2.1; extra == "som"
23
+ Requires-Dist: torchvision>=0.17.1; extra == "som"
24
+ Requires-Dist: ultralytics>=8.0.0; extra == "som"
25
+ Requires-Dist: transformers>=4.38.2; extra == "som"
26
+ Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "som"
27
+ Requires-Dist: anthropic<0.47.0,>=0.46.0; extra == "som"
28
+ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "som"
29
+ Requires-Dist: openai<2.0.0,>=1.14.0; extra == "som"
30
+ Requires-Dist: groq<0.5.0,>=0.4.0; extra == "som"
31
+ Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "som"
32
+ Requires-Dist: requests<3.0.0,>=2.31.0; extra == "som"
33
+ Provides-Extra: all
34
+ Requires-Dist: torch>=2.2.1; extra == "all"
35
+ Requires-Dist: torchvision>=0.17.1; extra == "all"
36
+ Requires-Dist: ultralytics>=8.0.0; extra == "all"
37
+ Requires-Dist: transformers>=4.38.2; extra == "all"
38
+ Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "all"
39
+ Requires-Dist: anthropic<0.47.0,>=0.46.0; extra == "all"
40
+ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "all"
41
+ Requires-Dist: openai<2.0.0,>=1.14.0; extra == "all"
42
+ Requires-Dist: groq<0.5.0,>=0.4.0; extra == "all"
43
+ Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "all"
44
+ Requires-Dist: requests<3.0.0,>=2.31.0; extra == "all"
45
+ Description-Content-Type: text/markdown
46
+
47
+ <div align="center">
48
+ <h1>
49
+ <div class="image-wrapper" style="display: inline-block;">
50
+ <picture>
51
+ <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
52
+ <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
53
+ <img alt="Shows my svg">
54
+ </picture>
55
+ </div>
56
+
57
+ [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
58
+ [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
59
+ [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
60
+ [![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/)
61
+ </h1>
62
+ </div>
63
+
64
+ **Agent** is a Computer Use (CUA) framework for running multi-app agentic workflows targeting macOS and Linux sandboxes, supporting both local (Ollama) and cloud model providers (OpenAI, Anthropic, Groq, DeepSeek, Qwen). The framework integrates with Microsoft's OmniParser for enhanced UI understanding and interaction.
65
+
66
+ ### Get started with Agent
67
+
68
+ ```python
69
+ from agent import ComputerAgent, AgentLoop, LLMProvider
70
+ from computer import Computer
71
+
72
+ computer = Computer(verbosity=logging.INFO)
73
+
74
+ agent = ComputerAgent(
75
+ computer=computer,
76
+ loop=AgentLoop.ANTHROPIC,
77
+ # loop=AgentLoop.OMNI,
78
+ model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"),
79
+ # model=LLM(provider=LLMProvider.OPENAI, name="gpt-4.5-preview"),
80
+ save_trajectory=True,
81
+ trajectory_dir=str(Path("trajectories")),
82
+ only_n_most_recent_images=3,
83
+ verbosity=logging.INFO,
84
+ )
85
+
86
+ tasks = [
87
+ """
88
+ Please help me with the following task:
89
+ 1. Open Safari browser
90
+ 2. Go to Wikipedia.org
91
+ 3. Search for "Claude AI"
92
+ 4. Summarize the main points you find about Claude AI
93
+ """
94
+ ]
95
+
96
+ async with agent:
97
+ for i, task in enumerate(tasks, 1):
98
+ print(f"\nExecuting task {i}/{len(tasks)}: {task}")
99
+ async for result in agent.run(task):
100
+ print(result)
101
+ print(f"Task {i} completed")
102
+ ```
103
+
104
+ ## Install
105
+
106
+ ### cua-agent
107
+
108
+ ```bash
109
+ pip install "cua-agent[all]"
110
+
111
+ # or install specific loop providers
112
+ pip install "cua-agent[anthropic]"
113
+ pip install "cua-agent[omni]"
114
+ ```
115
+
116
+ ## Run
117
+
118
+ Refer to these notebooks for step-by-step guides on how to use the Computer-Use Agent (CUA):
119
+
120
+ - [Agent Notebook](../../notebooks/agent_nb.ipynb) - Complete examples and workflows
cua_agent-0.1.2.dist-info/RECORD → cua_agent-0.1.4.dist-info/RECORD CHANGED
@@ -1,15 +1,16 @@
1
1
  agent/README.md,sha256=8EFnLrKejthEcL9bZflQSbvA-KwpiPanBz8TEEwRub8,2153
2
- agent/__init__.py,sha256=BRIunVPG0T5CdAiNJyElKxUZN8Mngg2_TmtLwaupG4I,355
2
+ agent/__init__.py,sha256=sxUp_I8cI4NVa2DY8g-tsJe5_XJJe5VdeS_VLgA98EM,1569
3
3
  agent/core/README.md,sha256=VOXNVbR0ugxf9gCXYmZtUU2kngZhfi29haT_oSxK0Lk,3559
4
4
  agent/core/__init__.py,sha256=0htZ-VfsH9ixHB8j_SXu_uv6r3XXsq5TrghFNd-yRNE,709
5
- agent/core/agent.py,sha256=AQ-S2wVD82RFnD_HmR-zjA7Jj09CUKGp7KreWX1j6Fg,13495
6
- agent/core/base_agent.py,sha256=MgaMKTwgqNJ1-TgS_mxALoC9COzc7Acg9y7Q8HAFX2c,6266
5
+ agent/core/agent.py,sha256=A07a7mRtKqpX2AHCP1i8KesOqoOETfh23CyTTQth6vI,9327
6
+ agent/core/base_agent.py,sha256=te9rk2tJZpEhDUEB1xSaFqe1zeOjmzMdHF5LaUDP2K0,6276
7
7
  agent/core/callbacks.py,sha256=VbGIf5QkHh3Q0KsLM6wv7hRdIA5WExTVYLm64bckyUA,4306
8
8
  agent/core/computer_agent.py,sha256=JGLMl_PwImUttmQh2amdLlXHS9CUyZ9MW20J1Xid7dM,2417
9
- agent/core/experiment.py,sha256=AST1t83eqaGzjoW6KvrhfVIs3ELAR_I70VHq2NsMmNk,7446
9
+ agent/core/experiment.py,sha256=FKmSDyA2YFSrO3q-91ZT29Jm1lm24YCuK59wQ6z-6IM,7930
10
10
  agent/core/factory.py,sha256=WraOEHWPXBSN4R3DO7M2ctyadodeA8tzHM3dUjdQ_3A,3441
11
- agent/core/loop.py,sha256=E-0pz7MaguZQrHs5GP98Oc8C_Iz8ier0vXrD9Ny2HL8,8999
11
+ agent/core/loop.py,sha256=vhdlSy_hIY3-a92uTGdF3oYE5Qcq0U2hyTJNmXunnfc,9009
12
12
  agent/core/messages.py,sha256=N8pV8Eh-AJpMuDPRI5OGWUIOU6DRr-pQjK9XU0go9Hk,7637
13
+ agent/core/telemetry.py,sha256=cCnITdDxOSHM0qKV7Fe5sV2gD6B_InRxMVFm-EgKF7M,4083
13
14
  agent/core/tools/__init__.py,sha256=xZen-PqUp2dUaMEHJowXCQm33_5Sxhsx9PSoD0rq6tI,489
14
15
  agent/core/tools/base.py,sha256=CdzRFNuOjNfzgyTUN4ZoCGkUDR5HI0ECQVpvrUdEij8,2295
15
16
  agent/core/tools/bash.py,sha256=jnJKVlHn8np8e0gWd8EO0_qqjMkfQzutSugA_Iol4jE,1585
@@ -43,23 +44,24 @@ agent/providers/omni/clients/openai.py,sha256=E4TAXMUFoYTunJETCWCNx5XAc6xutiN4rB
43
44
  agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
44
45
  agent/providers/omni/experiment.py,sha256=JGAdHi7Nf73I48c9k3TY1Xpr_i6D2VG1wurOzw5cNGk,9888
45
46
  agent/providers/omni/image_utils.py,sha256=qIFuNi5cIMVwrqYBXG1T6PxUlbxz7gIngFFP39bZIlU,2782
46
- agent/providers/omni/loop.py,sha256=mHCs13in3mrLizF1x8OeCXECp4bL9-CYS_XOJOUZqu8,43827
47
+ agent/providers/omni/loop.py,sha256=72o7q92nO7i0EUrVhEPCEHprRKdBYsg5iLTLfLHXAsw,43847
47
48
  agent/providers/omni/messages.py,sha256=zdjQCAMH-hOyrQQesHhTiIsQbw43KqVSmVIzS8JOIFA,6134
48
- agent/providers/omni/parser.py,sha256=Iv-cXWG2qzdYjyZJH5pGUzfv6nOaiHQ2OXdQSe00Ydw,9151
49
+ agent/providers/omni/parser.py,sha256=lTAoSMSf2zpwqR_8W0SXG3cYIFeUiZa5vXdpjqZwEHY,9161
49
50
  agent/providers/omni/prompts.py,sha256=Mupjy0bUwBjcAeLXpE1r1jisYPSlhwsp-IXJKEKrEtw,3779
50
51
  agent/providers/omni/tool_manager.py,sha256=O6DxyEI-Vg6jt99phh011o4q4me_vNhH2YffIxkO4GM,2585
51
52
  agent/providers/omni/tools/__init__.py,sha256=l636hx9Q5z9eaFdPanPwPENUE-w-Xm8kAZhPUq0ZQF4,309
52
53
  agent/providers/omni/tools/bash.py,sha256=y_ibfP9iRcbiU_E0faAoa4DCP_BlkMlKOOURdBBIGZE,2030
53
54
  agent/providers/omni/tools/computer.py,sha256=xkMmAR0e_kbf0Zs2mggCDyWrQOJZyXOKPFjkutaQb94,9108
54
55
  agent/providers/omni/tools/manager.py,sha256=V_tav2yU92PyQnFlxNXG1wvNEaJoEYudtKx5sRjj06Q,2619
55
- agent/providers/omni/types.py,sha256=G7Zqm-nWMa3K2klj-D3KUVWc2r8NJB7sYZCwwl0m9Ic,1233
56
+ agent/providers/omni/types.py,sha256=rpr7-mH9VK1R-nJ6tVu1gKp427j-hw1DpHc197b44nU,1017
56
57
  agent/providers/omni/utils.py,sha256=JqSye1bEp4wxhUgmaMyZi172fTlgXtygJ7XlnvKdUtE,6337
57
58
  agent/providers/omni/visualization.py,sha256=N3qVQLxYmia3iSVC5oCt5YRlMPuVfylCOyB99R33u8U,3924
59
+ agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
58
60
  agent/types/__init__.py,sha256=61UFJT-w0CT4YRn0LiTx4A7fsMdVQjlXO9vnmbI1A7Y,604
59
61
  agent/types/base.py,sha256=Iy_Q2DIBMLtwWdLyfvHw_6E2ltYu3bIv8GUNy3LYkGs,1133
60
62
  agent/types/messages.py,sha256=4-hwtxeAhto90_EZpHFducddtsHUsHauvXzYrpKG4RE,953
61
63
  agent/types/tools.py,sha256=Jes2CFCFqC727WWHbO-sG7V03rBHnQe5X7Oi9ZkuScI,877
62
- cua_agent-0.1.2.dist-info/METADATA,sha256=bXSToJpS_e5KRzyRELUzCuOkozsDUD29pBMj3DKzF7U,1890
63
- cua_agent-0.1.2.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
64
- cua_agent-0.1.2.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
65
- cua_agent-0.1.2.dist-info/RECORD,,
64
+ cua_agent-0.1.4.dist-info/METADATA,sha256=9AW-hOrH5hmJ3Al9sFgoYV-GUCdhfoRzYYqYAMw2bpA,4528
65
+ cua_agent-0.1.4.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
66
+ cua_agent-0.1.4.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
67
+ cua_agent-0.1.4.dist-info/RECORD,,
cua_agent-0.1.2.dist-info/METADATA REMOVED
@@ -1,44 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: cua-agent
3
- Version: 0.1.2
4
- Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
- Author-Email: TryCua <gh@trycua.com>
6
- Requires-Python: <3.13,>=3.10
7
- Requires-Dist: httpx<0.29.0,>=0.27.0
8
- Requires-Dist: aiohttp<4.0.0,>=3.9.3
9
- Requires-Dist: asyncio
10
- Requires-Dist: anyio<5.0.0,>=4.4.1
11
- Requires-Dist: typing-extensions<5.0.0,>=4.12.2
12
- Requires-Dist: pydantic<3.0.0,>=2.6.4
13
- Requires-Dist: rich<14.0.0,>=13.7.1
14
- Requires-Dist: python-dotenv<2.0.0,>=1.0.1
15
- Requires-Dist: cua-computer<0.2.0,>=0.1.0
16
- Requires-Dist: certifi>=2024.2.2
17
- Provides-Extra: anthropic
18
- Requires-Dist: anthropic>=0.49.0; extra == "anthropic"
19
- Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "anthropic"
20
- Provides-Extra: som
21
- Requires-Dist: torch>=2.2.1; extra == "som"
22
- Requires-Dist: torchvision>=0.17.1; extra == "som"
23
- Requires-Dist: ultralytics>=8.0.0; extra == "som"
24
- Requires-Dist: transformers>=4.38.2; extra == "som"
25
- Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "som"
26
- Requires-Dist: anthropic<0.47.0,>=0.46.0; extra == "som"
27
- Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "som"
28
- Requires-Dist: openai<2.0.0,>=1.14.0; extra == "som"
29
- Requires-Dist: groq<0.5.0,>=0.4.0; extra == "som"
30
- Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "som"
31
- Requires-Dist: requests<3.0.0,>=2.31.0; extra == "som"
32
- Provides-Extra: all
33
- Requires-Dist: torch>=2.2.1; extra == "all"
34
- Requires-Dist: torchvision>=0.17.1; extra == "all"
35
- Requires-Dist: ultralytics>=8.0.0; extra == "all"
36
- Requires-Dist: transformers>=4.38.2; extra == "all"
37
- Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "all"
38
- Requires-Dist: anthropic<0.47.0,>=0.46.0; extra == "all"
39
- Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "all"
40
- Requires-Dist: openai<2.0.0,>=1.14.0; extra == "all"
41
- Requires-Dist: groq<0.5.0,>=0.4.0; extra == "all"
42
- Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "all"
43
- Requires-Dist: requests<3.0.0,>=2.31.0; extra == "all"
44
-