cua-agent 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (112)
  1. agent/__init__.py +21 -12
  2. agent/__main__.py +21 -0
  3. agent/adapters/__init__.py +9 -0
  4. agent/adapters/huggingfacelocal_adapter.py +229 -0
  5. agent/agent.py +594 -0
  6. agent/callbacks/__init__.py +19 -0
  7. agent/callbacks/base.py +153 -0
  8. agent/callbacks/budget_manager.py +44 -0
  9. agent/callbacks/image_retention.py +139 -0
  10. agent/callbacks/logging.py +247 -0
  11. agent/callbacks/pii_anonymization.py +259 -0
  12. agent/callbacks/telemetry.py +210 -0
  13. agent/callbacks/trajectory_saver.py +305 -0
  14. agent/cli.py +297 -0
  15. agent/computer_handler.py +107 -0
  16. agent/decorators.py +90 -0
  17. agent/loops/__init__.py +11 -0
  18. agent/loops/anthropic.py +728 -0
  19. agent/loops/omniparser.py +339 -0
  20. agent/loops/openai.py +95 -0
  21. agent/loops/uitars.py +688 -0
  22. agent/responses.py +207 -0
  23. agent/telemetry.py +135 -14
  24. agent/types.py +79 -0
  25. agent/ui/__init__.py +7 -1
  26. agent/ui/__main__.py +2 -13
  27. agent/ui/gradio/__init__.py +6 -19
  28. agent/ui/gradio/app.py +94 -1313
  29. agent/ui/gradio/ui_components.py +721 -0
  30. cua_agent-0.4.0.dist-info/METADATA +424 -0
  31. cua_agent-0.4.0.dist-info/RECORD +33 -0
  32. {cua_agent-0.3.1.dist-info → cua_agent-0.4.0.dist-info}/WHEEL +1 -1
  33. agent/core/__init__.py +0 -27
  34. agent/core/agent.py +0 -210
  35. agent/core/base.py +0 -217
  36. agent/core/callbacks.py +0 -200
  37. agent/core/experiment.py +0 -249
  38. agent/core/factory.py +0 -122
  39. agent/core/messages.py +0 -332
  40. agent/core/provider_config.py +0 -21
  41. agent/core/telemetry.py +0 -142
  42. agent/core/tools/__init__.py +0 -21
  43. agent/core/tools/base.py +0 -74
  44. agent/core/tools/bash.py +0 -52
  45. agent/core/tools/collection.py +0 -46
  46. agent/core/tools/computer.py +0 -113
  47. agent/core/tools/edit.py +0 -67
  48. agent/core/tools/manager.py +0 -56
  49. agent/core/tools.py +0 -32
  50. agent/core/types.py +0 -88
  51. agent/core/visualization.py +0 -197
  52. agent/providers/__init__.py +0 -4
  53. agent/providers/anthropic/__init__.py +0 -6
  54. agent/providers/anthropic/api/client.py +0 -360
  55. agent/providers/anthropic/api/logging.py +0 -150
  56. agent/providers/anthropic/api_handler.py +0 -140
  57. agent/providers/anthropic/callbacks/__init__.py +0 -5
  58. agent/providers/anthropic/callbacks/manager.py +0 -65
  59. agent/providers/anthropic/loop.py +0 -568
  60. agent/providers/anthropic/prompts.py +0 -23
  61. agent/providers/anthropic/response_handler.py +0 -226
  62. agent/providers/anthropic/tools/__init__.py +0 -33
  63. agent/providers/anthropic/tools/base.py +0 -88
  64. agent/providers/anthropic/tools/bash.py +0 -66
  65. agent/providers/anthropic/tools/collection.py +0 -34
  66. agent/providers/anthropic/tools/computer.py +0 -396
  67. agent/providers/anthropic/tools/edit.py +0 -326
  68. agent/providers/anthropic/tools/manager.py +0 -54
  69. agent/providers/anthropic/tools/run.py +0 -42
  70. agent/providers/anthropic/types.py +0 -16
  71. agent/providers/anthropic/utils.py +0 -367
  72. agent/providers/omni/__init__.py +0 -8
  73. agent/providers/omni/api_handler.py +0 -42
  74. agent/providers/omni/clients/anthropic.py +0 -103
  75. agent/providers/omni/clients/base.py +0 -35
  76. agent/providers/omni/clients/oaicompat.py +0 -195
  77. agent/providers/omni/clients/ollama.py +0 -122
  78. agent/providers/omni/clients/openai.py +0 -155
  79. agent/providers/omni/clients/utils.py +0 -25
  80. agent/providers/omni/image_utils.py +0 -34
  81. agent/providers/omni/loop.py +0 -990
  82. agent/providers/omni/parser.py +0 -307
  83. agent/providers/omni/prompts.py +0 -64
  84. agent/providers/omni/tools/__init__.py +0 -30
  85. agent/providers/omni/tools/base.py +0 -29
  86. agent/providers/omni/tools/bash.py +0 -74
  87. agent/providers/omni/tools/computer.py +0 -179
  88. agent/providers/omni/tools/manager.py +0 -61
  89. agent/providers/omni/utils.py +0 -236
  90. agent/providers/openai/__init__.py +0 -6
  91. agent/providers/openai/api_handler.py +0 -456
  92. agent/providers/openai/loop.py +0 -472
  93. agent/providers/openai/response_handler.py +0 -205
  94. agent/providers/openai/tools/__init__.py +0 -15
  95. agent/providers/openai/tools/base.py +0 -79
  96. agent/providers/openai/tools/computer.py +0 -326
  97. agent/providers/openai/tools/manager.py +0 -106
  98. agent/providers/openai/types.py +0 -36
  99. agent/providers/openai/utils.py +0 -98
  100. agent/providers/uitars/__init__.py +0 -1
  101. agent/providers/uitars/clients/base.py +0 -35
  102. agent/providers/uitars/clients/mlxvlm.py +0 -263
  103. agent/providers/uitars/clients/oaicompat.py +0 -214
  104. agent/providers/uitars/loop.py +0 -660
  105. agent/providers/uitars/prompts.py +0 -63
  106. agent/providers/uitars/tools/__init__.py +0 -1
  107. agent/providers/uitars/tools/computer.py +0 -283
  108. agent/providers/uitars/tools/manager.py +0 -60
  109. agent/providers/uitars/utils.py +0 -264
  110. cua_agent-0.3.1.dist-info/METADATA +0 -295
  111. cua_agent-0.3.1.dist-info/RECORD +0 -87
  112. {cua_agent-0.3.1.dist-info → cua_agent-0.4.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,259 @@
1
+ """
2
+ PII anonymization callback handler using Microsoft Presidio for text and image redaction.
3
+ """
4
+
5
+ from typing import List, Dict, Any, Optional, Tuple
6
+ from .base import AsyncCallbackHandler
7
+ import base64
8
+ import io
9
+ import logging
10
+
11
+ try:
12
+ from presidio_analyzer import AnalyzerEngine
13
+ from presidio_anonymizer import AnonymizerEngine, DeanonymizeEngine
14
+ from presidio_anonymizer.entities import RecognizerResult, OperatorConfig
15
+ from presidio_image_redactor import ImageRedactorEngine
16
+ from PIL import Image
17
+ PRESIDIO_AVAILABLE = True
18
+ except ImportError:
19
+ PRESIDIO_AVAILABLE = False
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ class PIIAnonymizationCallback(AsyncCallbackHandler):
24
+ """
25
+ Callback handler that anonymizes PII in text and images using Microsoft Presidio.
26
+
27
+ This handler:
28
+ 1. Anonymizes PII in messages before sending to the agent loop
29
+ 2. Deanonymizes PII in tool calls and message outputs after the agent loop
30
+ 3. Redacts PII from images in computer_call_output messages
31
+ """
32
+
33
def __init__(
    self,
    anonymize_text: bool = True,
    anonymize_images: bool = True,
    entities_to_anonymize: Optional[List[str]] = None,
    anonymization_operator: str = "replace",
    image_redaction_color: Tuple[int, int, int] = (255, 192, 203),  # Pink
):
    """
    Configure the callback and create the Presidio engines it relies on.

    Args:
        anonymize_text: Whether text content should be anonymized.
        anonymize_images: Whether images should be redacted.
        entities_to_anonymize: Entity types to anonymize (None for all).
        anonymization_operator: Name of the Presidio operator to apply
            ("replace", "mask", "redact", etc.).
        image_redaction_color: RGB fill colour used when redacting images.

    Raises:
        ImportError: If the optional Presidio imports failed at module load.
    """
    if not PRESIDIO_AVAILABLE:
        install_hint = (
            "Presidio is not available. Install with: "
            "pip install presidio-analyzer presidio-anonymizer presidio-image-redactor"
        )
        raise ImportError(install_hint)

    # User-supplied configuration
    self.anonymize_text = anonymize_text
    self.anonymize_images = anonymize_images
    self.entities_to_anonymize = entities_to_anonymize
    self.anonymization_operator = anonymization_operator
    self.image_redaction_color = image_redaction_color

    # One Presidio engine per task: detect, anonymize, deanonymize, redact images
    self.analyzer = AnalyzerEngine()
    self.anonymizer = AnonymizerEngine()
    self.deanonymizer = DeanonymizeEngine()
    self.image_redactor = ImageRedactorEngine()

    # Original/anonymized text records kept so outputs can be restored later
    self.anonymization_mappings: Dict[str, Any] = {}
71
+
72
+ async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
73
+ """
74
+ Anonymize PII in messages before sending to agent loop.
75
+
76
+ Args:
77
+ messages: List of message dictionaries
78
+
79
+ Returns:
80
+ List of messages with PII anonymized
81
+ """
82
+ if not self.anonymize_text and not self.anonymize_images:
83
+ return messages
84
+
85
+ anonymized_messages = []
86
+ for msg in messages:
87
+ anonymized_msg = await self._anonymize_message(msg)
88
+ anonymized_messages.append(anonymized_msg)
89
+
90
+ return anonymized_messages
91
+
92
+ async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
93
+ """
94
+ Deanonymize PII in tool calls and message outputs after agent loop.
95
+
96
+ Args:
97
+ output: List of output dictionaries
98
+
99
+ Returns:
100
+ List of output with PII deanonymized for tool calls
101
+ """
102
+ if not self.anonymize_text:
103
+ return output
104
+
105
+ deanonymized_output = []
106
+ for item in output:
107
+ # Only deanonymize tool calls and computer_call messages
108
+ if item.get("type") in ["computer_call", "computer_call_output"]:
109
+ deanonymized_item = await self._deanonymize_item(item)
110
+ deanonymized_output.append(deanonymized_item)
111
+ else:
112
+ deanonymized_output.append(item)
113
+
114
+ return deanonymized_output
115
+
116
+ async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
117
+ """Anonymize PII in a single message."""
118
+ msg_copy = message.copy()
119
+
120
+ # Anonymize text content
121
+ if self.anonymize_text:
122
+ msg_copy = await self._anonymize_text_content(msg_copy)
123
+
124
+ # Redact images in computer_call_output
125
+ if self.anonymize_images and msg_copy.get("type") == "computer_call_output":
126
+ msg_copy = await self._redact_image_content(msg_copy)
127
+
128
+ return msg_copy
129
+
130
+ async def _anonymize_text_content(self, message: Dict[str, Any]) -> Dict[str, Any]:
131
+ """Anonymize text content in a message."""
132
+ msg_copy = message.copy()
133
+
134
+ # Handle content array
135
+ content = msg_copy.get("content", [])
136
+ if isinstance(content, str):
137
+ anonymized_text, _ = await self._anonymize_text(content)
138
+ msg_copy["content"] = anonymized_text
139
+ elif isinstance(content, list):
140
+ anonymized_content = []
141
+ for item in content:
142
+ if isinstance(item, dict) and item.get("type") == "text":
143
+ text = item.get("text", "")
144
+ anonymized_text, _ = await self._anonymize_text(text)
145
+ item_copy = item.copy()
146
+ item_copy["text"] = anonymized_text
147
+ anonymized_content.append(item_copy)
148
+ else:
149
+ anonymized_content.append(item)
150
+ msg_copy["content"] = anonymized_content
151
+
152
+ return msg_copy
153
+
154
+ async def _redact_image_content(self, message: Dict[str, Any]) -> Dict[str, Any]:
155
+ """Redact PII from images in computer_call_output messages."""
156
+ msg_copy = message.copy()
157
+ output = msg_copy.get("output", {})
158
+
159
+ if isinstance(output, dict) and "image_url" in output:
160
+ try:
161
+ # Extract base64 image data
162
+ image_url = output["image_url"]
163
+ if image_url.startswith("data:image/"):
164
+ # Parse data URL
165
+ header, data = image_url.split(",", 1)
166
+ image_data = base64.b64decode(data)
167
+
168
+ # Load image with PIL
169
+ image = Image.open(io.BytesIO(image_data))
170
+
171
+ # Redact PII from image
172
+ redacted_image = self.image_redactor.redact(image, self.image_redaction_color)
173
+
174
+ # Convert back to base64
175
+ buffer = io.BytesIO()
176
+ redacted_image.save(buffer, format="PNG")
177
+ redacted_data = base64.b64encode(buffer.getvalue()).decode()
178
+
179
+ # Update image URL
180
+ output_copy = output.copy()
181
+ output_copy["image_url"] = f"data:image/png;base64,{redacted_data}"
182
+ msg_copy["output"] = output_copy
183
+
184
+ except Exception as e:
185
+ logger.warning(f"Failed to redact image: {e}")
186
+
187
+ return msg_copy
188
+
189
+ async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
190
+ """Deanonymize PII in tool calls and computer outputs."""
191
+ item_copy = item.copy()
192
+
193
+ # Handle computer_call arguments
194
+ if item.get("type") == "computer_call":
195
+ args = item_copy.get("args", {})
196
+ if isinstance(args, dict):
197
+ deanonymized_args = {}
198
+ for key, value in args.items():
199
+ if isinstance(value, str):
200
+ deanonymized_value, _ = await self._deanonymize_text(value)
201
+ deanonymized_args[key] = deanonymized_value
202
+ else:
203
+ deanonymized_args[key] = value
204
+ item_copy["args"] = deanonymized_args
205
+
206
+ return item_copy
207
+
208
+ async def _anonymize_text(self, text: str) -> Tuple[str, List[RecognizerResult]]:
209
+ """Anonymize PII in text and return the anonymized text and results."""
210
+ if not text.strip():
211
+ return text, []
212
+
213
+ try:
214
+ # Analyze text for PII
215
+ analyzer_results = self.analyzer.analyze(
216
+ text=text,
217
+ entities=self.entities_to_anonymize,
218
+ language="en"
219
+ )
220
+
221
+ if not analyzer_results:
222
+ return text, []
223
+
224
+ # Anonymize the text
225
+ anonymized_result = self.anonymizer.anonymize(
226
+ text=text,
227
+ analyzer_results=analyzer_results,
228
+ operators={entity_type: OperatorConfig(self.anonymization_operator)
229
+ for entity_type in set(result.entity_type for result in analyzer_results)}
230
+ )
231
+
232
+ # Store mapping for deanonymization
233
+ mapping_key = str(hash(text))
234
+ self.anonymization_mappings[mapping_key] = {
235
+ "original": text,
236
+ "anonymized": anonymized_result.text,
237
+ "results": analyzer_results
238
+ }
239
+
240
+ return anonymized_result.text, analyzer_results
241
+
242
+ except Exception as e:
243
+ logger.warning(f"Failed to anonymize text: {e}")
244
+ return text, []
245
+
246
+ async def _deanonymize_text(self, text: str) -> Tuple[str, bool]:
247
+ """Attempt to deanonymize text using stored mappings."""
248
+ try:
249
+ # Look for matching anonymized text in mappings
250
+ for mapping_key, mapping in self.anonymization_mappings.items():
251
+ if mapping["anonymized"] == text:
252
+ return mapping["original"], True
253
+
254
+ # If no mapping found, return original text
255
+ return text, False
256
+
257
+ except Exception as e:
258
+ logger.warning(f"Failed to deanonymize text: {e}")
259
+ return text, False
@@ -0,0 +1,210 @@
1
+ """
2
+ Telemetry callback handler for Computer-Use Agent (cua-agent)
3
+ """
4
+
5
+ import time
6
+ import uuid
7
+ from typing import List, Dict, Any, Optional, Union
8
+
9
+ from .base import AsyncCallbackHandler
10
+ from ..telemetry import (
11
+ record_event,
12
+ is_telemetry_enabled,
13
+ set_dimension,
14
+ SYSTEM_INFO,
15
+ )
16
+
17
+
18
+ class TelemetryCallback(AsyncCallbackHandler):
19
+ """
20
+ Telemetry callback handler for Computer-Use Agent (cua-agent)
21
+
22
+ Tracks agent usage, performance metrics, and optionally trajectory data.
23
+ """
24
+
25
def __init__(self, agent, log_trajectory: bool = False):
    """
    Create the telemetry callback for one agent session.

    Args:
        agent: The ComputerAgent instance
        log_trajectory: Whether full trajectory items are attached to
            run events (opt-in)
    """
    self.agent = agent
    self.log_trajectory = log_trajectory

    # A session ID is generated now; the run ID is assigned in on_run_start.
    self.session_id = str(uuid.uuid4())
    self.run_id = None

    # Timing state and counters
    self.run_start_time = None
    self.step_count = 0
    self.step_start_time = None
    self.total_usage = dict(
        prompt_tokens=0,
        completion_tokens=0,
        total_tokens=0,
        response_cost=0.0,
    )

    # Announce the new session only when telemetry is enabled.
    if is_telemetry_enabled():
        self._record_agent_initialization()
58
+
59
def _record_agent_initialization(self) -> None:
    """Publish the session-start event and set the session-level dimensions."""
    loop_name = self.agent.agent_loop.__name__
    model_name = getattr(self.agent, 'model', 'unknown')

    # SYSTEM_INFO is merged last, so its keys win over the ones set here.
    payload = {
        "session_id": self.session_id,
        "agent_type": loop_name,
        "model": model_name,
    }
    payload.update(SYSTEM_INFO)

    # Dimensions are read back from the merged payload.
    set_dimension("session_id", self.session_id)
    set_dimension("agent_type", payload["agent_type"])
    set_dimension("model", payload["model"])

    record_event("agent_session_start", payload)
74
+
75
async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
    """
    Start tracking a new agent run and emit the ``agent_run_start`` event.

    Does nothing when telemetry is disabled.

    Args:
        kwargs: Run arguments passed by the agent (not used here).
        old_items: Conversation items that already exist when the run starts.
    """
    if not is_telemetry_enabled():
        return

    self.run_id = str(uuid.uuid4())
    self.run_start_time = time.time()
    self.step_count = 0
    # Drop the timestamp left by the previous run's last step. Without this
    # the first step of this run would report a duration that includes the
    # idle time between the two runs.
    self.step_start_time = None

    # Calculate input context size
    input_context_size = self._calculate_context_size(old_items)

    run_data = {
        "session_id": self.session_id,
        "run_id": self.run_id,
        "start_time": self.run_start_time,
        "input_context_size": input_context_size,
        "num_existing_messages": len(old_items),
    }

    # Log trajectory if opted in
    if self.log_trajectory:
        trajectory = self._extract_trajectory(old_items)
        if trajectory:
            run_data["uploaded_trajectory"] = trajectory

    set_dimension("run_id", self.run_id)
    record_event("agent_run_start", run_data)
103
+
104
async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
    """
    Emit the ``agent_run_end`` event with duration, step count and usage.

    Does nothing when telemetry is disabled or no run start time is set.
    """
    telemetry_off = not is_telemetry_enabled()
    if telemetry_off or not self.run_start_time:
        return

    elapsed = time.time() - self.run_start_time

    summary = {
        "session_id": self.session_id,
        "run_id": self.run_id,
        "end_time": time.time(),
        "duration_seconds": elapsed,
        "num_steps": self.step_count,
        # Copied so the event payload does not alias the live counters.
        "total_usage": self.total_usage.copy(),
    }

    # Attach the items produced by this run when trajectory logging is opted in.
    if self.log_trajectory:
        produced = self._extract_trajectory(new_items)
        if produced:
            summary["uploaded_trajectory"] = produced

    record_event("agent_run_end", summary)
127
+
128
async def on_usage(self, usage: Dict[str, Any]) -> None:
    """
    Add *usage* to the running totals and emit an ``agent_usage`` event.

    Does nothing when telemetry is disabled.
    """
    if not is_telemetry_enabled():
        return

    # Token counters default to 0, the cost to 0.0, when a key is absent.
    for counter in ("prompt_tokens", "completion_tokens", "total_tokens"):
        self.total_usage[counter] += usage.get(counter, 0)
    self.total_usage["response_cost"] += usage.get("response_cost", 0.0)

    # The raw usage fields are spread last, after the identifying fields.
    event_payload = {
        "session_id": self.session_id,
        "run_id": self.run_id,
        "step": self.step_count,
        **usage,
    }
    record_event("agent_usage", event_payload)
148
+
149
async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
    """
    Count a step and emit an ``agent_step`` event.

    The reported duration is the time elapsed since the previous call of
    this method; it is omitted when no previous timestamp is stored. Does
    nothing when telemetry is disabled.
    """
    if not is_telemetry_enabled():
        return

    self.step_count += 1

    # Measure against the previous step's timestamp before overwriting it.
    previous_mark = self.step_start_time
    elapsed = time.time() - previous_mark if previous_mark else None
    self.step_start_time = time.time()

    event_payload = {
        "session_id": self.session_id,
        "run_id": self.run_id,
        "step": self.step_count,
        "timestamp": self.step_start_time,
    }
    if elapsed is not None:
        event_payload["duration_seconds"] = elapsed

    record_event("agent_step", event_payload)
173
+
174
+ def _calculate_context_size(self, items: List[Dict[str, Any]]) -> int:
175
+ """Calculate approximate context size in tokens/characters."""
176
+ total_size = 0
177
+
178
+ for item in items:
179
+ if item.get("type") == "message" and "content" in item:
180
+ content = item["content"]
181
+ if isinstance(content, str):
182
+ total_size += len(content)
183
+ elif isinstance(content, list):
184
+ for part in content:
185
+ if isinstance(part, dict) and "text" in part:
186
+ total_size += len(part["text"])
187
+ elif "content" in item and isinstance(item["content"], str):
188
+ total_size += len(item["content"])
189
+
190
+ return total_size
191
+
192
+ def _extract_trajectory(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
193
+ """Extract trajectory items that should be logged."""
194
+ trajectory = []
195
+
196
+ for item in items:
197
+ # Include user messages, assistant messages, reasoning, computer calls, and computer outputs
198
+ if (
199
+ item.get("role") == "user" or # User inputs
200
+ (item.get("type") == "message" and item.get("role") == "assistant") or # Model outputs
201
+ item.get("type") == "reasoning" or # Reasoning traces
202
+ item.get("type") == "computer_call" or # Computer actions
203
+ item.get("type") == "computer_call_output" # Computer outputs
204
+ ):
205
+ # Create a copy of the item with timestamp
206
+ trajectory_item = item.copy()
207
+ trajectory_item["logged_at"] = time.time()
208
+ trajectory.append(trajectory_item)
209
+
210
+ return trajectory