cua-agent: cua_agent-0.4.34-py3-none-any.whl → cua_agent-0.4.36-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/__init__.py +4 -10
- agent/__main__.py +2 -1
- agent/adapters/huggingfacelocal_adapter.py +54 -61
- agent/adapters/human_adapter.py +116 -114
- agent/adapters/mlxvlm_adapter.py +110 -99
- agent/adapters/models/__init__.py +14 -6
- agent/adapters/models/generic.py +7 -4
- agent/adapters/models/internvl.py +66 -30
- agent/adapters/models/opencua.py +23 -8
- agent/adapters/models/qwen2_5_vl.py +7 -4
- agent/agent.py +184 -158
- agent/callbacks/__init__.py +4 -4
- agent/callbacks/base.py +45 -31
- agent/callbacks/budget_manager.py +22 -10
- agent/callbacks/image_retention.py +18 -13
- agent/callbacks/logging.py +55 -42
- agent/callbacks/operator_validator.py +3 -1
- agent/callbacks/pii_anonymization.py +19 -16
- agent/callbacks/telemetry.py +67 -61
- agent/callbacks/trajectory_saver.py +90 -70
- agent/cli.py +115 -110
- agent/computers/__init__.py +13 -8
- agent/computers/base.py +32 -19
- agent/computers/cua.py +33 -25
- agent/computers/custom.py +78 -71
- agent/decorators.py +23 -14
- agent/human_tool/__init__.py +2 -7
- agent/human_tool/__main__.py +6 -2
- agent/human_tool/server.py +48 -37
- agent/human_tool/ui.py +235 -185
- agent/integrations/hud/__init__.py +15 -21
- agent/integrations/hud/agent.py +101 -83
- agent/integrations/hud/proxy.py +90 -57
- agent/loops/__init__.py +25 -21
- agent/loops/anthropic.py +537 -483
- agent/loops/base.py +13 -14
- agent/loops/composed_grounded.py +135 -149
- agent/loops/gemini.py +31 -12
- agent/loops/glm45v.py +135 -133
- agent/loops/gta1.py +47 -50
- agent/loops/holo.py +4 -2
- agent/loops/internvl.py +6 -11
- agent/loops/moondream3.py +36 -12
- agent/loops/omniparser.py +215 -210
- agent/loops/openai.py +49 -50
- agent/loops/opencua.py +29 -41
- agent/loops/qwen.py +510 -0
- agent/loops/uitars.py +237 -202
- agent/proxy/examples.py +54 -50
- agent/proxy/handlers.py +27 -34
- agent/responses.py +330 -330
- agent/types.py +11 -5
- agent/ui/__init__.py +1 -1
- agent/ui/__main__.py +1 -1
- agent/ui/gradio/app.py +23 -18
- agent/ui/gradio/ui_components.py +310 -161
- {cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/METADATA +18 -10
- cua_agent-0.4.36.dist-info/RECORD +64 -0
- cua_agent-0.4.34.dist-info/RECORD +0 -63
- {cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/WHEEL +0 -0
- {cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/entry_points.txt +0 -0
agent/callbacks/pii_anonymization.py
CHANGED
|
@@ -2,38 +2,41 @@
|
|
|
2
2
|
PII anonymization callback handler using Microsoft Presidio for text and image redaction.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from typing import List, Dict, Any, Optional, Tuple
|
|
6
|
-
from .base import AsyncCallbackHandler
|
|
7
5
|
import base64
|
|
8
6
|
import io
|
|
9
7
|
import logging
|
|
8
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
9
|
+
|
|
10
|
+
from .base import AsyncCallbackHandler
|
|
10
11
|
|
|
11
12
|
try:
|
|
12
13
|
# TODO: Add Presidio dependencies
|
|
13
14
|
from PIL import Image
|
|
15
|
+
|
|
14
16
|
PRESIDIO_AVAILABLE = True
|
|
15
17
|
except ImportError:
|
|
16
18
|
PRESIDIO_AVAILABLE = False
|
|
17
19
|
|
|
18
20
|
logger = logging.getLogger(__name__)
|
|
19
21
|
|
|
22
|
+
|
|
20
23
|
class PIIAnonymizationCallback(AsyncCallbackHandler):
|
|
21
24
|
"""
|
|
22
25
|
Callback handler that anonymizes PII in text and images using Microsoft Presidio.
|
|
23
|
-
|
|
26
|
+
|
|
24
27
|
This handler:
|
|
25
28
|
1. Anonymizes PII in messages before sending to the agent loop
|
|
26
29
|
2. Deanonymizes PII in tool calls and message outputs after the agent loop
|
|
27
30
|
3. Redacts PII from images in computer_call_output messages
|
|
28
31
|
"""
|
|
29
|
-
|
|
32
|
+
|
|
30
33
|
def __init__(
|
|
31
34
|
self,
|
|
32
35
|
# TODO: Any extra kwargs if needed
|
|
33
36
|
):
|
|
34
37
|
"""
|
|
35
38
|
Initialize the PII anonymization callback.
|
|
36
|
-
|
|
39
|
+
|
|
37
40
|
Args:
|
|
38
41
|
anonymize_text: Whether to anonymize text content
|
|
39
42
|
anonymize_images: Whether to redact images
|
|
@@ -46,16 +49,16 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
|
|
|
46
49
|
"Presidio is not available. Install with: "
|
|
47
50
|
"pip install cua-agent[pii-anonymization]"
|
|
48
51
|
)
|
|
49
|
-
|
|
52
|
+
|
|
50
53
|
# TODO: Implement __init__
|
|
51
|
-
|
|
54
|
+
|
|
52
55
|
async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
53
56
|
"""
|
|
54
57
|
Anonymize PII in messages before sending to agent loop.
|
|
55
|
-
|
|
58
|
+
|
|
56
59
|
Args:
|
|
57
60
|
messages: List of message dictionaries
|
|
58
|
-
|
|
61
|
+
|
|
59
62
|
Returns:
|
|
60
63
|
List of messages with PII anonymized
|
|
61
64
|
"""
|
|
@@ -63,16 +66,16 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
|
|
|
63
66
|
for msg in messages:
|
|
64
67
|
anonymized_msg = await self._anonymize_message(msg)
|
|
65
68
|
anonymized_messages.append(anonymized_msg)
|
|
66
|
-
|
|
69
|
+
|
|
67
70
|
return anonymized_messages
|
|
68
|
-
|
|
71
|
+
|
|
69
72
|
async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
70
73
|
"""
|
|
71
74
|
Deanonymize PII in tool calls and message outputs after agent loop.
|
|
72
|
-
|
|
75
|
+
|
|
73
76
|
Args:
|
|
74
77
|
output: List of output dictionaries
|
|
75
|
-
|
|
78
|
+
|
|
76
79
|
Returns:
|
|
77
80
|
List of output with PII deanonymized for tool calls
|
|
78
81
|
"""
|
|
@@ -84,13 +87,13 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
|
|
|
84
87
|
deanonymized_output.append(deanonymized_item)
|
|
85
88
|
else:
|
|
86
89
|
deanonymized_output.append(item)
|
|
87
|
-
|
|
90
|
+
|
|
88
91
|
return deanonymized_output
|
|
89
|
-
|
|
92
|
+
|
|
90
93
|
async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
|
|
91
94
|
# TODO: Implement _anonymize_message
|
|
92
95
|
return message
|
|
93
|
-
|
|
96
|
+
|
|
94
97
|
async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
|
|
95
98
|
# TODO: Implement _deanonymize_item
|
|
96
99
|
return item
|
agent/callbacks/telemetry.py
CHANGED
|
@@ -2,17 +2,17 @@
|
|
|
2
2
|
Telemetry callback handler for Computer-Use Agent (cua-agent)
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
import platform
|
|
5
6
|
import time
|
|
6
7
|
import uuid
|
|
7
|
-
from typing import
|
|
8
|
+
from typing import Any, Dict, List, Optional, Union
|
|
8
9
|
|
|
9
|
-
from .base import AsyncCallbackHandler
|
|
10
10
|
from core.telemetry import (
|
|
11
|
-
record_event,
|
|
12
11
|
is_telemetry_enabled,
|
|
12
|
+
record_event,
|
|
13
13
|
)
|
|
14
14
|
|
|
15
|
-
import platform
|
|
15
|
+
from .base import AsyncCallbackHandler
|
|
16
16
|
|
|
17
17
|
SYSTEM_INFO = {
|
|
18
18
|
"os": platform.system().lower(),
|
|
@@ -20,32 +20,29 @@ SYSTEM_INFO = {
|
|
|
20
20
|
"python_version": platform.python_version(),
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
+
|
|
23
24
|
class TelemetryCallback(AsyncCallbackHandler):
|
|
24
25
|
"""
|
|
25
26
|
Telemetry callback handler for Computer-Use Agent (cua-agent)
|
|
26
|
-
|
|
27
|
+
|
|
27
28
|
Tracks agent usage, performance metrics, and optionally trajectory data.
|
|
28
29
|
"""
|
|
29
|
-
|
|
30
|
-
def __init__(
|
|
31
|
-
self,
|
|
32
|
-
agent,
|
|
33
|
-
log_trajectory: bool = False
|
|
34
|
-
):
|
|
30
|
+
|
|
31
|
+
def __init__(self, agent, log_trajectory: bool = False):
|
|
35
32
|
"""
|
|
36
33
|
Initialize telemetry callback.
|
|
37
|
-
|
|
34
|
+
|
|
38
35
|
Args:
|
|
39
36
|
agent: The ComputerAgent instance
|
|
40
37
|
log_trajectory: Whether to log full trajectory items (opt-in)
|
|
41
38
|
"""
|
|
42
39
|
self.agent = agent
|
|
43
40
|
self.log_trajectory = log_trajectory
|
|
44
|
-
|
|
41
|
+
|
|
45
42
|
# Generate session/run IDs
|
|
46
43
|
self.session_id = str(uuid.uuid4())
|
|
47
44
|
self.run_id = None
|
|
48
|
-
|
|
45
|
+
|
|
49
46
|
# Track timing and metrics
|
|
50
47
|
self.run_start_time = None
|
|
51
48
|
self.step_count = 0
|
|
@@ -54,126 +51,133 @@ class TelemetryCallback(AsyncCallbackHandler):
|
|
|
54
51
|
"prompt_tokens": 0,
|
|
55
52
|
"completion_tokens": 0,
|
|
56
53
|
"total_tokens": 0,
|
|
57
|
-
"response_cost": 0.0
|
|
54
|
+
"response_cost": 0.0,
|
|
58
55
|
}
|
|
59
|
-
|
|
56
|
+
|
|
60
57
|
# Record agent initialization
|
|
61
58
|
if is_telemetry_enabled():
|
|
62
59
|
self._record_agent_initialization()
|
|
63
|
-
|
|
60
|
+
|
|
64
61
|
def _record_agent_initialization(self) -> None:
|
|
65
62
|
"""Record agent type/model and session initialization."""
|
|
66
63
|
agent_info = {
|
|
67
64
|
"session_id": self.session_id,
|
|
68
|
-
"agent_type":
|
|
69
|
-
|
|
70
|
-
|
|
65
|
+
"agent_type": (
|
|
66
|
+
self.agent.agent_loop.__name__ if hasattr(self.agent, "agent_loop") else "unknown"
|
|
67
|
+
),
|
|
68
|
+
"model": getattr(self.agent, "model", "unknown"),
|
|
69
|
+
**SYSTEM_INFO,
|
|
71
70
|
}
|
|
72
|
-
|
|
71
|
+
|
|
73
72
|
record_event("agent_session_start", agent_info)
|
|
74
|
-
|
|
73
|
+
|
|
75
74
|
async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
|
|
76
75
|
"""Called at the start of an agent run loop."""
|
|
77
76
|
if not is_telemetry_enabled():
|
|
78
77
|
return
|
|
79
|
-
|
|
78
|
+
|
|
80
79
|
self.run_id = str(uuid.uuid4())
|
|
81
80
|
self.run_start_time = time.time()
|
|
82
81
|
self.step_count = 0
|
|
83
|
-
|
|
82
|
+
|
|
84
83
|
# Calculate input context size
|
|
85
84
|
input_context_size = self._calculate_context_size(old_items)
|
|
86
|
-
|
|
85
|
+
|
|
87
86
|
run_data = {
|
|
88
87
|
"session_id": self.session_id,
|
|
89
88
|
"run_id": self.run_id,
|
|
90
89
|
"start_time": self.run_start_time,
|
|
91
90
|
"input_context_size": input_context_size,
|
|
92
|
-
"num_existing_messages": len(old_items)
|
|
91
|
+
"num_existing_messages": len(old_items),
|
|
93
92
|
}
|
|
94
|
-
|
|
93
|
+
|
|
95
94
|
# Log trajectory if opted in
|
|
96
95
|
if self.log_trajectory:
|
|
97
96
|
trajectory = self._extract_trajectory(old_items)
|
|
98
97
|
if trajectory:
|
|
99
98
|
run_data["uploaded_trajectory"] = trajectory
|
|
100
|
-
|
|
99
|
+
|
|
101
100
|
record_event("agent_run_start", run_data)
|
|
102
|
-
|
|
103
|
-
async def on_run_end(
|
|
101
|
+
|
|
102
|
+
async def on_run_end(
|
|
103
|
+
self,
|
|
104
|
+
kwargs: Dict[str, Any],
|
|
105
|
+
old_items: List[Dict[str, Any]],
|
|
106
|
+
new_items: List[Dict[str, Any]],
|
|
107
|
+
) -> None:
|
|
104
108
|
"""Called at the end of an agent run loop."""
|
|
105
109
|
if not is_telemetry_enabled() or not self.run_start_time:
|
|
106
110
|
return
|
|
107
|
-
|
|
111
|
+
|
|
108
112
|
run_duration = time.time() - self.run_start_time
|
|
109
|
-
|
|
113
|
+
|
|
110
114
|
run_data = {
|
|
111
115
|
"session_id": self.session_id,
|
|
112
116
|
"run_id": self.run_id,
|
|
113
117
|
"end_time": time.time(),
|
|
114
118
|
"duration_seconds": run_duration,
|
|
115
119
|
"num_steps": self.step_count,
|
|
116
|
-
"total_usage": self.total_usage.copy()
|
|
120
|
+
"total_usage": self.total_usage.copy(),
|
|
117
121
|
}
|
|
118
|
-
|
|
122
|
+
|
|
119
123
|
# Log trajectory if opted in
|
|
120
124
|
if self.log_trajectory:
|
|
121
125
|
trajectory = self._extract_trajectory(new_items)
|
|
122
126
|
if trajectory:
|
|
123
127
|
run_data["uploaded_trajectory"] = trajectory
|
|
124
|
-
|
|
128
|
+
|
|
125
129
|
record_event("agent_run_end", run_data)
|
|
126
|
-
|
|
130
|
+
|
|
127
131
|
async def on_usage(self, usage: Dict[str, Any]) -> None:
|
|
128
132
|
"""Called when usage information is received."""
|
|
129
133
|
if not is_telemetry_enabled():
|
|
130
134
|
return
|
|
131
|
-
|
|
135
|
+
|
|
132
136
|
# Accumulate usage stats
|
|
133
137
|
self.total_usage["prompt_tokens"] += usage.get("prompt_tokens", 0)
|
|
134
|
-
self.total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
|
|
138
|
+
self.total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
|
|
135
139
|
self.total_usage["total_tokens"] += usage.get("total_tokens", 0)
|
|
136
140
|
self.total_usage["response_cost"] += usage.get("response_cost", 0.0)
|
|
137
|
-
|
|
141
|
+
|
|
138
142
|
# Record individual usage event
|
|
139
143
|
usage_data = {
|
|
140
144
|
"session_id": self.session_id,
|
|
141
145
|
"run_id": self.run_id,
|
|
142
146
|
"step": self.step_count,
|
|
143
|
-
**usage
|
|
147
|
+
**usage,
|
|
144
148
|
}
|
|
145
|
-
|
|
149
|
+
|
|
146
150
|
record_event("agent_usage", usage_data)
|
|
147
|
-
|
|
151
|
+
|
|
148
152
|
async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
|
|
149
153
|
"""Called when responses are received."""
|
|
150
154
|
if not is_telemetry_enabled():
|
|
151
155
|
return
|
|
152
|
-
|
|
156
|
+
|
|
153
157
|
self.step_count += 1
|
|
154
158
|
step_duration = None
|
|
155
|
-
|
|
159
|
+
|
|
156
160
|
if self.step_start_time:
|
|
157
161
|
step_duration = time.time() - self.step_start_time
|
|
158
|
-
|
|
162
|
+
|
|
159
163
|
self.step_start_time = time.time()
|
|
160
|
-
|
|
164
|
+
|
|
161
165
|
step_data = {
|
|
162
166
|
"session_id": self.session_id,
|
|
163
167
|
"run_id": self.run_id,
|
|
164
168
|
"step": self.step_count,
|
|
165
|
-
"timestamp": self.step_start_time
|
|
169
|
+
"timestamp": self.step_start_time,
|
|
166
170
|
}
|
|
167
|
-
|
|
171
|
+
|
|
168
172
|
if step_duration is not None:
|
|
169
173
|
step_data["duration_seconds"] = step_duration
|
|
170
|
-
|
|
174
|
+
|
|
171
175
|
record_event("agent_step", step_data)
|
|
172
|
-
|
|
176
|
+
|
|
173
177
|
def _calculate_context_size(self, items: List[Dict[str, Any]]) -> int:
|
|
174
178
|
"""Calculate approximate context size in tokens/characters."""
|
|
175
179
|
total_size = 0
|
|
176
|
-
|
|
180
|
+
|
|
177
181
|
for item in items:
|
|
178
182
|
if item.get("type") == "message" and "content" in item:
|
|
179
183
|
content = item["content"]
|
|
@@ -185,25 +189,27 @@ class TelemetryCallback(AsyncCallbackHandler):
|
|
|
185
189
|
total_size += len(part["text"])
|
|
186
190
|
elif "content" in item and isinstance(item["content"], str):
|
|
187
191
|
total_size += len(item["content"])
|
|
188
|
-
|
|
192
|
+
|
|
189
193
|
return total_size
|
|
190
|
-
|
|
194
|
+
|
|
191
195
|
def _extract_trajectory(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
192
196
|
"""Extract trajectory items that should be logged."""
|
|
193
197
|
trajectory = []
|
|
194
|
-
|
|
198
|
+
|
|
195
199
|
for item in items:
|
|
196
200
|
# Include user messages, assistant messages, reasoning, computer calls, and computer outputs
|
|
197
201
|
if (
|
|
198
|
-
item.get("role") == "user"
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
item.get("type") == "
|
|
202
|
+
item.get("role") == "user" # User inputs
|
|
203
|
+
or (
|
|
204
|
+
item.get("type") == "message" and item.get("role") == "assistant"
|
|
205
|
+
) # Model outputs
|
|
206
|
+
or item.get("type") == "reasoning" # Reasoning traces
|
|
207
|
+
or item.get("type") == "computer_call" # Computer actions
|
|
208
|
+
or item.get("type") == "computer_call_output" # Computer outputs
|
|
203
209
|
):
|
|
204
210
|
# Create a copy of the item with timestamp
|
|
205
211
|
trajectory_item = item.copy()
|
|
206
212
|
trajectory_item["logged_at"] = time.time()
|
|
207
213
|
trajectory.append(trajectory_item)
|
|
208
|
-
|
|
209
|
-
return trajectory
|
|
214
|
+
|
|
215
|
+
return trajectory
|