cua_agent-0.4.14-py3-none-any.whl → cua_agent-0.7.16-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (82)
  1. agent/__init__.py +4 -19
  2. agent/__main__.py +2 -1
  3. agent/adapters/__init__.py +6 -0
  4. agent/adapters/azure_ml_adapter.py +283 -0
  5. agent/adapters/cua_adapter.py +161 -0
  6. agent/adapters/huggingfacelocal_adapter.py +67 -125
  7. agent/adapters/human_adapter.py +116 -114
  8. agent/adapters/mlxvlm_adapter.py +370 -0
  9. agent/adapters/models/__init__.py +41 -0
  10. agent/adapters/models/generic.py +78 -0
  11. agent/adapters/models/internvl.py +290 -0
  12. agent/adapters/models/opencua.py +115 -0
  13. agent/adapters/models/qwen2_5_vl.py +78 -0
  14. agent/agent.py +431 -241
  15. agent/callbacks/__init__.py +10 -3
  16. agent/callbacks/base.py +45 -31
  17. agent/callbacks/budget_manager.py +22 -10
  18. agent/callbacks/image_retention.py +54 -98
  19. agent/callbacks/logging.py +55 -42
  20. agent/callbacks/operator_validator.py +140 -0
  21. agent/callbacks/otel.py +291 -0
  22. agent/callbacks/pii_anonymization.py +19 -16
  23. agent/callbacks/prompt_instructions.py +47 -0
  24. agent/callbacks/telemetry.py +106 -69
  25. agent/callbacks/trajectory_saver.py +178 -70
  26. agent/cli.py +269 -119
  27. agent/computers/__init__.py +14 -9
  28. agent/computers/base.py +32 -19
  29. agent/computers/cua.py +52 -25
  30. agent/computers/custom.py +78 -71
  31. agent/decorators.py +23 -14
  32. agent/human_tool/__init__.py +2 -7
  33. agent/human_tool/__main__.py +6 -2
  34. agent/human_tool/server.py +48 -37
  35. agent/human_tool/ui.py +359 -235
  36. agent/integrations/hud/__init__.py +164 -74
  37. agent/integrations/hud/agent.py +338 -342
  38. agent/integrations/hud/proxy.py +297 -0
  39. agent/loops/__init__.py +44 -14
  40. agent/loops/anthropic.py +590 -492
  41. agent/loops/base.py +19 -15
  42. agent/loops/composed_grounded.py +142 -144
  43. agent/loops/fara/__init__.py +8 -0
  44. agent/loops/fara/config.py +506 -0
  45. agent/loops/fara/helpers.py +357 -0
  46. agent/loops/fara/schema.py +143 -0
  47. agent/loops/gelato.py +183 -0
  48. agent/loops/gemini.py +935 -0
  49. agent/loops/generic_vlm.py +601 -0
  50. agent/loops/glm45v.py +140 -135
  51. agent/loops/gta1.py +48 -51
  52. agent/loops/holo.py +218 -0
  53. agent/loops/internvl.py +180 -0
  54. agent/loops/moondream3.py +493 -0
  55. agent/loops/omniparser.py +326 -226
  56. agent/loops/openai.py +63 -56
  57. agent/loops/opencua.py +134 -0
  58. agent/loops/uiins.py +175 -0
  59. agent/loops/uitars.py +262 -212
  60. agent/loops/uitars2.py +951 -0
  61. agent/playground/__init__.py +5 -0
  62. agent/playground/server.py +301 -0
  63. agent/proxy/examples.py +196 -0
  64. agent/proxy/handlers.py +255 -0
  65. agent/responses.py +486 -339
  66. agent/tools/__init__.py +24 -0
  67. agent/tools/base.py +253 -0
  68. agent/tools/browser_tool.py +423 -0
  69. agent/types.py +20 -5
  70. agent/ui/__init__.py +1 -1
  71. agent/ui/__main__.py +1 -1
  72. agent/ui/gradio/app.py +25 -22
  73. agent/ui/gradio/ui_components.py +314 -167
  74. cua_agent-0.7.16.dist-info/METADATA +85 -0
  75. cua_agent-0.7.16.dist-info/RECORD +79 -0
  76. {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
  77. agent/integrations/hud/adapter.py +0 -121
  78. agent/integrations/hud/computer_handler.py +0 -187
  79. agent/telemetry.py +0 -142
  80. cua_agent-0.4.14.dist-info/METADATA +0 -436
  81. cua_agent-0.4.14.dist-info/RECORD +0 -50
  82. {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0
@@ -2,45 +2,47 @@
2
2
  Telemetry callback handler for Computer-Use Agent (cua-agent)
3
3
  """
4
4
 
5
+ import platform
5
6
  import time
6
7
  import uuid
7
- from typing import List, Dict, Any, Optional, Union
8
+ from typing import Any, Dict, List, Optional, Union
8
9
 
9
- from .base import AsyncCallbackHandler
10
- from ..telemetry import (
11
- record_event,
10
+ from core.telemetry import (
12
11
  is_telemetry_enabled,
13
- set_dimension,
14
- SYSTEM_INFO,
12
+ record_event,
15
13
  )
16
14
 
15
+ from .base import AsyncCallbackHandler
16
+
17
+ SYSTEM_INFO = {
18
+ "os": platform.system().lower(),
19
+ "os_version": platform.release(),
20
+ "python_version": platform.python_version(),
21
+ }
22
+
17
23
 
18
24
  class TelemetryCallback(AsyncCallbackHandler):
19
25
  """
20
26
  Telemetry callback handler for Computer-Use Agent (cua-agent)
21
-
27
+
22
28
  Tracks agent usage, performance metrics, and optionally trajectory data.
23
29
  """
24
-
25
- def __init__(
26
- self,
27
- agent,
28
- log_trajectory: bool = False
29
- ):
30
+
31
+ def __init__(self, agent, log_trajectory: bool = False):
30
32
  """
31
33
  Initialize telemetry callback.
32
-
34
+
33
35
  Args:
34
36
  agent: The ComputerAgent instance
35
37
  log_trajectory: Whether to log full trajectory items (opt-in)
36
38
  """
37
39
  self.agent = agent
38
40
  self.log_trajectory = log_trajectory
39
-
41
+
40
42
  # Generate session/run IDs
41
43
  self.session_id = str(uuid.uuid4())
42
44
  self.run_id = None
43
-
45
+
44
46
  # Track timing and metrics
45
47
  self.run_start_time = None
46
48
  self.step_count = 0
@@ -49,132 +51,165 @@ class TelemetryCallback(AsyncCallbackHandler):
49
51
  "prompt_tokens": 0,
50
52
  "completion_tokens": 0,
51
53
  "total_tokens": 0,
52
- "response_cost": 0.0
54
+ "response_cost": 0.0,
53
55
  }
54
-
56
+
55
57
  # Record agent initialization
56
58
  if is_telemetry_enabled():
57
59
  self._record_agent_initialization()
58
-
60
+
59
61
  def _record_agent_initialization(self) -> None:
60
62
  """Record agent type/model and session initialization."""
63
+ # Get the agent loop type (class name)
64
+ agent_type = "unknown"
65
+ if hasattr(self.agent, "agent_loop") and self.agent.agent_loop is not None:
66
+ agent_type = type(self.agent.agent_loop).__name__
67
+
61
68
  agent_info = {
62
69
  "session_id": self.session_id,
63
- "agent_type": self.agent.agent_loop.__name__ if hasattr(self.agent, 'agent_loop') else 'unknown',
64
- "model": getattr(self.agent, 'model', 'unknown'),
65
- **SYSTEM_INFO
70
+ "agent_type": agent_type,
71
+ "model": getattr(self.agent, "model", "unknown"),
72
+ **SYSTEM_INFO,
66
73
  }
67
-
68
- # Set session-level dimensions
69
- set_dimension("session_id", self.session_id)
70
- set_dimension("agent_type", agent_info["agent_type"])
71
- set_dimension("model", agent_info["model"])
72
-
74
+
75
+ # Include VM name if available
76
+ vm_name = self._get_vm_name()
77
+ if vm_name:
78
+ agent_info["vm_name"] = vm_name
79
+
73
80
  record_event("agent_session_start", agent_info)
74
-
81
+
75
82
  async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
76
83
  """Called at the start of an agent run loop."""
77
84
  if not is_telemetry_enabled():
78
85
  return
79
-
86
+
80
87
  self.run_id = str(uuid.uuid4())
81
88
  self.run_start_time = time.time()
82
89
  self.step_count = 0
83
-
90
+
84
91
  # Calculate input context size
85
92
  input_context_size = self._calculate_context_size(old_items)
86
-
93
+
87
94
  run_data = {
88
95
  "session_id": self.session_id,
89
96
  "run_id": self.run_id,
90
97
  "start_time": self.run_start_time,
91
98
  "input_context_size": input_context_size,
92
- "num_existing_messages": len(old_items)
99
+ "num_existing_messages": len(old_items),
93
100
  }
94
-
101
+
102
+ # Include VM name if available
103
+ vm_name = self._get_vm_name()
104
+ if vm_name:
105
+ run_data["vm_name"] = vm_name
106
+
95
107
  # Log trajectory if opted in
96
108
  if self.log_trajectory:
97
109
  trajectory = self._extract_trajectory(old_items)
98
110
  if trajectory:
99
111
  run_data["uploaded_trajectory"] = trajectory
100
-
101
- set_dimension("run_id", self.run_id)
112
+
102
113
  record_event("agent_run_start", run_data)
103
-
104
- async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
114
+
115
+ async def on_run_end(
116
+ self,
117
+ kwargs: Dict[str, Any],
118
+ old_items: List[Dict[str, Any]],
119
+ new_items: List[Dict[str, Any]],
120
+ ) -> None:
105
121
  """Called at the end of an agent run loop."""
106
122
  if not is_telemetry_enabled() or not self.run_start_time:
107
123
  return
108
-
124
+
109
125
  run_duration = time.time() - self.run_start_time
110
-
126
+
111
127
  run_data = {
112
128
  "session_id": self.session_id,
113
129
  "run_id": self.run_id,
114
130
  "end_time": time.time(),
115
131
  "duration_seconds": run_duration,
116
132
  "num_steps": self.step_count,
117
- "total_usage": self.total_usage.copy()
133
+ "total_usage": self.total_usage.copy(),
118
134
  }
119
-
135
+
136
+ # Include VM name if available
137
+ vm_name = self._get_vm_name()
138
+ if vm_name:
139
+ run_data["vm_name"] = vm_name
140
+
120
141
  # Log trajectory if opted in
121
142
  if self.log_trajectory:
122
143
  trajectory = self._extract_trajectory(new_items)
123
144
  if trajectory:
124
145
  run_data["uploaded_trajectory"] = trajectory
125
-
146
+
126
147
  record_event("agent_run_end", run_data)
127
-
148
+
128
149
  async def on_usage(self, usage: Dict[str, Any]) -> None:
129
150
  """Called when usage information is received."""
130
151
  if not is_telemetry_enabled():
131
152
  return
132
-
153
+
133
154
  # Accumulate usage stats
134
155
  self.total_usage["prompt_tokens"] += usage.get("prompt_tokens", 0)
135
- self.total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
156
+ self.total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
136
157
  self.total_usage["total_tokens"] += usage.get("total_tokens", 0)
137
158
  self.total_usage["response_cost"] += usage.get("response_cost", 0.0)
138
-
159
+
139
160
  # Record individual usage event
140
161
  usage_data = {
141
162
  "session_id": self.session_id,
142
163
  "run_id": self.run_id,
143
164
  "step": self.step_count,
144
- **usage
165
+ **usage,
145
166
  }
146
-
167
+
147
168
  record_event("agent_usage", usage_data)
148
-
169
+
149
170
  async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
150
171
  """Called when responses are received."""
151
172
  if not is_telemetry_enabled():
152
173
  return
153
-
174
+
154
175
  self.step_count += 1
155
176
  step_duration = None
156
-
177
+
157
178
  if self.step_start_time:
158
179
  step_duration = time.time() - self.step_start_time
159
-
180
+
160
181
  self.step_start_time = time.time()
161
-
182
+
162
183
  step_data = {
163
184
  "session_id": self.session_id,
164
185
  "run_id": self.run_id,
165
186
  "step": self.step_count,
166
- "timestamp": self.step_start_time
187
+ "timestamp": self.step_start_time,
167
188
  }
168
-
189
+
169
190
  if step_duration is not None:
170
191
  step_data["duration_seconds"] = step_duration
171
-
192
+
172
193
  record_event("agent_step", step_data)
173
-
194
+
195
+ def _get_vm_name(self) -> Optional[str]:
196
+ """Extract VM name from agent's computer handler if available."""
197
+ try:
198
+ if hasattr(self.agent, "computer_handler") and self.agent.computer_handler:
199
+ handler = self.agent.computer_handler
200
+ # Check if it's a cuaComputerHandler with a cua_computer
201
+ if hasattr(handler, "cua_computer"):
202
+ computer = handler.cua_computer
203
+ if hasattr(computer, "config") and hasattr(computer.config, "name"):
204
+ return computer.config.name
205
+ except Exception:
206
+ pass
207
+ return None
208
+
174
209
  def _calculate_context_size(self, items: List[Dict[str, Any]]) -> int:
175
210
  """Calculate approximate context size in tokens/characters."""
176
211
  total_size = 0
177
-
212
+
178
213
  for item in items:
179
214
  if item.get("type") == "message" and "content" in item:
180
215
  content = item["content"]
@@ -186,25 +221,27 @@ class TelemetryCallback(AsyncCallbackHandler):
186
221
  total_size += len(part["text"])
187
222
  elif "content" in item and isinstance(item["content"], str):
188
223
  total_size += len(item["content"])
189
-
224
+
190
225
  return total_size
191
-
226
+
192
227
  def _extract_trajectory(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
193
228
  """Extract trajectory items that should be logged."""
194
229
  trajectory = []
195
-
230
+
196
231
  for item in items:
197
232
  # Include user messages, assistant messages, reasoning, computer calls, and computer outputs
198
233
  if (
199
- item.get("role") == "user" or # User inputs
200
- (item.get("type") == "message" and item.get("role") == "assistant") or # Model outputs
201
- item.get("type") == "reasoning" or # Reasoning traces
202
- item.get("type") == "computer_call" or # Computer actions
203
- item.get("type") == "computer_call_output" # Computer outputs
234
+ item.get("role") == "user" # User inputs
235
+ or (
236
+ item.get("type") == "message" and item.get("role") == "assistant"
237
+ ) # Model outputs
238
+ or item.get("type") == "reasoning" # Reasoning traces
239
+ or item.get("type") == "computer_call" # Computer actions
240
+ or item.get("type") == "computer_call_output" # Computer outputs
204
241
  ):
205
242
  # Create a copy of the item with timestamp
206
243
  trajectory_item = item.copy()
207
244
  trajectory_item["logged_at"] = time.time()
208
245
  trajectory.append(trajectory_item)
209
-
210
- return trajectory
246
+
247
+ return trajectory