cua_agent-0.4.22-py3-none-any.whl → cua_agent-0.7.16-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (79)
  1. agent/__init__.py +4 -10
  2. agent/__main__.py +2 -1
  3. agent/adapters/__init__.py +4 -0
  4. agent/adapters/azure_ml_adapter.py +283 -0
  5. agent/adapters/cua_adapter.py +161 -0
  6. agent/adapters/huggingfacelocal_adapter.py +67 -125
  7. agent/adapters/human_adapter.py +116 -114
  8. agent/adapters/mlxvlm_adapter.py +110 -99
  9. agent/adapters/models/__init__.py +41 -0
  10. agent/adapters/models/generic.py +78 -0
  11. agent/adapters/models/internvl.py +290 -0
  12. agent/adapters/models/opencua.py +115 -0
  13. agent/adapters/models/qwen2_5_vl.py +78 -0
  14. agent/agent.py +337 -185
  15. agent/callbacks/__init__.py +9 -4
  16. agent/callbacks/base.py +45 -31
  17. agent/callbacks/budget_manager.py +22 -10
  18. agent/callbacks/image_retention.py +54 -98
  19. agent/callbacks/logging.py +55 -42
  20. agent/callbacks/operator_validator.py +35 -33
  21. agent/callbacks/otel.py +291 -0
  22. agent/callbacks/pii_anonymization.py +19 -16
  23. agent/callbacks/prompt_instructions.py +47 -0
  24. agent/callbacks/telemetry.py +99 -61
  25. agent/callbacks/trajectory_saver.py +95 -69
  26. agent/cli.py +269 -119
  27. agent/computers/__init__.py +14 -9
  28. agent/computers/base.py +32 -19
  29. agent/computers/cua.py +52 -25
  30. agent/computers/custom.py +78 -71
  31. agent/decorators.py +23 -14
  32. agent/human_tool/__init__.py +2 -7
  33. agent/human_tool/__main__.py +6 -2
  34. agent/human_tool/server.py +48 -37
  35. agent/human_tool/ui.py +359 -235
  36. agent/integrations/hud/__init__.py +38 -99
  37. agent/integrations/hud/agent.py +369 -0
  38. agent/integrations/hud/proxy.py +166 -52
  39. agent/loops/__init__.py +44 -14
  40. agent/loops/anthropic.py +579 -492
  41. agent/loops/base.py +19 -15
  42. agent/loops/composed_grounded.py +136 -150
  43. agent/loops/fara/__init__.py +8 -0
  44. agent/loops/fara/config.py +506 -0
  45. agent/loops/fara/helpers.py +357 -0
  46. agent/loops/fara/schema.py +143 -0
  47. agent/loops/gelato.py +183 -0
  48. agent/loops/gemini.py +935 -0
  49. agent/loops/generic_vlm.py +601 -0
  50. agent/loops/glm45v.py +140 -135
  51. agent/loops/gta1.py +48 -51
  52. agent/loops/holo.py +218 -0
  53. agent/loops/internvl.py +180 -0
  54. agent/loops/moondream3.py +493 -0
  55. agent/loops/omniparser.py +326 -226
  56. agent/loops/openai.py +50 -51
  57. agent/loops/opencua.py +134 -0
  58. agent/loops/uiins.py +175 -0
  59. agent/loops/uitars.py +247 -206
  60. agent/loops/uitars2.py +951 -0
  61. agent/playground/__init__.py +5 -0
  62. agent/playground/server.py +301 -0
  63. agent/proxy/examples.py +61 -57
  64. agent/proxy/handlers.py +46 -39
  65. agent/responses.py +447 -347
  66. agent/tools/__init__.py +24 -0
  67. agent/tools/base.py +253 -0
  68. agent/tools/browser_tool.py +423 -0
  69. agent/types.py +11 -5
  70. agent/ui/__init__.py +1 -1
  71. agent/ui/__main__.py +1 -1
  72. agent/ui/gradio/app.py +25 -22
  73. agent/ui/gradio/ui_components.py +314 -167
  74. cua_agent-0.7.16.dist-info/METADATA +85 -0
  75. cua_agent-0.7.16.dist-info/RECORD +79 -0
  76. {cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
  77. cua_agent-0.4.22.dist-info/METADATA +0 -436
  78. cua_agent-0.4.22.dist-info/RECORD +0 -51
  79. {cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0
@@ -4,17 +4,18 @@ Logging callback for ComputerAgent that provides configurable logging of agent l
4
4
 
5
5
  import json
6
6
  import logging
7
- from typing import Dict, List, Any, Optional, Union
7
+ from typing import Any, Dict, List, Optional, Union
8
+
8
9
  from .base import AsyncCallbackHandler
9
10
 
10
11
 
11
12
  def sanitize_image_urls(data: Any) -> Any:
12
13
  """
13
14
  Recursively search for 'image_url' keys and set their values to '[omitted]'.
14
-
15
+
15
16
  Args:
16
17
  data: Any data structure (dict, list, or primitive type)
17
-
18
+
18
19
  Returns:
19
20
  A deep copy of the data with all 'image_url' values replaced with '[omitted]'
20
21
  """
@@ -28,11 +29,11 @@ def sanitize_image_urls(data: Any) -> Any:
28
29
  # Recursively sanitize the value
29
30
  sanitized[key] = sanitize_image_urls(value)
30
31
  return sanitized
31
-
32
+
32
33
  elif isinstance(data, list):
33
34
  # Recursively sanitize each item in the list
34
35
  return [sanitize_image_urls(item) for item in data]
35
-
36
+
36
37
  else:
37
38
  # For primitive types (str, int, bool, None, etc.), return as-is
38
39
  return data
@@ -41,37 +42,36 @@ def sanitize_image_urls(data: Any) -> Any:
41
42
  class LoggingCallback(AsyncCallbackHandler):
42
43
  """
43
44
  Callback handler that logs agent lifecycle events with configurable verbosity.
44
-
45
+
45
46
  Logging levels:
46
47
  - DEBUG: All events including API calls, message preprocessing, and detailed outputs
47
- - INFO: Major lifecycle events (start/end, messages, outputs)
48
+ - INFO: Major lifecycle events (start/end, messages, outputs)
48
49
  - WARNING: Only warnings and errors
49
50
  - ERROR: Only errors
50
51
  """
51
-
52
+
52
53
  def __init__(self, logger: Optional[logging.Logger] = None, level: int = logging.INFO):
53
54
  """
54
55
  Initialize the logging callback.
55
-
56
+
56
57
  Args:
57
58
  logger: Logger instance to use. If None, creates a logger named 'agent.ComputerAgent'
58
59
  level: Logging level (logging.DEBUG, logging.INFO, etc.)
59
60
  """
60
- self.logger = logger or logging.getLogger('agent.ComputerAgent')
61
+ self.logger = logger or logging.getLogger("agent.ComputerAgent")
61
62
  self.level = level
62
-
63
+
63
64
  # Set up logger if it doesn't have handlers
64
65
  if not self.logger.handlers:
65
66
  handler = logging.StreamHandler()
66
- formatter = logging.Formatter(
67
- '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
68
- )
67
+ formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
69
68
  handler.setFormatter(formatter)
70
69
  self.logger.addHandler(handler)
71
70
  self.logger.setLevel(level)
72
-
71
+
73
72
  def _update_usage(self, usage: Dict[str, Any]) -> None:
74
73
  """Update total usage statistics."""
74
+
75
75
  def add_dicts(target: Dict[str, Any], source: Dict[str, Any]) -> None:
76
76
  for key, value in source.items():
77
77
  if isinstance(value, dict):
@@ -82,18 +82,25 @@ class LoggingCallback(AsyncCallbackHandler):
82
82
  if key not in target:
83
83
  target[key] = 0
84
84
  target[key] += value
85
+
85
86
  add_dicts(self.total_usage, usage)
86
-
87
+
87
88
  async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
88
89
  """Called before the run starts."""
89
90
  self.total_usage = {}
90
-
91
+
91
92
  async def on_usage(self, usage: Dict[str, Any]) -> None:
92
93
  """Called when usage information is received."""
93
94
  self._update_usage(usage)
94
95
 
95
- async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
96
+ async def on_run_end(
97
+ self,
98
+ kwargs: Dict[str, Any],
99
+ old_items: List[Dict[str, Any]],
100
+ new_items: List[Dict[str, Any]],
101
+ ) -> None:
96
102
  """Called after the run ends."""
103
+
97
104
  def format_dict(d, indent=0):
98
105
  lines = []
99
106
  prefix = f" - {' ' * indent}"
@@ -106,10 +113,10 @@ class LoggingCallback(AsyncCallbackHandler):
106
113
  else:
107
114
  lines.append(f"{prefix}{key}: {value}")
108
115
  return lines
109
-
116
+
110
117
  formatted_output = "\n".join(format_dict(self.total_usage))
111
118
  self.logger.info(f"Total usage:\n{formatted_output}")
112
-
119
+
113
120
  async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
114
121
  """Called before LLM processing starts."""
115
122
  if self.logger.isEnabledFor(logging.INFO):
@@ -118,27 +125,27 @@ class LoggingCallback(AsyncCallbackHandler):
118
125
  sanitized_messages = [sanitize_image_urls(msg) for msg in messages]
119
126
  self.logger.debug(f"LLM input messages: {json.dumps(sanitized_messages, indent=2)}")
120
127
  return messages
121
-
128
+
122
129
  async def on_llm_end(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
123
130
  """Called after LLM processing ends."""
124
131
  if self.logger.isEnabledFor(logging.DEBUG):
125
132
  sanitized_messages = [sanitize_image_urls(msg) for msg in messages]
126
133
  self.logger.debug(f"LLM output: {json.dumps(sanitized_messages, indent=2)}")
127
134
  return messages
128
-
135
+
129
136
  async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
130
137
  """Called when a computer call starts."""
131
138
  action = item.get("action", {})
132
139
  action_type = action.get("type", "unknown")
133
140
  action_args = {k: v for k, v in action.items() if k != "type"}
134
-
141
+
135
142
  # INFO level logging for the action
136
143
  self.logger.info(f"Computer: {action_type}({action_args})")
137
-
144
+
138
145
  # DEBUG level logging for full details
139
146
  if self.logger.isEnabledFor(logging.DEBUG):
140
147
  self.logger.debug(f"Computer call started: {json.dumps(action, indent=2)}")
141
-
148
+
142
149
  async def on_computer_call_end(self, item: Dict[str, Any], result: Any) -> None:
143
150
  """Called when a computer call ends."""
144
151
  if self.logger.isEnabledFor(logging.DEBUG):
@@ -147,48 +154,52 @@ class LoggingCallback(AsyncCallbackHandler):
147
154
  if result:
148
155
  sanitized_result = sanitize_image_urls(result)
149
156
  self.logger.debug(f"Computer call result: {json.dumps(sanitized_result, indent=2)}")
150
-
157
+
151
158
  async def on_function_call_start(self, item: Dict[str, Any]) -> None:
152
159
  """Called when a function call starts."""
153
160
  name = item.get("name", "unknown")
154
161
  arguments = item.get("arguments", "{}")
155
-
162
+
156
163
  # INFO level logging for the function call
157
164
  self.logger.info(f"Function: {name}({arguments})")
158
-
165
+
159
166
  # DEBUG level logging for full details
160
167
  if self.logger.isEnabledFor(logging.DEBUG):
161
168
  self.logger.debug(f"Function call started: {name}")
162
-
169
+
163
170
  async def on_function_call_end(self, item: Dict[str, Any], result: Any) -> None:
164
171
  """Called when a function call ends."""
165
172
  # INFO level logging for function output (similar to function_call_output)
166
173
  if result:
167
174
  # Handle both list and direct result formats
168
175
  if isinstance(result, list) and len(result) > 0:
169
- output = result[0].get("output", str(result)) if isinstance(result[0], dict) else str(result[0])
176
+ output = (
177
+ result[0].get("output", str(result))
178
+ if isinstance(result[0], dict)
179
+ else str(result[0])
180
+ )
170
181
  else:
171
182
  output = str(result)
172
-
183
+
173
184
  # Truncate long outputs
174
185
  if len(output) > 100:
175
186
  output = output[:100] + "..."
176
-
187
+
177
188
  self.logger.info(f"Output: {output}")
178
-
189
+
179
190
  # DEBUG level logging for full details
180
191
  if self.logger.isEnabledFor(logging.DEBUG):
181
192
  name = item.get("name", "unknown")
182
193
  self.logger.debug(f"Function call completed: {name}")
183
194
  if result:
184
195
  self.logger.debug(f"Function call result: {json.dumps(result, indent=2)}")
185
-
196
+
186
197
  async def on_text(self, item: Dict[str, Any]) -> None:
187
198
  """Called when a text message is encountered."""
188
199
  # Get the role to determine if it's Agent or User
189
200
  role = item.get("role", "unknown")
190
201
  content_items = item.get("content", [])
191
-
202
+
192
203
  # Process content items to build display text
193
204
  text_parts = []
194
205
  for content_item in content_items:
@@ -206,10 +217,10 @@ class LoggingCallback(AsyncCallbackHandler):
206
217
  else:
207
218
  # Non-text content, show as [type]
208
219
  text_parts.append(f"[{content_type}]")
209
-
220
+
210
221
  # Join all text parts
211
- display_text = ''.join(text_parts) if text_parts else "[empty]"
212
-
222
+ display_text = "".join(text_parts) if text_parts else "[empty]"
223
+
213
224
  # Log with appropriate level and format
214
225
  if role == "assistant":
215
226
  self.logger.info(f"Agent: {display_text}")
@@ -219,7 +230,7 @@ class LoggingCallback(AsyncCallbackHandler):
219
230
  # Fallback for unknown roles, use debug level
220
231
  if self.logger.isEnabledFor(logging.DEBUG):
221
232
  self.logger.debug(f"Text message ({role}): {display_text}")
222
-
233
+
223
234
  async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
224
235
  """Called when an API call is about to start."""
225
236
  if self.logger.isEnabledFor(logging.DEBUG):
@@ -232,16 +243,18 @@ class LoggingCallback(AsyncCallbackHandler):
232
243
  elif "input" in kwargs:
233
244
  sanitized_input = sanitize_image_urls(kwargs["input"])
234
245
  self.logger.debug(f"API call input: {json.dumps(sanitized_input, indent=2)}")
235
-
246
+
236
247
  async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
237
248
  """Called when an API call has completed."""
238
249
  if self.logger.isEnabledFor(logging.DEBUG):
239
250
  model = kwargs.get("model", "unknown")
240
251
  self.logger.debug(f"API call completed for model: {model}")
241
- self.logger.debug(f"API call result: {json.dumps(sanitize_image_urls(result), indent=2)}")
252
+ self.logger.debug(
253
+ f"API call result: {json.dumps(sanitize_image_urls(result), indent=2)}"
254
+ )
242
255
 
243
256
  async def on_screenshot(self, item: Union[str, bytes], name: str = "screenshot") -> None:
244
257
  """Called when a screenshot is taken."""
245
258
  if self.logger.isEnabledFor(logging.DEBUG):
246
259
  image_size = len(item) / 1024
247
- self.logger.debug(f"Screenshot captured: {name} {image_size:.2f} KB")
260
+ self.logger.debug(f"Screenshot captured: {name} {image_size:.2f} KB")
@@ -9,6 +9,7 @@ Ensures agent output actions conform to expected schemas by fixing common issues
9
9
  This runs in on_llm_end, which receives the output array (AgentMessage[] as dicts).
10
10
  The purpose is to avoid spending another LLM call to fix broken computer call syntax when possible.
11
11
  """
12
+
12
13
  from __future__ import annotations
13
14
 
14
15
  from typing import Any, Dict, List
@@ -48,6 +49,7 @@ class OperatorNormalizerCallback(AsyncCallbackHandler):
48
49
  action["type"] = "type"
49
50
 
50
51
  action_type = action.get("type")
52
+
51
53
  def _keep_keys(action: Dict[str, Any], keys_to_keep: List[str]):
52
54
  """Keep only the provided keys on action; delete everything else.
53
55
  Always ensures required 'type' is present if listed in keys_to_keep.
@@ -55,6 +57,7 @@ class OperatorNormalizerCallback(AsyncCallbackHandler):
55
57
  for key in list(action.keys()):
56
58
  if key not in keys_to_keep:
57
59
  del action[key]
60
+
58
61
  # rename "coordinate" to "x", "y"
59
62
  if "coordinate" in action:
60
63
  action["x"] = action["coordinate"][0]
@@ -100,39 +103,38 @@ class OperatorNormalizerCallback(AsyncCallbackHandler):
100
103
  keep = required_keys_by_type.get(action_type or "")
101
104
  if keep:
102
105
  _keep_keys(action, keep)
103
-
104
106
 
105
- # Second pass: if an assistant message is immediately followed by a computer_call,
106
- # replace the assistant message itself with a reasoning message with summary text.
107
- if isinstance(output, list):
108
- for i, item in enumerate(output):
109
- # AssistantMessage shape: { type: 'message', role: 'assistant', content: OutputContent[] }
110
- if item.get("type") == "message" and item.get("role") == "assistant":
111
- next_idx = i + 1
112
- if next_idx >= len(output):
113
- continue
114
- next_item = output[next_idx]
115
- if not isinstance(next_item, dict):
116
- continue
117
- if next_item.get("type") != "computer_call":
118
- continue
119
- contents = item.get("content") or []
120
- # Extract text from OutputContent[]
121
- text_parts: List[str] = []
122
- if isinstance(contents, list):
123
- for c in contents:
124
- if isinstance(c, dict) and c.get("type") == "output_text" and isinstance(c.get("text"), str):
125
- text_parts.append(c["text"])
126
- text_content = "\n".join(text_parts).strip()
127
- # Replace assistant message with reasoning message
128
- output[i] = {
129
- "type": "reasoning",
130
- "summary": [
131
- {
132
- "type": "summary_text",
133
- "text": text_content,
134
- }
135
- ],
136
- }
107
+ # # Second pass: if an assistant message is immediately followed by a computer_call,
108
+ # # replace the assistant message itself with a reasoning message with summary text.
109
+ # if isinstance(output, list):
110
+ # for i, item in enumerate(output):
111
+ # # AssistantMessage shape: { type: 'message', role: 'assistant', content: OutputContent[] }
112
+ # if item.get("type") == "message" and item.get("role") == "assistant":
113
+ # next_idx = i + 1
114
+ # if next_idx >= len(output):
115
+ # continue
116
+ # next_item = output[next_idx]
117
+ # if not isinstance(next_item, dict):
118
+ # continue
119
+ # if next_item.get("type") != "computer_call":
120
+ # continue
121
+ # contents = item.get("content") or []
122
+ # # Extract text from OutputContent[]
123
+ # text_parts: List[str] = []
124
+ # if isinstance(contents, list):
125
+ # for c in contents:
126
+ # if isinstance(c, dict) and c.get("type") == "output_text" and isinstance(c.get("text"), str):
127
+ # text_parts.append(c["text"])
128
+ # text_content = "\n".join(text_parts).strip()
129
+ # # Replace assistant message with reasoning message
130
+ # output[i] = {
131
+ # "type": "reasoning",
132
+ # "summary": [
133
+ # {
134
+ # "type": "summary_text",
135
+ # "text": text_content,
136
+ # }
137
+ # ],
138
+ # }
137
139
 
138
140
  return output
@@ -0,0 +1,291 @@
1
+ """
2
+ OpenTelemetry callback handler for Computer-Use Agent (cua-agent).
3
+
4
+ Instruments agent operations for the Four Golden Signals:
5
+ - Latency: Operation duration
6
+ - Traffic: Operation counts
7
+ - Errors: Error counts
8
+ - Saturation: Concurrent operations
9
+ """
10
+
11
+ import time
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ from .base import AsyncCallbackHandler
15
+
16
+ # Import OTEL functions - these are available when cua-core[telemetry] is installed
17
+ try:
18
+ from core.telemetry import (
19
+ add_breadcrumb,
20
+ capture_exception,
21
+ create_span,
22
+ is_otel_enabled,
23
+ record_error,
24
+ record_operation,
25
+ record_tokens,
26
+ set_context,
27
+ track_concurrent,
28
+ )
29
+
30
+ OTEL_AVAILABLE = True
31
+ except ImportError:
32
+ OTEL_AVAILABLE = False
33
+
34
+ def is_otel_enabled() -> bool:
35
+ return False
36
+
37
+
38
+ class OtelCallback(AsyncCallbackHandler):
39
+ """
40
+ OpenTelemetry callback handler for instrumentation.
41
+
42
+ Tracks:
43
+ - Agent session lifecycle (start/end)
44
+ - Agent run lifecycle (start/end with duration)
45
+ - Individual steps (with duration)
46
+ - Computer actions (with duration)
47
+ - Token usage
48
+ - Errors
49
+ """
50
+
51
+ def __init__(self, agent: Any):
52
+ """
53
+ Initialize OTEL callback.
54
+
55
+ Args:
56
+ agent: The ComputerAgent instance
57
+ """
58
+ self.agent = agent
59
+ self.model = getattr(agent, "model", "unknown")
60
+
61
+ # Timing state
62
+ self.run_start_time: Optional[float] = None
63
+ self.step_start_time: Optional[float] = None
64
+ self.step_count = 0
65
+
66
+ # Span management
67
+ self._session_span: Optional[Any] = None
68
+ self._run_span: Optional[Any] = None
69
+
70
+ # Track concurrent sessions
71
+ self._concurrent_tracker: Optional[Any] = None
72
+
73
+ if OTEL_AVAILABLE and is_otel_enabled():
74
+ # Set context for all events
75
+ set_context(
76
+ "agent",
77
+ {
78
+ "model": self.model,
79
+ "agent_type": self._get_agent_type(),
80
+ },
81
+ )
82
+
83
+ def _get_agent_type(self) -> str:
84
+ """Get the agent loop type name."""
85
+ if hasattr(self.agent, "agent_loop") and self.agent.agent_loop is not None:
86
+ return type(self.agent.agent_loop).__name__
87
+ return "unknown"
88
+
89
+ async def on_run_start(
90
+ self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]
91
+ ) -> None:
92
+ """Called at the start of an agent run loop."""
93
+ if not OTEL_AVAILABLE or not is_otel_enabled():
94
+ return
95
+
96
+ self.run_start_time = time.perf_counter()
97
+ self.step_count = 0
98
+
99
+ # Add breadcrumb for debugging
100
+ add_breadcrumb(
101
+ category="agent",
102
+ message=f"Agent run started with model {self.model}",
103
+ level="info",
104
+ data={
105
+ "model": self.model,
106
+ "agent_type": self._get_agent_type(),
107
+ "input_messages": len(old_items),
108
+ },
109
+ )
110
+
111
+ async def on_run_end(
112
+ self,
113
+ kwargs: Dict[str, Any],
114
+ old_items: List[Dict[str, Any]],
115
+ new_items: List[Dict[str, Any]],
116
+ ) -> None:
117
+ """Called at the end of an agent run loop."""
118
+ if not OTEL_AVAILABLE or not is_otel_enabled():
119
+ return
120
+
121
+ if self.run_start_time is not None:
122
+ duration = time.perf_counter() - self.run_start_time
123
+
124
+ # Record run metrics
125
+ record_operation(
126
+ operation="agent.run",
127
+ duration_seconds=duration,
128
+ status="success",
129
+ model=self.model,
130
+ steps=self.step_count,
131
+ )
132
+
133
+ add_breadcrumb(
134
+ category="agent",
135
+ message=f"Agent run completed in {duration:.2f}s",
136
+ level="info",
137
+ data={
138
+ "duration_seconds": duration,
139
+ "steps": self.step_count,
140
+ "output_messages": len(new_items),
141
+ },
142
+ )
143
+
144
+ self.run_start_time = None
145
+
146
+ async def on_responses(
147
+ self, kwargs: Dict[str, Any], responses: Dict[str, Any]
148
+ ) -> None:
149
+ """Called when responses are received (each step)."""
150
+ if not OTEL_AVAILABLE or not is_otel_enabled():
151
+ return
152
+
153
+ self.step_count += 1
154
+ current_time = time.perf_counter()
155
+
156
+ # Calculate step duration if we have a start time
157
+ if self.step_start_time is not None:
158
+ step_duration = current_time - self.step_start_time
159
+ record_operation(
160
+ operation="agent.step",
161
+ duration_seconds=step_duration,
162
+ status="success",
163
+ model=self.model,
164
+ step_number=self.step_count,
165
+ )
166
+
167
+ # Start timing next step
168
+ self.step_start_time = current_time
169
+
170
+ add_breadcrumb(
171
+ category="agent",
172
+ message=f"Agent step {self.step_count} completed",
173
+ level="info",
174
+ data={"step": self.step_count},
175
+ )
176
+
177
+ async def on_usage(self, usage: Dict[str, Any]) -> None:
178
+ """Called when usage information is received."""
179
+ if not OTEL_AVAILABLE or not is_otel_enabled():
180
+ return
181
+
182
+ prompt_tokens = usage.get("prompt_tokens", 0)
183
+ completion_tokens = usage.get("completion_tokens", 0)
184
+
185
+ if prompt_tokens > 0 or completion_tokens > 0:
186
+ record_tokens(
187
+ prompt_tokens=prompt_tokens,
188
+ completion_tokens=completion_tokens,
189
+ model=self.model,
190
+ )
191
+
192
+ async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
193
+ """Called when a computer call is about to start."""
194
+ if not OTEL_AVAILABLE or not is_otel_enabled():
195
+ return
196
+
197
+ action = item.get("action", {})
198
+ action_type = action.get("type", "unknown")
199
+
200
+ add_breadcrumb(
201
+ category="computer",
202
+ message=f"Computer action: {action_type}",
203
+ level="info",
204
+ data={"action_type": action_type},
205
+ )
206
+
207
+ async def on_computer_call_end(
208
+ self, item: Dict[str, Any], result: List[Dict[str, Any]]
209
+ ) -> None:
210
+ """Called when a computer call has completed."""
211
+ if not OTEL_AVAILABLE or not is_otel_enabled():
212
+ return
213
+
214
+ action = item.get("action", {})
215
+ action_type = action.get("type", "unknown")
216
+
217
+ # Record computer action metric
218
+ # Note: We don't have precise timing here, so we record with 0 duration
219
+ # The actual timing should be done in the computer module
220
+ record_operation(
221
+ operation=f"computer.action.{action_type}",
222
+ duration_seconds=0, # Timing handled elsewhere
223
+ status="success",
224
+ model=self.model,
225
+ )
226
+
227
+ async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
228
+ """Called when an LLM API call is about to start."""
229
+ if not OTEL_AVAILABLE or not is_otel_enabled():
230
+ return
231
+
232
+ add_breadcrumb(
233
+ category="llm",
234
+ message="LLM API call started",
235
+ level="info",
236
+ data={"model": self.model},
237
+ )
238
+
239
+ async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
240
+ """Called when an LLM API call has completed."""
241
+ if not OTEL_AVAILABLE or not is_otel_enabled():
242
+ return
243
+
244
+ add_breadcrumb(
245
+ category="llm",
246
+ message="LLM API call completed",
247
+ level="info",
248
+ )
249
+
250
+
251
+ class OtelErrorCallback(AsyncCallbackHandler):
252
+ """
253
+ Callback that captures errors and sends them to Sentry/OTEL.
254
+
255
+ Should be added early in the callback chain to catch all errors.
256
+ """
257
+
258
+ def __init__(self, agent: Any):
259
+ """
260
+ Initialize error callback.
261
+
262
+ Args:
263
+ agent: The ComputerAgent instance
264
+ """
265
+ self.agent = agent
266
+ self.model = getattr(agent, "model", "unknown")
267
+
268
+ async def on_error(self, error: Exception, context: Dict[str, Any]) -> None:
269
+ """Called when an error occurs during agent execution."""
270
+ if not OTEL_AVAILABLE or not is_otel_enabled():
271
+ return
272
+
273
+ error_type = type(error).__name__
274
+ operation = context.get("operation", "unknown")
275
+
276
+ # Record error metric
277
+ record_error(
278
+ error_type=error_type,
279
+ operation=operation,
280
+ model=self.model,
281
+ )
282
+
283
+ # Capture exception in Sentry
284
+ capture_exception(
285
+ error,
286
+ context={
287
+ "model": self.model,
288
+ "operation": operation,
289
+ **{k: v for k, v in context.items() if k != "operation"},
290
+ },
291
+ )