cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/__init__.py +4 -10
- agent/__main__.py +2 -1
- agent/adapters/__init__.py +4 -0
- agent/adapters/azure_ml_adapter.py +283 -0
- agent/adapters/cua_adapter.py +161 -0
- agent/adapters/huggingfacelocal_adapter.py +67 -125
- agent/adapters/human_adapter.py +116 -114
- agent/adapters/mlxvlm_adapter.py +110 -99
- agent/adapters/models/__init__.py +41 -0
- agent/adapters/models/generic.py +78 -0
- agent/adapters/models/internvl.py +290 -0
- agent/adapters/models/opencua.py +115 -0
- agent/adapters/models/qwen2_5_vl.py +78 -0
- agent/agent.py +337 -185
- agent/callbacks/__init__.py +9 -4
- agent/callbacks/base.py +45 -31
- agent/callbacks/budget_manager.py +22 -10
- agent/callbacks/image_retention.py +54 -98
- agent/callbacks/logging.py +55 -42
- agent/callbacks/operator_validator.py +35 -33
- agent/callbacks/otel.py +291 -0
- agent/callbacks/pii_anonymization.py +19 -16
- agent/callbacks/prompt_instructions.py +47 -0
- agent/callbacks/telemetry.py +99 -61
- agent/callbacks/trajectory_saver.py +95 -69
- agent/cli.py +269 -119
- agent/computers/__init__.py +14 -9
- agent/computers/base.py +32 -19
- agent/computers/cua.py +52 -25
- agent/computers/custom.py +78 -71
- agent/decorators.py +23 -14
- agent/human_tool/__init__.py +2 -7
- agent/human_tool/__main__.py +6 -2
- agent/human_tool/server.py +48 -37
- agent/human_tool/ui.py +359 -235
- agent/integrations/hud/__init__.py +38 -99
- agent/integrations/hud/agent.py +369 -0
- agent/integrations/hud/proxy.py +166 -52
- agent/loops/__init__.py +44 -14
- agent/loops/anthropic.py +579 -492
- agent/loops/base.py +19 -15
- agent/loops/composed_grounded.py +136 -150
- agent/loops/fara/__init__.py +8 -0
- agent/loops/fara/config.py +506 -0
- agent/loops/fara/helpers.py +357 -0
- agent/loops/fara/schema.py +143 -0
- agent/loops/gelato.py +183 -0
- agent/loops/gemini.py +935 -0
- agent/loops/generic_vlm.py +601 -0
- agent/loops/glm45v.py +140 -135
- agent/loops/gta1.py +48 -51
- agent/loops/holo.py +218 -0
- agent/loops/internvl.py +180 -0
- agent/loops/moondream3.py +493 -0
- agent/loops/omniparser.py +326 -226
- agent/loops/openai.py +50 -51
- agent/loops/opencua.py +134 -0
- agent/loops/uiins.py +175 -0
- agent/loops/uitars.py +247 -206
- agent/loops/uitars2.py +951 -0
- agent/playground/__init__.py +5 -0
- agent/playground/server.py +301 -0
- agent/proxy/examples.py +61 -57
- agent/proxy/handlers.py +46 -39
- agent/responses.py +447 -347
- agent/tools/__init__.py +24 -0
- agent/tools/base.py +253 -0
- agent/tools/browser_tool.py +423 -0
- agent/types.py +11 -5
- agent/ui/__init__.py +1 -1
- agent/ui/__main__.py +1 -1
- agent/ui/gradio/app.py +25 -22
- agent/ui/gradio/ui_components.py +314 -167
- cua_agent-0.7.16.dist-info/METADATA +85 -0
- cua_agent-0.7.16.dist-info/RECORD +79 -0
- {cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
- cua_agent-0.4.22.dist-info/METADATA +0 -436
- cua_agent-0.4.22.dist-info/RECORD +0 -51
- {cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0
agent/callbacks/logging.py
CHANGED
|
@@ -4,17 +4,18 @@ Logging callback for ComputerAgent that provides configurable logging of agent l
|
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
6
|
import logging
|
|
7
|
-
from typing import Dict, List,
|
|
7
|
+
from typing import Any, Dict, List, Optional, Union
|
|
8
|
+
|
|
8
9
|
from .base import AsyncCallbackHandler
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
def sanitize_image_urls(data: Any) -> Any:
|
|
12
13
|
"""
|
|
13
14
|
Recursively search for 'image_url' keys and set their values to '[omitted]'.
|
|
14
|
-
|
|
15
|
+
|
|
15
16
|
Args:
|
|
16
17
|
data: Any data structure (dict, list, or primitive type)
|
|
17
|
-
|
|
18
|
+
|
|
18
19
|
Returns:
|
|
19
20
|
A deep copy of the data with all 'image_url' values replaced with '[omitted]'
|
|
20
21
|
"""
|
|
@@ -28,11 +29,11 @@ def sanitize_image_urls(data: Any) -> Any:
|
|
|
28
29
|
# Recursively sanitize the value
|
|
29
30
|
sanitized[key] = sanitize_image_urls(value)
|
|
30
31
|
return sanitized
|
|
31
|
-
|
|
32
|
+
|
|
32
33
|
elif isinstance(data, list):
|
|
33
34
|
# Recursively sanitize each item in the list
|
|
34
35
|
return [sanitize_image_urls(item) for item in data]
|
|
35
|
-
|
|
36
|
+
|
|
36
37
|
else:
|
|
37
38
|
# For primitive types (str, int, bool, None, etc.), return as-is
|
|
38
39
|
return data
|
|
@@ -41,37 +42,36 @@ def sanitize_image_urls(data: Any) -> Any:
|
|
|
41
42
|
class LoggingCallback(AsyncCallbackHandler):
|
|
42
43
|
"""
|
|
43
44
|
Callback handler that logs agent lifecycle events with configurable verbosity.
|
|
44
|
-
|
|
45
|
+
|
|
45
46
|
Logging levels:
|
|
46
47
|
- DEBUG: All events including API calls, message preprocessing, and detailed outputs
|
|
47
|
-
- INFO: Major lifecycle events (start/end, messages, outputs)
|
|
48
|
+
- INFO: Major lifecycle events (start/end, messages, outputs)
|
|
48
49
|
- WARNING: Only warnings and errors
|
|
49
50
|
- ERROR: Only errors
|
|
50
51
|
"""
|
|
51
|
-
|
|
52
|
+
|
|
52
53
|
def __init__(self, logger: Optional[logging.Logger] = None, level: int = logging.INFO):
|
|
53
54
|
"""
|
|
54
55
|
Initialize the logging callback.
|
|
55
|
-
|
|
56
|
+
|
|
56
57
|
Args:
|
|
57
58
|
logger: Logger instance to use. If None, creates a logger named 'agent.ComputerAgent'
|
|
58
59
|
level: Logging level (logging.DEBUG, logging.INFO, etc.)
|
|
59
60
|
"""
|
|
60
|
-
self.logger = logger or logging.getLogger(
|
|
61
|
+
self.logger = logger or logging.getLogger("agent.ComputerAgent")
|
|
61
62
|
self.level = level
|
|
62
|
-
|
|
63
|
+
|
|
63
64
|
# Set up logger if it doesn't have handlers
|
|
64
65
|
if not self.logger.handlers:
|
|
65
66
|
handler = logging.StreamHandler()
|
|
66
|
-
formatter = logging.Formatter(
|
|
67
|
-
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
68
|
-
)
|
|
67
|
+
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
|
69
68
|
handler.setFormatter(formatter)
|
|
70
69
|
self.logger.addHandler(handler)
|
|
71
70
|
self.logger.setLevel(level)
|
|
72
|
-
|
|
71
|
+
|
|
73
72
|
def _update_usage(self, usage: Dict[str, Any]) -> None:
|
|
74
73
|
"""Update total usage statistics."""
|
|
74
|
+
|
|
75
75
|
def add_dicts(target: Dict[str, Any], source: Dict[str, Any]) -> None:
|
|
76
76
|
for key, value in source.items():
|
|
77
77
|
if isinstance(value, dict):
|
|
@@ -82,18 +82,25 @@ class LoggingCallback(AsyncCallbackHandler):
|
|
|
82
82
|
if key not in target:
|
|
83
83
|
target[key] = 0
|
|
84
84
|
target[key] += value
|
|
85
|
+
|
|
85
86
|
add_dicts(self.total_usage, usage)
|
|
86
|
-
|
|
87
|
+
|
|
87
88
|
async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
|
|
88
89
|
"""Called before the run starts."""
|
|
89
90
|
self.total_usage = {}
|
|
90
|
-
|
|
91
|
+
|
|
91
92
|
async def on_usage(self, usage: Dict[str, Any]) -> None:
|
|
92
93
|
"""Called when usage information is received."""
|
|
93
94
|
self._update_usage(usage)
|
|
94
95
|
|
|
95
|
-
async def on_run_end(
|
|
96
|
+
async def on_run_end(
|
|
97
|
+
self,
|
|
98
|
+
kwargs: Dict[str, Any],
|
|
99
|
+
old_items: List[Dict[str, Any]],
|
|
100
|
+
new_items: List[Dict[str, Any]],
|
|
101
|
+
) -> None:
|
|
96
102
|
"""Called after the run ends."""
|
|
103
|
+
|
|
97
104
|
def format_dict(d, indent=0):
|
|
98
105
|
lines = []
|
|
99
106
|
prefix = f" - {' ' * indent}"
|
|
@@ -106,10 +113,10 @@ class LoggingCallback(AsyncCallbackHandler):
|
|
|
106
113
|
else:
|
|
107
114
|
lines.append(f"{prefix}{key}: {value}")
|
|
108
115
|
return lines
|
|
109
|
-
|
|
116
|
+
|
|
110
117
|
formatted_output = "\n".join(format_dict(self.total_usage))
|
|
111
118
|
self.logger.info(f"Total usage:\n{formatted_output}")
|
|
112
|
-
|
|
119
|
+
|
|
113
120
|
async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
114
121
|
"""Called before LLM processing starts."""
|
|
115
122
|
if self.logger.isEnabledFor(logging.INFO):
|
|
@@ -118,27 +125,27 @@ class LoggingCallback(AsyncCallbackHandler):
|
|
|
118
125
|
sanitized_messages = [sanitize_image_urls(msg) for msg in messages]
|
|
119
126
|
self.logger.debug(f"LLM input messages: {json.dumps(sanitized_messages, indent=2)}")
|
|
120
127
|
return messages
|
|
121
|
-
|
|
128
|
+
|
|
122
129
|
async def on_llm_end(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
123
130
|
"""Called after LLM processing ends."""
|
|
124
131
|
if self.logger.isEnabledFor(logging.DEBUG):
|
|
125
132
|
sanitized_messages = [sanitize_image_urls(msg) for msg in messages]
|
|
126
133
|
self.logger.debug(f"LLM output: {json.dumps(sanitized_messages, indent=2)}")
|
|
127
134
|
return messages
|
|
128
|
-
|
|
135
|
+
|
|
129
136
|
async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
|
|
130
137
|
"""Called when a computer call starts."""
|
|
131
138
|
action = item.get("action", {})
|
|
132
139
|
action_type = action.get("type", "unknown")
|
|
133
140
|
action_args = {k: v for k, v in action.items() if k != "type"}
|
|
134
|
-
|
|
141
|
+
|
|
135
142
|
# INFO level logging for the action
|
|
136
143
|
self.logger.info(f"Computer: {action_type}({action_args})")
|
|
137
|
-
|
|
144
|
+
|
|
138
145
|
# DEBUG level logging for full details
|
|
139
146
|
if self.logger.isEnabledFor(logging.DEBUG):
|
|
140
147
|
self.logger.debug(f"Computer call started: {json.dumps(action, indent=2)}")
|
|
141
|
-
|
|
148
|
+
|
|
142
149
|
async def on_computer_call_end(self, item: Dict[str, Any], result: Any) -> None:
|
|
143
150
|
"""Called when a computer call ends."""
|
|
144
151
|
if self.logger.isEnabledFor(logging.DEBUG):
|
|
@@ -147,48 +154,52 @@ class LoggingCallback(AsyncCallbackHandler):
|
|
|
147
154
|
if result:
|
|
148
155
|
sanitized_result = sanitize_image_urls(result)
|
|
149
156
|
self.logger.debug(f"Computer call result: {json.dumps(sanitized_result, indent=2)}")
|
|
150
|
-
|
|
157
|
+
|
|
151
158
|
async def on_function_call_start(self, item: Dict[str, Any]) -> None:
|
|
152
159
|
"""Called when a function call starts."""
|
|
153
160
|
name = item.get("name", "unknown")
|
|
154
161
|
arguments = item.get("arguments", "{}")
|
|
155
|
-
|
|
162
|
+
|
|
156
163
|
# INFO level logging for the function call
|
|
157
164
|
self.logger.info(f"Function: {name}({arguments})")
|
|
158
|
-
|
|
165
|
+
|
|
159
166
|
# DEBUG level logging for full details
|
|
160
167
|
if self.logger.isEnabledFor(logging.DEBUG):
|
|
161
168
|
self.logger.debug(f"Function call started: {name}")
|
|
162
|
-
|
|
169
|
+
|
|
163
170
|
async def on_function_call_end(self, item: Dict[str, Any], result: Any) -> None:
|
|
164
171
|
"""Called when a function call ends."""
|
|
165
172
|
# INFO level logging for function output (similar to function_call_output)
|
|
166
173
|
if result:
|
|
167
174
|
# Handle both list and direct result formats
|
|
168
175
|
if isinstance(result, list) and len(result) > 0:
|
|
169
|
-
output =
|
|
176
|
+
output = (
|
|
177
|
+
result[0].get("output", str(result))
|
|
178
|
+
if isinstance(result[0], dict)
|
|
179
|
+
else str(result[0])
|
|
180
|
+
)
|
|
170
181
|
else:
|
|
171
182
|
output = str(result)
|
|
172
|
-
|
|
183
|
+
|
|
173
184
|
# Truncate long outputs
|
|
174
185
|
if len(output) > 100:
|
|
175
186
|
output = output[:100] + "..."
|
|
176
|
-
|
|
187
|
+
|
|
177
188
|
self.logger.info(f"Output: {output}")
|
|
178
|
-
|
|
189
|
+
|
|
179
190
|
# DEBUG level logging for full details
|
|
180
191
|
if self.logger.isEnabledFor(logging.DEBUG):
|
|
181
192
|
name = item.get("name", "unknown")
|
|
182
193
|
self.logger.debug(f"Function call completed: {name}")
|
|
183
194
|
if result:
|
|
184
195
|
self.logger.debug(f"Function call result: {json.dumps(result, indent=2)}")
|
|
185
|
-
|
|
196
|
+
|
|
186
197
|
async def on_text(self, item: Dict[str, Any]) -> None:
|
|
187
198
|
"""Called when a text message is encountered."""
|
|
188
199
|
# Get the role to determine if it's Agent or User
|
|
189
200
|
role = item.get("role", "unknown")
|
|
190
201
|
content_items = item.get("content", [])
|
|
191
|
-
|
|
202
|
+
|
|
192
203
|
# Process content items to build display text
|
|
193
204
|
text_parts = []
|
|
194
205
|
for content_item in content_items:
|
|
@@ -206,10 +217,10 @@ class LoggingCallback(AsyncCallbackHandler):
|
|
|
206
217
|
else:
|
|
207
218
|
# Non-text content, show as [type]
|
|
208
219
|
text_parts.append(f"[{content_type}]")
|
|
209
|
-
|
|
220
|
+
|
|
210
221
|
# Join all text parts
|
|
211
|
-
display_text =
|
|
212
|
-
|
|
222
|
+
display_text = "".join(text_parts) if text_parts else "[empty]"
|
|
223
|
+
|
|
213
224
|
# Log with appropriate level and format
|
|
214
225
|
if role == "assistant":
|
|
215
226
|
self.logger.info(f"Agent: {display_text}")
|
|
@@ -219,7 +230,7 @@ class LoggingCallback(AsyncCallbackHandler):
|
|
|
219
230
|
# Fallback for unknown roles, use debug level
|
|
220
231
|
if self.logger.isEnabledFor(logging.DEBUG):
|
|
221
232
|
self.logger.debug(f"Text message ({role}): {display_text}")
|
|
222
|
-
|
|
233
|
+
|
|
223
234
|
async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
|
|
224
235
|
"""Called when an API call is about to start."""
|
|
225
236
|
if self.logger.isEnabledFor(logging.DEBUG):
|
|
@@ -232,16 +243,18 @@ class LoggingCallback(AsyncCallbackHandler):
|
|
|
232
243
|
elif "input" in kwargs:
|
|
233
244
|
sanitized_input = sanitize_image_urls(kwargs["input"])
|
|
234
245
|
self.logger.debug(f"API call input: {json.dumps(sanitized_input, indent=2)}")
|
|
235
|
-
|
|
246
|
+
|
|
236
247
|
async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
|
|
237
248
|
"""Called when an API call has completed."""
|
|
238
249
|
if self.logger.isEnabledFor(logging.DEBUG):
|
|
239
250
|
model = kwargs.get("model", "unknown")
|
|
240
251
|
self.logger.debug(f"API call completed for model: {model}")
|
|
241
|
-
self.logger.debug(
|
|
252
|
+
self.logger.debug(
|
|
253
|
+
f"API call result: {json.dumps(sanitize_image_urls(result), indent=2)}"
|
|
254
|
+
)
|
|
242
255
|
|
|
243
256
|
async def on_screenshot(self, item: Union[str, bytes], name: str = "screenshot") -> None:
|
|
244
257
|
"""Called when a screenshot is taken."""
|
|
245
258
|
if self.logger.isEnabledFor(logging.DEBUG):
|
|
246
259
|
image_size = len(item) / 1024
|
|
247
|
-
self.logger.debug(f"Screenshot captured: {name} {image_size:.2f} KB")
|
|
260
|
+
self.logger.debug(f"Screenshot captured: {name} {image_size:.2f} KB")
|
|
@@ -9,6 +9,7 @@ Ensures agent output actions conform to expected schemas by fixing common issues
|
|
|
9
9
|
This runs in on_llm_end, which receives the output array (AgentMessage[] as dicts).
|
|
10
10
|
The purpose is to avoid spending another LLM call to fix broken computer call syntax when possible.
|
|
11
11
|
"""
|
|
12
|
+
|
|
12
13
|
from __future__ import annotations
|
|
13
14
|
|
|
14
15
|
from typing import Any, Dict, List
|
|
@@ -48,6 +49,7 @@ class OperatorNormalizerCallback(AsyncCallbackHandler):
|
|
|
48
49
|
action["type"] = "type"
|
|
49
50
|
|
|
50
51
|
action_type = action.get("type")
|
|
52
|
+
|
|
51
53
|
def _keep_keys(action: Dict[str, Any], keys_to_keep: List[str]):
|
|
52
54
|
"""Keep only the provided keys on action; delete everything else.
|
|
53
55
|
Always ensures required 'type' is present if listed in keys_to_keep.
|
|
@@ -55,6 +57,7 @@ class OperatorNormalizerCallback(AsyncCallbackHandler):
|
|
|
55
57
|
for key in list(action.keys()):
|
|
56
58
|
if key not in keys_to_keep:
|
|
57
59
|
del action[key]
|
|
60
|
+
|
|
58
61
|
# rename "coordinate" to "x", "y"
|
|
59
62
|
if "coordinate" in action:
|
|
60
63
|
action["x"] = action["coordinate"][0]
|
|
@@ -100,39 +103,38 @@ class OperatorNormalizerCallback(AsyncCallbackHandler):
|
|
|
100
103
|
keep = required_keys_by_type.get(action_type or "")
|
|
101
104
|
if keep:
|
|
102
105
|
_keep_keys(action, keep)
|
|
103
|
-
|
|
104
106
|
|
|
105
|
-
# Second pass: if an assistant message is immediately followed by a computer_call,
|
|
106
|
-
# replace the assistant message itself with a reasoning message with summary text.
|
|
107
|
-
if isinstance(output, list):
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
107
|
+
# # Second pass: if an assistant message is immediately followed by a computer_call,
|
|
108
|
+
# # replace the assistant message itself with a reasoning message with summary text.
|
|
109
|
+
# if isinstance(output, list):
|
|
110
|
+
# for i, item in enumerate(output):
|
|
111
|
+
# # AssistantMessage shape: { type: 'message', role: 'assistant', content: OutputContent[] }
|
|
112
|
+
# if item.get("type") == "message" and item.get("role") == "assistant":
|
|
113
|
+
# next_idx = i + 1
|
|
114
|
+
# if next_idx >= len(output):
|
|
115
|
+
# continue
|
|
116
|
+
# next_item = output[next_idx]
|
|
117
|
+
# if not isinstance(next_item, dict):
|
|
118
|
+
# continue
|
|
119
|
+
# if next_item.get("type") != "computer_call":
|
|
120
|
+
# continue
|
|
121
|
+
# contents = item.get("content") or []
|
|
122
|
+
# # Extract text from OutputContent[]
|
|
123
|
+
# text_parts: List[str] = []
|
|
124
|
+
# if isinstance(contents, list):
|
|
125
|
+
# for c in contents:
|
|
126
|
+
# if isinstance(c, dict) and c.get("type") == "output_text" and isinstance(c.get("text"), str):
|
|
127
|
+
# text_parts.append(c["text"])
|
|
128
|
+
# text_content = "\n".join(text_parts).strip()
|
|
129
|
+
# # Replace assistant message with reasoning message
|
|
130
|
+
# output[i] = {
|
|
131
|
+
# "type": "reasoning",
|
|
132
|
+
# "summary": [
|
|
133
|
+
# {
|
|
134
|
+
# "type": "summary_text",
|
|
135
|
+
# "text": text_content,
|
|
136
|
+
# }
|
|
137
|
+
# ],
|
|
138
|
+
# }
|
|
137
139
|
|
|
138
140
|
return output
|
agent/callbacks/otel.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OpenTelemetry callback handler for Computer-Use Agent (cua-agent).
|
|
3
|
+
|
|
4
|
+
Instruments agent operations for the Four Golden Signals:
|
|
5
|
+
- Latency: Operation duration
|
|
6
|
+
- Traffic: Operation counts
|
|
7
|
+
- Errors: Error counts
|
|
8
|
+
- Saturation: Concurrent operations
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import time
|
|
12
|
+
from typing import Any, Dict, List, Optional
|
|
13
|
+
|
|
14
|
+
from .base import AsyncCallbackHandler
|
|
15
|
+
|
|
16
|
+
# Import OTEL functions - these are available when cua-core[telemetry] is installed
|
|
17
|
+
try:
|
|
18
|
+
from core.telemetry import (
|
|
19
|
+
add_breadcrumb,
|
|
20
|
+
capture_exception,
|
|
21
|
+
create_span,
|
|
22
|
+
is_otel_enabled,
|
|
23
|
+
record_error,
|
|
24
|
+
record_operation,
|
|
25
|
+
record_tokens,
|
|
26
|
+
set_context,
|
|
27
|
+
track_concurrent,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
OTEL_AVAILABLE = True
|
|
31
|
+
except ImportError:
|
|
32
|
+
OTEL_AVAILABLE = False
|
|
33
|
+
|
|
34
|
+
def is_otel_enabled() -> bool:
|
|
35
|
+
return False
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class OtelCallback(AsyncCallbackHandler):
    """
    OpenTelemetry callback handler for instrumentation.

    Tracks:
    - Agent run lifecycle (start/end with duration)
    - Individual steps (with duration, measured between consecutive responses)
    - Computer actions (counted; recorded with a zero duration)
    - Token usage
    - LLM API call breadcrumbs

    Every hook is a no-op unless the telemetry helpers were importable
    (``OTEL_AVAILABLE``) and ``is_otel_enabled()`` returns True.
    """

    def __init__(self, agent: Any):
        """
        Initialize OTEL callback.

        Args:
            agent: The ComputerAgent instance
        """
        self.agent = agent
        self.model = getattr(agent, "model", "unknown")

        # Timing state
        self.run_start_time: Optional[float] = None
        self.step_start_time: Optional[float] = None
        self.step_count = 0

        # Span management (reserved; not populated by this class)
        self._session_span: Optional[Any] = None
        self._run_span: Optional[Any] = None

        # Track concurrent sessions (reserved; not populated by this class)
        self._concurrent_tracker: Optional[Any] = None

        if self._otel_active():
            # Set context for all events
            set_context(
                "agent",
                {
                    "model": self.model,
                    "agent_type": self._get_agent_type(),
                },
            )

    @staticmethod
    def _otel_active() -> bool:
        """Return True when telemetry helpers are importable and enabled."""
        return OTEL_AVAILABLE and is_otel_enabled()

    @staticmethod
    def _action_type(item: Dict[str, Any]) -> str:
        """Return the computer action type of *item*, or "unknown"."""
        # `or {}` also covers an explicit None stored under "action".
        action = item.get("action") or {}
        return action.get("type", "unknown")

    def _get_agent_type(self) -> str:
        """Get the agent loop type name."""
        agent_loop = getattr(self.agent, "agent_loop", None)
        if agent_loop is not None:
            return type(agent_loop).__name__
        return "unknown"

    async def on_run_start(
        self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]
    ) -> None:
        """Called at the start of an agent run loop."""
        if not self._otel_active():
            return

        self.run_start_time = time.perf_counter()
        # Time the first step from the start of the run. Without this the
        # first step is never recorded, and a reused callback would measure
        # it from the last step of the previous run.
        self.step_start_time = self.run_start_time
        self.step_count = 0

        # Add breadcrumb for debugging
        add_breadcrumb(
            category="agent",
            message=f"Agent run started with model {self.model}",
            level="info",
            data={
                "model": self.model,
                "agent_type": self._get_agent_type(),
                "input_messages": len(old_items),
            },
        )

    async def on_run_end(
        self,
        kwargs: Dict[str, Any],
        old_items: List[Dict[str, Any]],
        new_items: List[Dict[str, Any]],
    ) -> None:
        """Called at the end of an agent run loop."""
        if not self._otel_active():
            return

        if self.run_start_time is not None:
            duration = time.perf_counter() - self.run_start_time

            # Record run metrics
            record_operation(
                operation="agent.run",
                duration_seconds=duration,
                status="success",
                model=self.model,
                steps=self.step_count,
            )

            add_breadcrumb(
                category="agent",
                message=f"Agent run completed in {duration:.2f}s",
                level="info",
                data={
                    "duration_seconds": duration,
                    "steps": self.step_count,
                    "output_messages": len(new_items),
                },
            )

        # Clear per-run timing so stale values never leak into a later run.
        self.run_start_time = None
        self.step_start_time = None

    async def on_responses(
        self, kwargs: Dict[str, Any], responses: Dict[str, Any]
    ) -> None:
        """Called when responses are received (each step)."""
        if not self._otel_active():
            return

        self.step_count += 1
        current_time = time.perf_counter()

        # Calculate step duration if we have a start time
        if self.step_start_time is not None:
            record_operation(
                operation="agent.step",
                duration_seconds=current_time - self.step_start_time,
                status="success",
                model=self.model,
                step_number=self.step_count,
            )

        # Start timing next step
        self.step_start_time = current_time

        add_breadcrumb(
            category="agent",
            message=f"Agent step {self.step_count} completed",
            level="info",
            data={"step": self.step_count},
        )

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """Called when usage information is received."""
        if not self._otel_active():
            return

        # A key may be present with an explicit None value; treat that as 0
        # instead of letting the `> 0` comparison raise TypeError.
        prompt_tokens = usage.get("prompt_tokens") or 0
        completion_tokens = usage.get("completion_tokens") or 0

        if prompt_tokens > 0 or completion_tokens > 0:
            record_tokens(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                model=self.model,
            )

    async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
        """Called when a computer call is about to start."""
        if not self._otel_active():
            return

        action_type = self._action_type(item)

        add_breadcrumb(
            category="computer",
            message=f"Computer action: {action_type}",
            level="info",
            data={"action_type": action_type},
        )

    async def on_computer_call_end(
        self, item: Dict[str, Any], result: List[Dict[str, Any]]
    ) -> None:
        """Called when a computer call has completed."""
        if not self._otel_active():
            return

        action_type = self._action_type(item)

        # Record computer action metric
        # Note: We don't have precise timing here, so we record with 0 duration
        # The actual timing should be done in the computer module
        record_operation(
            operation=f"computer.action.{action_type}",
            duration_seconds=0,  # Timing handled elsewhere
            status="success",
            model=self.model,
        )

    async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
        """Called when an LLM API call is about to start."""
        if not self._otel_active():
            return

        add_breadcrumb(
            category="llm",
            message="LLM API call started",
            level="info",
            data={"model": self.model},
        )

    async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
        """Called when an LLM API call has completed."""
        if not self._otel_active():
            return

        add_breadcrumb(
            category="llm",
            message="LLM API call completed",
            level="info",
        )
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
class OtelErrorCallback(AsyncCallbackHandler):
    """
    Error-reporting callback for agent execution failures.

    Each error is recorded as an error metric and forwarded to
    ``capture_exception`` with the model, the operation and any extra
    context supplied by the caller. Register it early in the callback
    chain so it sees every error.
    """

    def __init__(self, agent: Any):
        """
        Store the agent and the model name used to label errors.

        Args:
            agent: The ComputerAgent instance
        """
        self.agent = agent
        self.model = getattr(agent, "model", "unknown")

    async def on_error(self, error: Exception, context: Dict[str, Any]) -> None:
        """Record *error* as a metric and capture it with its context."""
        telemetry_on = OTEL_AVAILABLE and is_otel_enabled()
        if not telemetry_on:
            return

        operation = context.get("operation", "unknown")

        # Error metric, labelled by exception class name.
        record_error(
            error_type=type(error).__name__,
            operation=operation,
            model=self.model,
        )

        # Caller-supplied entries are merged last, so they keep precedence
        # over the defaults (except "operation", which is already resolved).
        details: Dict[str, Any] = {"model": self.model, "operation": operation}
        for key, value in context.items():
            if key == "operation":
                continue
            details[key] = value

        capture_exception(error, context=details)
|