cua-agent 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic.
- agent/__init__.py +21 -12
- agent/__main__.py +21 -0
- agent/adapters/__init__.py +9 -0
- agent/adapters/huggingfacelocal_adapter.py +229 -0
- agent/agent.py +594 -0
- agent/callbacks/__init__.py +19 -0
- agent/callbacks/base.py +153 -0
- agent/callbacks/budget_manager.py +44 -0
- agent/callbacks/image_retention.py +139 -0
- agent/callbacks/logging.py +247 -0
- agent/callbacks/pii_anonymization.py +259 -0
- agent/callbacks/telemetry.py +210 -0
- agent/callbacks/trajectory_saver.py +305 -0
- agent/cli.py +297 -0
- agent/computer_handler.py +107 -0
- agent/decorators.py +90 -0
- agent/loops/__init__.py +11 -0
- agent/loops/anthropic.py +728 -0
- agent/loops/omniparser.py +339 -0
- agent/loops/openai.py +95 -0
- agent/loops/uitars.py +688 -0
- agent/responses.py +207 -0
- agent/telemetry.py +135 -14
- agent/types.py +79 -0
- agent/ui/__init__.py +7 -1
- agent/ui/__main__.py +2 -13
- agent/ui/gradio/__init__.py +6 -19
- agent/ui/gradio/app.py +94 -1313
- agent/ui/gradio/ui_components.py +721 -0
- cua_agent-0.4.0.dist-info/METADATA +424 -0
- cua_agent-0.4.0.dist-info/RECORD +33 -0
- {cua_agent-0.3.1.dist-info → cua_agent-0.4.0.dist-info}/WHEEL +1 -1
- agent/core/__init__.py +0 -27
- agent/core/agent.py +0 -210
- agent/core/base.py +0 -217
- agent/core/callbacks.py +0 -200
- agent/core/experiment.py +0 -249
- agent/core/factory.py +0 -122
- agent/core/messages.py +0 -332
- agent/core/provider_config.py +0 -21
- agent/core/telemetry.py +0 -142
- agent/core/tools/__init__.py +0 -21
- agent/core/tools/base.py +0 -74
- agent/core/tools/bash.py +0 -52
- agent/core/tools/collection.py +0 -46
- agent/core/tools/computer.py +0 -113
- agent/core/tools/edit.py +0 -67
- agent/core/tools/manager.py +0 -56
- agent/core/tools.py +0 -32
- agent/core/types.py +0 -88
- agent/core/visualization.py +0 -197
- agent/providers/__init__.py +0 -4
- agent/providers/anthropic/__init__.py +0 -6
- agent/providers/anthropic/api/client.py +0 -360
- agent/providers/anthropic/api/logging.py +0 -150
- agent/providers/anthropic/api_handler.py +0 -140
- agent/providers/anthropic/callbacks/__init__.py +0 -5
- agent/providers/anthropic/callbacks/manager.py +0 -65
- agent/providers/anthropic/loop.py +0 -568
- agent/providers/anthropic/prompts.py +0 -23
- agent/providers/anthropic/response_handler.py +0 -226
- agent/providers/anthropic/tools/__init__.py +0 -33
- agent/providers/anthropic/tools/base.py +0 -88
- agent/providers/anthropic/tools/bash.py +0 -66
- agent/providers/anthropic/tools/collection.py +0 -34
- agent/providers/anthropic/tools/computer.py +0 -396
- agent/providers/anthropic/tools/edit.py +0 -326
- agent/providers/anthropic/tools/manager.py +0 -54
- agent/providers/anthropic/tools/run.py +0 -42
- agent/providers/anthropic/types.py +0 -16
- agent/providers/anthropic/utils.py +0 -367
- agent/providers/omni/__init__.py +0 -8
- agent/providers/omni/api_handler.py +0 -42
- agent/providers/omni/clients/anthropic.py +0 -103
- agent/providers/omni/clients/base.py +0 -35
- agent/providers/omni/clients/oaicompat.py +0 -195
- agent/providers/omni/clients/ollama.py +0 -122
- agent/providers/omni/clients/openai.py +0 -155
- agent/providers/omni/clients/utils.py +0 -25
- agent/providers/omni/image_utils.py +0 -34
- agent/providers/omni/loop.py +0 -990
- agent/providers/omni/parser.py +0 -307
- agent/providers/omni/prompts.py +0 -64
- agent/providers/omni/tools/__init__.py +0 -30
- agent/providers/omni/tools/base.py +0 -29
- agent/providers/omni/tools/bash.py +0 -74
- agent/providers/omni/tools/computer.py +0 -179
- agent/providers/omni/tools/manager.py +0 -61
- agent/providers/omni/utils.py +0 -236
- agent/providers/openai/__init__.py +0 -6
- agent/providers/openai/api_handler.py +0 -456
- agent/providers/openai/loop.py +0 -472
- agent/providers/openai/response_handler.py +0 -205
- agent/providers/openai/tools/__init__.py +0 -15
- agent/providers/openai/tools/base.py +0 -79
- agent/providers/openai/tools/computer.py +0 -326
- agent/providers/openai/tools/manager.py +0 -106
- agent/providers/openai/types.py +0 -36
- agent/providers/openai/utils.py +0 -98
- agent/providers/uitars/__init__.py +0 -1
- agent/providers/uitars/clients/base.py +0 -35
- agent/providers/uitars/clients/mlxvlm.py +0 -263
- agent/providers/uitars/clients/oaicompat.py +0 -214
- agent/providers/uitars/loop.py +0 -660
- agent/providers/uitars/prompts.py +0 -63
- agent/providers/uitars/tools/__init__.py +0 -1
- agent/providers/uitars/tools/computer.py +0 -283
- agent/providers/uitars/tools/manager.py +0 -60
- agent/providers/uitars/utils.py +0 -264
- cua_agent-0.3.1.dist-info/METADATA +0 -295
- cua_agent-0.3.1.dist-info/RECORD +0 -87
- {cua_agent-0.3.1.dist-info → cua_agent-0.4.0.dist-info}/entry_points.txt +0 -0
agent/callbacks/base.py
ADDED
@@ -0,0 +1,153 @@
"""
Base callback handler interface for ComputerAgent preprocessing and postprocessing hooks.
"""

from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional, Union


class AsyncCallbackHandler(ABC):
    """
    Base class for async callback handlers that can preprocess messages before
    the agent loop and postprocess output after the agent loop.
    """

    async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
        """Called at the start of an agent run loop."""
        pass

    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
        """Called at the end of an agent run loop."""
        pass

    async def on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool:
        """Called during agent run loop to determine if execution should continue.

        Args:
            kwargs: Run arguments
            old_items: Original messages
            new_items: New messages generated during run

        Returns:
            True to continue execution, False to stop
        """
        return True

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Called before messages are sent to the agent loop.

        Args:
            messages: List of message dictionaries to preprocess

        Returns:
            List of preprocessed message dictionaries
        """
        return messages

    async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Called after the agent loop returns output.

        Args:
            output: List of output message dictionaries to postprocess

        Returns:
            List of postprocessed output dictionaries
        """
        return output

    async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
        """
        Called when a computer call is about to start.

        Args:
            item: The computer call item dictionary
        """
        pass

    async def on_computer_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None:
        """
        Called when a computer call has completed.

        Args:
            item: The computer call item dictionary
            result: The result of the computer call
        """
        pass

    async def on_function_call_start(self, item: Dict[str, Any]) -> None:
        """
        Called when a function call is about to start.

        Args:
            item: The function call item dictionary
        """
        pass

    async def on_function_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None:
        """
        Called when a function call has completed.

        Args:
            item: The function call item dictionary
            result: The result of the function call
        """
        pass

    async def on_text(self, item: Dict[str, Any]) -> None:
        """
        Called when a text message is encountered.

        Args:
            item: The message item dictionary
        """
        pass

    async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
        """
        Called when an API call is about to start.

        Args:
            kwargs: The kwargs being passed to the API call
        """
        pass

    async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
        """
        Called when an API call has completed.

        Args:
            kwargs: The kwargs that were passed to the API call
            result: The result of the API call
        """
        pass

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """
        Called when usage information is received.

        Args:
            usage: The usage information
        """
        pass

    async def on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None:
        """
        Called when a screenshot is taken.

        Args:
            screenshot: The screenshot image
            name: The name of the screenshot
        """
        pass

    async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
        """
        Called when responses are received.

        Args:
            kwargs: The kwargs being passed to the agent loop
            responses: The responses received
        """
        pass
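Every hook on AsyncCallbackHandler defaults to a no-op (or to passing messages through unchanged), so a subclass only overrides the events it cares about. Below is a minimal, hypothetical sketch, not part of the package: the import path is assumed from the file layout above, and CallCounterCallback and the sample item dicts are purely illustrative.

import asyncio
from typing import Any, Dict, List

from agent.callbacks.base import AsyncCallbackHandler  # assumed path, per the diff's file layout


class CallCounterCallback(AsyncCallbackHandler):
    """Illustrative handler that counts computer calls; every other hook keeps its default."""

    def __init__(self) -> None:
        self.computer_calls = 0

    async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
        self.computer_calls += 1

    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
        print(f"run finished after {self.computer_calls} computer call(s)")


async def _demo() -> None:
    cb = CallCounterCallback()
    # Drive the hooks directly with dicts shaped like the items the hooks document.
    await cb.on_computer_call_start({"type": "computer_call", "action": {"type": "click"}})
    await cb.on_run_end({}, [], [])


asyncio.run(_demo())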
agent/callbacks/budget_manager.py
ADDED
@@ -0,0 +1,44 @@
from typing import Dict, List, Any
from .base import AsyncCallbackHandler

class BudgetExceededError(Exception):
    """Exception raised when budget is exceeded."""
    pass

class BudgetManagerCallback(AsyncCallbackHandler):
    """Budget manager callback that tracks usage costs and can stop execution when budget is exceeded."""

    def __init__(self, max_budget: float, reset_after_each_run: bool = True, raise_error: bool = False):
        """
        Initialize BudgetManagerCallback.

        Args:
            max_budget: Maximum budget allowed
            reset_after_each_run: Whether to reset budget after each run
            raise_error: Whether to raise an error when budget is exceeded
        """
        self.max_budget = max_budget
        self.reset_after_each_run = reset_after_each_run
        self.raise_error = raise_error
        self.total_cost = 0.0

    async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
        """Reset budget if configured to do so."""
        if self.reset_after_each_run:
            self.total_cost = 0.0

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """Track usage costs."""
        if "response_cost" in usage:
            self.total_cost += usage["response_cost"]

    async def on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool:
        """Check if budget allows continuation."""
        if self.total_cost >= self.max_budget:
            if self.raise_error:
                raise BudgetExceededError(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}")
            else:
                print(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}")
            return False
        return True
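A hedged usage sketch for BudgetManagerCallback: the hooks are driven directly with made-up usage events so the example stays self-contained. The import path is assumed from the file layout above, and the $0.02 costs are illustrative, not real pricing.

import asyncio

from agent.callbacks.budget_manager import BudgetManagerCallback  # assumed path


async def _demo() -> None:
    budget = BudgetManagerCallback(max_budget=0.05, raise_error=False)
    await budget.on_run_start({}, [])
    for cost in (0.02, 0.02, 0.02):  # three hypothetical LLM turns at $0.02 each
        await budget.on_usage({"response_cost": cost})
        if not await budget.on_run_continue({}, [], []):
            # Third turn pushes total_cost past max_budget, so the run stops here.
            print(f"stopped at ${budget.total_cost:.2f}")
            break


asyncio.run(_demo())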
agent/callbacks/image_retention.py
ADDED
@@ -0,0 +1,139 @@
"""
Image retention callback handler that limits the number of recent images in message history.
"""

from typing import List, Dict, Any, Optional
from .base import AsyncCallbackHandler


class ImageRetentionCallback(AsyncCallbackHandler):
    """
    Callback handler that applies image retention policy to limit the number
    of recent images in message history to prevent context window overflow.
    """

    def __init__(self, only_n_most_recent_images: Optional[int] = None):
        """
        Initialize the image retention callback.

        Args:
            only_n_most_recent_images: If set, only keep the N most recent images in message history
        """
        self.only_n_most_recent_images = only_n_most_recent_images

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Apply image retention policy to messages before sending to agent loop.

        Args:
            messages: List of message dictionaries

        Returns:
            List of messages with image retention policy applied
        """
        if self.only_n_most_recent_images is None:
            return messages

        return self._apply_image_retention(messages)

    def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Apply image retention policy to keep only the N most recent images.

        Removes computer_call_output items with image_url and their corresponding computer_call items,
        keeping only the most recent N image pairs based on only_n_most_recent_images setting.

        Args:
            messages: List of message dictionaries

        Returns:
            Filtered list of messages with image retention applied
        """
        if self.only_n_most_recent_images is None:
            return messages

        # First pass: Assign call_id to reasoning items based on the next computer_call
        messages_with_call_ids = []
        for i, msg in enumerate(messages):
            msg_copy = msg.copy() if isinstance(msg, dict) else msg

            # If this is a reasoning item without a call_id, find the next computer_call
            if (msg_copy.get("type") == "reasoning" and
                not msg_copy.get("call_id")):
                # Look ahead for the next computer_call
                for j in range(i + 1, len(messages)):
                    next_msg = messages[j]
                    if (next_msg.get("type") == "computer_call" and
                        next_msg.get("call_id")):
                        msg_copy["call_id"] = next_msg.get("call_id")
                        break

            messages_with_call_ids.append(msg_copy)

        # Find all computer_call_output items with images and their call_ids
        image_call_ids = []
        for msg in reversed(messages_with_call_ids):  # Process in reverse to get most recent first
            if (msg.get("type") == "computer_call_output" and
                isinstance(msg.get("output"), dict) and
                "image_url" in msg.get("output", {})):
                call_id = msg.get("call_id")
                if call_id and call_id not in image_call_ids:
                    image_call_ids.append(call_id)
                    if len(image_call_ids) >= self.only_n_most_recent_images:
                        break

        # Keep the most recent N image call_ids (reverse to get chronological order)
        keep_call_ids = set(image_call_ids[:self.only_n_most_recent_images])

        # Filter messages: remove computer_call, computer_call_output, and reasoning for old images
        filtered_messages = []
        for msg in messages_with_call_ids:
            msg_type = msg.get("type")
            call_id = msg.get("call_id")

            # Remove old computer_call items
            if msg_type == "computer_call" and call_id not in keep_call_ids:
                # Check if this call_id corresponds to an image call
                has_image_output = any(
                    m.get("type") == "computer_call_output" and
                    m.get("call_id") == call_id and
                    isinstance(m.get("output"), dict) and
                    "image_url" in m.get("output", {})
                    for m in messages_with_call_ids
                )
                if has_image_output:
                    continue  # Skip this computer_call

            # Remove old computer_call_output items with images
            if (msg_type == "computer_call_output" and
                call_id not in keep_call_ids and
                isinstance(msg.get("output"), dict) and
                "image_url" in msg.get("output", {})):
                continue  # Skip this computer_call_output

            # Remove old reasoning items that are paired with removed computer calls
            if (msg_type == "reasoning" and
                call_id and call_id not in keep_call_ids):
                # Check if this call_id corresponds to an image call that's being removed
                has_image_output = any(
                    m.get("type") == "computer_call_output" and
                    m.get("call_id") == call_id and
                    isinstance(m.get("output"), dict) and
                    "image_url" in m.get("output", {})
                    for m in messages_with_call_ids
                )
                if has_image_output:
                    continue  # Skip this reasoning item

            filtered_messages.append(msg)

        # Clean up: Remove call_id from reasoning items before returning
        final_messages = []
        for msg in filtered_messages:
            if msg.get("type") == "reasoning" and "call_id" in msg:
                # Create a copy without call_id for reasoning items
                cleaned_msg = {k: v for k, v in msg.items() if k != "call_id"}
                final_messages.append(cleaned_msg)
            else:
                final_messages.append(msg)

        return final_messages
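A hypothetical sketch of the retention policy in isolation: with only_n_most_recent_images=1, the older screenshot pair (call_id "a") is dropped and only the newest pair survives. The message dicts and the import path are assumptions based on the shapes the code above checks for.

import asyncio

from agent.callbacks.image_retention import ImageRetentionCallback  # assumed path


async def _demo() -> None:
    messages = [
        {"type": "computer_call", "call_id": "a", "action": {"type": "screenshot"}},
        {"type": "computer_call_output", "call_id": "a", "output": {"type": "input_image", "image_url": "data:image/png;base64,OLD"}},
        {"type": "computer_call", "call_id": "b", "action": {"type": "screenshot"}},
        {"type": "computer_call_output", "call_id": "b", "output": {"type": "input_image", "image_url": "data:image/png;base64,NEW"}},
    ]
    retention = ImageRetentionCallback(only_n_most_recent_images=1)
    trimmed = await retention.on_llm_start(messages)
    # Only the computer_call / computer_call_output pair for call_id "b" remains.
    print([m.get("call_id") for m in trimmed])  # -> ['b', 'b']


asyncio.run(_demo())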
agent/callbacks/logging.py
ADDED
@@ -0,0 +1,247 @@
"""
Logging callback for ComputerAgent that provides configurable logging of agent lifecycle events.
"""

import json
import logging
from typing import Dict, List, Any, Optional, Union
from .base import AsyncCallbackHandler


def sanitize_image_urls(data: Any) -> Any:
    """
    Recursively search for 'image_url' keys and set their values to '[omitted]'.

    Args:
        data: Any data structure (dict, list, or primitive type)

    Returns:
        A deep copy of the data with all 'image_url' values replaced with '[omitted]'
    """
    if isinstance(data, dict):
        # Create a copy of the dictionary
        sanitized = {}
        for key, value in data.items():
            if key == "image_url":
                sanitized[key] = "[omitted]"
            else:
                # Recursively sanitize the value
                sanitized[key] = sanitize_image_urls(value)
        return sanitized

    elif isinstance(data, list):
        # Recursively sanitize each item in the list
        return [sanitize_image_urls(item) for item in data]

    else:
        # For primitive types (str, int, bool, None, etc.), return as-is
        return data


class LoggingCallback(AsyncCallbackHandler):
    """
    Callback handler that logs agent lifecycle events with configurable verbosity.

    Logging levels:
    - DEBUG: All events including API calls, message preprocessing, and detailed outputs
    - INFO: Major lifecycle events (start/end, messages, outputs)
    - WARNING: Only warnings and errors
    - ERROR: Only errors
    """

    def __init__(self, logger: Optional[logging.Logger] = None, level: int = logging.INFO):
        """
        Initialize the logging callback.

        Args:
            logger: Logger instance to use. If None, creates a logger named 'agent.ComputerAgent'
            level: Logging level (logging.DEBUG, logging.INFO, etc.)
        """
        self.logger = logger or logging.getLogger('agent.ComputerAgent')
        self.level = level

        # Set up logger if it doesn't have handlers
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
            self.logger.setLevel(level)

    def _update_usage(self, usage: Dict[str, Any]) -> None:
        """Update total usage statistics."""
        def add_dicts(target: Dict[str, Any], source: Dict[str, Any]) -> None:
            for key, value in source.items():
                if isinstance(value, dict):
                    if key not in target:
                        target[key] = {}
                    add_dicts(target[key], value)
                else:
                    if key not in target:
                        target[key] = 0
                    target[key] += value
        add_dicts(self.total_usage, usage)

    async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
        """Called before the run starts."""
        self.total_usage = {}

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """Called when usage information is received."""
        self._update_usage(usage)

    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
        """Called after the run ends."""
        def format_dict(d, indent=0):
            lines = []
            prefix = f" - {' ' * indent}"
            for key, value in d.items():
                if isinstance(value, dict):
                    lines.append(f"{prefix}{key}:")
                    lines.extend(format_dict(value, indent + 1))
                elif isinstance(value, float):
                    lines.append(f"{prefix}{key}: ${value:.4f}")
                else:
                    lines.append(f"{prefix}{key}: {value}")
            return lines

        formatted_output = "\n".join(format_dict(self.total_usage))
        self.logger.info(f"Total usage:\n{formatted_output}")

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Called before LLM processing starts."""
        if self.logger.isEnabledFor(logging.INFO):
            self.logger.info(f"LLM processing started with {len(messages)} messages")
        if self.logger.isEnabledFor(logging.DEBUG):
            sanitized_messages = [sanitize_image_urls(msg) for msg in messages]
            self.logger.debug(f"LLM input messages: {json.dumps(sanitized_messages, indent=2)}")
        return messages

    async def on_llm_end(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Called after LLM processing ends."""
        if self.logger.isEnabledFor(logging.DEBUG):
            sanitized_messages = [sanitize_image_urls(msg) for msg in messages]
            self.logger.debug(f"LLM output: {json.dumps(sanitized_messages, indent=2)}")
        return messages

    async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
        """Called when a computer call starts."""
        action = item.get("action", {})
        action_type = action.get("type", "unknown")
        action_args = {k: v for k, v in action.items() if k != "type"}

        # INFO level logging for the action
        self.logger.info(f"Computer: {action_type}({action_args})")

        # DEBUG level logging for full details
        if self.logger.isEnabledFor(logging.DEBUG):
            self.logger.debug(f"Computer call started: {json.dumps(action, indent=2)}")

    async def on_computer_call_end(self, item: Dict[str, Any], result: Any) -> None:
        """Called when a computer call ends."""
        if self.logger.isEnabledFor(logging.DEBUG):
            action = item.get("action", "unknown")
            self.logger.debug(f"Computer call completed: {json.dumps(action, indent=2)}")
            if result:
                sanitized_result = sanitize_image_urls(result)
                self.logger.debug(f"Computer call result: {json.dumps(sanitized_result, indent=2)}")

    async def on_function_call_start(self, item: Dict[str, Any]) -> None:
        """Called when a function call starts."""
        name = item.get("name", "unknown")
        arguments = item.get("arguments", "{}")

        # INFO level logging for the function call
        self.logger.info(f"Function: {name}({arguments})")

        # DEBUG level logging for full details
        if self.logger.isEnabledFor(logging.DEBUG):
            self.logger.debug(f"Function call started: {name}")

    async def on_function_call_end(self, item: Dict[str, Any], result: Any) -> None:
        """Called when a function call ends."""
        # INFO level logging for function output (similar to function_call_output)
        if result:
            # Handle both list and direct result formats
            if isinstance(result, list) and len(result) > 0:
                output = result[0].get("output", str(result)) if isinstance(result[0], dict) else str(result[0])
            else:
                output = str(result)

            # Truncate long outputs
            if len(output) > 100:
                output = output[:100] + "..."

            self.logger.info(f"Output: {output}")

        # DEBUG level logging for full details
        if self.logger.isEnabledFor(logging.DEBUG):
            name = item.get("name", "unknown")
            self.logger.debug(f"Function call completed: {name}")
            if result:
                self.logger.debug(f"Function call result: {json.dumps(result, indent=2)}")

    async def on_text(self, item: Dict[str, Any]) -> None:
        """Called when a text message is encountered."""
        # Get the role to determine if it's Agent or User
        role = item.get("role", "unknown")
        content_items = item.get("content", [])

        # Process content items to build display text
        text_parts = []
        for content_item in content_items:
            content_type = content_item.get("type", "output_text")
            if content_type == "output_text":
                text_content = content_item.get("text", "")
                if not text_content.strip():
                    text_parts.append("[empty]")
                else:
                    # Truncate long text and add ellipsis
                    if len(text_content) > 2048:
                        text_parts.append(text_content[:2048] + "...")
                    else:
                        text_parts.append(text_content)
            else:
                # Non-text content, show as [type]
                text_parts.append(f"[{content_type}]")

        # Join all text parts
        display_text = ''.join(text_parts) if text_parts else "[empty]"

        # Log with appropriate level and format
        if role == "assistant":
            self.logger.info(f"Agent: {display_text}")
        elif role == "user":
            self.logger.info(f"User: {display_text}")
        else:
            # Fallback for unknown roles, use debug level
            if self.logger.isEnabledFor(logging.DEBUG):
                self.logger.debug(f"Text message ({role}): {display_text}")

    async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
        """Called when an API call is about to start."""
        if self.logger.isEnabledFor(logging.DEBUG):
            model = kwargs.get("model", "unknown")
            self.logger.debug(f"API call starting for model: {model}")
            # Log sanitized messages if present
            if "messages" in kwargs:
                sanitized_messages = sanitize_image_urls(kwargs["messages"])
                self.logger.debug(f"API call messages: {json.dumps(sanitized_messages, indent=2)}")
            elif "input" in kwargs:
                sanitized_input = sanitize_image_urls(kwargs["input"])
                self.logger.debug(f"API call input: {json.dumps(sanitized_input, indent=2)}")

    async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
        """Called when an API call has completed."""
        if self.logger.isEnabledFor(logging.DEBUG):
            model = kwargs.get("model", "unknown")
            self.logger.debug(f"API call completed for model: {model}")
            self.logger.debug(f"API call result: {json.dumps(sanitize_image_urls(result), indent=2)}")

    async def on_screenshot(self, item: Union[str, bytes], name: str = "screenshot") -> None:
        """Called when a screenshot is taken."""
        if self.logger.isEnabledFor(logging.DEBUG):
            image_size = len(item) / 1024
            self.logger.debug(f"Screenshot captured: {name} {image_size:.2f} KB")