cua-agent 0.3.2__py3-none-any.whl → 0.4.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (111) hide show
  1. agent/__init__.py +15 -51
  2. agent/__main__.py +21 -0
  3. agent/adapters/__init__.py +9 -0
  4. agent/adapters/huggingfacelocal_adapter.py +216 -0
  5. agent/agent.py +577 -0
  6. agent/callbacks/__init__.py +17 -0
  7. agent/callbacks/base.py +153 -0
  8. agent/callbacks/budget_manager.py +44 -0
  9. agent/callbacks/image_retention.py +139 -0
  10. agent/callbacks/logging.py +247 -0
  11. agent/callbacks/pii_anonymization.py +259 -0
  12. agent/callbacks/trajectory_saver.py +305 -0
  13. agent/cli.py +290 -0
  14. agent/computer_handler.py +107 -0
  15. agent/decorators.py +90 -0
  16. agent/loops/__init__.py +11 -0
  17. agent/loops/anthropic.py +728 -0
  18. agent/loops/omniparser.py +339 -0
  19. agent/loops/openai.py +95 -0
  20. agent/loops/uitars.py +688 -0
  21. agent/responses.py +207 -0
  22. agent/types.py +79 -0
  23. agent/ui/__init__.py +7 -1
  24. agent/ui/gradio/__init__.py +6 -19
  25. agent/ui/gradio/app.py +80 -1299
  26. agent/ui/gradio/ui_components.py +703 -0
  27. cua_agent-0.4.0b1.dist-info/METADATA +424 -0
  28. cua_agent-0.4.0b1.dist-info/RECORD +30 -0
  29. agent/core/__init__.py +0 -27
  30. agent/core/agent.py +0 -210
  31. agent/core/base.py +0 -217
  32. agent/core/callbacks.py +0 -200
  33. agent/core/experiment.py +0 -249
  34. agent/core/factory.py +0 -122
  35. agent/core/messages.py +0 -332
  36. agent/core/provider_config.py +0 -21
  37. agent/core/telemetry.py +0 -142
  38. agent/core/tools/__init__.py +0 -21
  39. agent/core/tools/base.py +0 -74
  40. agent/core/tools/bash.py +0 -52
  41. agent/core/tools/collection.py +0 -46
  42. agent/core/tools/computer.py +0 -113
  43. agent/core/tools/edit.py +0 -67
  44. agent/core/tools/manager.py +0 -56
  45. agent/core/tools.py +0 -32
  46. agent/core/types.py +0 -88
  47. agent/core/visualization.py +0 -197
  48. agent/providers/__init__.py +0 -4
  49. agent/providers/anthropic/__init__.py +0 -6
  50. agent/providers/anthropic/api/client.py +0 -360
  51. agent/providers/anthropic/api/logging.py +0 -150
  52. agent/providers/anthropic/api_handler.py +0 -140
  53. agent/providers/anthropic/callbacks/__init__.py +0 -5
  54. agent/providers/anthropic/callbacks/manager.py +0 -65
  55. agent/providers/anthropic/loop.py +0 -568
  56. agent/providers/anthropic/prompts.py +0 -23
  57. agent/providers/anthropic/response_handler.py +0 -226
  58. agent/providers/anthropic/tools/__init__.py +0 -33
  59. agent/providers/anthropic/tools/base.py +0 -88
  60. agent/providers/anthropic/tools/bash.py +0 -66
  61. agent/providers/anthropic/tools/collection.py +0 -34
  62. agent/providers/anthropic/tools/computer.py +0 -396
  63. agent/providers/anthropic/tools/edit.py +0 -326
  64. agent/providers/anthropic/tools/manager.py +0 -54
  65. agent/providers/anthropic/tools/run.py +0 -42
  66. agent/providers/anthropic/types.py +0 -16
  67. agent/providers/anthropic/utils.py +0 -381
  68. agent/providers/omni/__init__.py +0 -8
  69. agent/providers/omni/api_handler.py +0 -42
  70. agent/providers/omni/clients/anthropic.py +0 -103
  71. agent/providers/omni/clients/base.py +0 -35
  72. agent/providers/omni/clients/oaicompat.py +0 -195
  73. agent/providers/omni/clients/ollama.py +0 -122
  74. agent/providers/omni/clients/openai.py +0 -155
  75. agent/providers/omni/clients/utils.py +0 -25
  76. agent/providers/omni/image_utils.py +0 -34
  77. agent/providers/omni/loop.py +0 -990
  78. agent/providers/omni/parser.py +0 -307
  79. agent/providers/omni/prompts.py +0 -64
  80. agent/providers/omni/tools/__init__.py +0 -30
  81. agent/providers/omni/tools/base.py +0 -29
  82. agent/providers/omni/tools/bash.py +0 -74
  83. agent/providers/omni/tools/computer.py +0 -179
  84. agent/providers/omni/tools/manager.py +0 -61
  85. agent/providers/omni/utils.py +0 -236
  86. agent/providers/openai/__init__.py +0 -6
  87. agent/providers/openai/api_handler.py +0 -456
  88. agent/providers/openai/loop.py +0 -472
  89. agent/providers/openai/response_handler.py +0 -205
  90. agent/providers/openai/tools/__init__.py +0 -15
  91. agent/providers/openai/tools/base.py +0 -79
  92. agent/providers/openai/tools/computer.py +0 -326
  93. agent/providers/openai/tools/manager.py +0 -106
  94. agent/providers/openai/types.py +0 -36
  95. agent/providers/openai/utils.py +0 -98
  96. agent/providers/uitars/__init__.py +0 -1
  97. agent/providers/uitars/clients/base.py +0 -35
  98. agent/providers/uitars/clients/mlxvlm.py +0 -263
  99. agent/providers/uitars/clients/oaicompat.py +0 -214
  100. agent/providers/uitars/loop.py +0 -660
  101. agent/providers/uitars/prompts.py +0 -63
  102. agent/providers/uitars/tools/__init__.py +0 -1
  103. agent/providers/uitars/tools/computer.py +0 -283
  104. agent/providers/uitars/tools/manager.py +0 -60
  105. agent/providers/uitars/utils.py +0 -264
  106. agent/telemetry.py +0 -21
  107. agent/ui/__main__.py +0 -15
  108. cua_agent-0.3.2.dist-info/METADATA +0 -295
  109. cua_agent-0.3.2.dist-info/RECORD +0 -87
  110. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b1.dist-info}/WHEEL +0 -0
  111. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,153 @@
1
+ """
2
+ Base callback handler interface for ComputerAgent preprocessing and postprocessing hooks.
3
+ """
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import List, Dict, Any, Optional, Union
7
+
8
+
9
class AsyncCallbackHandler(ABC):
    """
    Interface for asynchronous ComputerAgent lifecycle hooks.

    Every hook ships with a neutral default: notification hooks do nothing,
    ``on_run_continue`` answers ``True``, and the ``on_llm_*`` hooks hand
    their argument back untouched. Subclasses override only what they need.
    """

    async def on_run_start(
        self,
        kwargs: Dict[str, Any],
        old_items: List[Dict[str, Any]],
    ) -> None:
        """Hook fired when an agent run loop begins."""
        return None

    async def on_run_end(
        self,
        kwargs: Dict[str, Any],
        old_items: List[Dict[str, Any]],
        new_items: List[Dict[str, Any]],
    ) -> None:
        """Hook fired when an agent run loop finishes."""
        return None

    async def on_run_continue(
        self,
        kwargs: Dict[str, Any],
        old_items: List[Dict[str, Any]],
        new_items: List[Dict[str, Any]],
    ) -> bool:
        """
        Decide whether the agent run loop should keep going.

        Args:
            kwargs: Run arguments
            old_items: Original messages
            new_items: New messages generated during run

        Returns:
            True to continue execution, False to stop. The default always
            continues.
        """
        return True

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Preprocess messages before they are sent to the agent loop.

        Args:
            messages: List of message dictionaries to preprocess

        Returns:
            The (possibly transformed) message list. The default returns
            ``messages`` itself, unmodified.
        """
        return messages

    async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Postprocess the output returned by the agent loop.

        Args:
            output: List of output message dictionaries to postprocess

        Returns:
            The (possibly transformed) output list. The default returns
            ``output`` itself, unmodified.
        """
        return output

    async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
        """
        Hook fired just before a computer call starts.

        Args:
            item: The computer call item dictionary
        """
        return None

    async def on_computer_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None:
        """
        Hook fired after a computer call has completed.

        Args:
            item: The computer call item dictionary
            result: The result of the computer call
        """
        return None

    async def on_function_call_start(self, item: Dict[str, Any]) -> None:
        """
        Hook fired just before a function call starts.

        Args:
            item: The function call item dictionary
        """
        return None

    async def on_function_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None:
        """
        Hook fired after a function call has completed.

        Args:
            item: The function call item dictionary
            result: The result of the function call
        """
        return None

    async def on_text(self, item: Dict[str, Any]) -> None:
        """
        Hook fired when a text message is encountered.

        Args:
            item: The message item dictionary
        """
        return None

    async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
        """
        Hook fired just before an API call starts.

        Args:
            kwargs: The kwargs being passed to the API call
        """
        return None

    async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
        """
        Hook fired after an API call has completed.

        Args:
            kwargs: The kwargs that were passed to the API call
            result: The result of the API call
        """
        return None

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """
        Hook fired when usage information is received.

        Args:
            usage: The usage information
        """
        return None

    async def on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None:
        """
        Hook fired when a screenshot is taken.

        Args:
            screenshot: The screenshot image
            name: The name of the screenshot
        """
        return None

    async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
        """
        Hook fired when responses are received.

        Args:
            kwargs: The kwargs being passed to the agent loop
            responses: The responses received
        """
        return None
@@ -0,0 +1,44 @@
1
+ from typing import Dict, List, Any
2
+ from .base import AsyncCallbackHandler
3
+
4
class BudgetExceededError(Exception):
    """Exception raised when budget is exceeded."""
7
+
8
class BudgetManagerCallback(AsyncCallbackHandler):
    """
    Accumulate reported usage cost and halt the run once a budget is reached.

    Cost is read from the ``response_cost`` field of each usage report. When
    the running total reaches ``max_budget`` the callback either returns
    ``False`` from ``on_run_continue`` (after printing a notice) or raises
    ``BudgetExceededError``, depending on ``raise_error``.
    """

    def __init__(self, max_budget: float, reset_after_each_run: bool = True, raise_error: bool = False):
        """
        Initialize BudgetManagerCallback.

        Args:
            max_budget: Maximum budget allowed
            reset_after_each_run: Whether to zero the running total when a
                new run starts (the reset happens in ``on_run_start``)
            raise_error: Whether to raise an error when budget is exceeded
        """
        self.max_budget = max_budget
        self.reset_after_each_run = reset_after_each_run
        self.raise_error = raise_error
        self.total_cost = 0.0

    async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
        """Reset the running total if configured to do so."""
        if self.reset_after_each_run:
            self.total_cost = 0.0

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """
        Add the reported ``response_cost`` to the running total.

        Reports without a ``response_cost``, or with an explicit ``None``
        cost, are ignored instead of raising ``TypeError``.
        """
        cost = usage.get("response_cost")
        if cost is None:
            return
        self.total_cost += cost

    async def on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool:
        """
        Check if budget allows continuation.

        Returns:
            True while the running total is below ``max_budget``; False once
            it is reached and ``raise_error`` is not set.

        Raises:
            BudgetExceededError: If the budget is reached and ``raise_error``
                is set.
        """
        if self.total_cost < self.max_budget:
            return True

        message = f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}"
        if self.raise_error:
            raise BudgetExceededError(message)
        print(message)
        return False
44
+
@@ -0,0 +1,139 @@
1
+ """
2
+ Image retention callback handler that limits the number of recent images in message history.
3
+ """
4
+
5
+ from typing import List, Dict, Any, Optional
6
+ from .base import AsyncCallbackHandler
7
+
8
+
9
class ImageRetentionCallback(AsyncCallbackHandler):
    """
    Callback handler that applies image retention policy to limit the number
    of recent images in message history to prevent context window overflow.
    """

    def __init__(self, only_n_most_recent_images: Optional[int] = None):
        """
        Initialize the image retention callback.

        Args:
            only_n_most_recent_images: If set, only keep the N most recent images in message history.
                ``None`` disables the policy; values <= 0 retain no images.
        """
        self.only_n_most_recent_images = only_n_most_recent_images

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Apply image retention policy to messages before sending to agent loop.

        Args:
            messages: List of message dictionaries

        Returns:
            List of messages with image retention policy applied; the input
            list itself when no limit is configured
        """
        if self.only_n_most_recent_images is None:
            return messages

        return self._apply_image_retention(messages)

    @staticmethod
    def _has_image_output(msg: Any) -> bool:
        """Return True if msg is a computer_call_output whose output dict has an image_url."""
        if not isinstance(msg, dict) or msg.get("type") != "computer_call_output":
            return False
        output = msg.get("output")
        return isinstance(output, dict) and "image_url" in output

    def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Apply image retention policy to keep only the N most recent images.

        Removes computer_call_output items with image_url and their corresponding computer_call items,
        keeping only the most recent N image pairs based on only_n_most_recent_images setting.
        A reasoning item without its own call_id is paired with the next
        computer_call that follows it and is dropped together with that call.
        Any ``call_id`` key on a returned reasoning item is stripped.

        Entries that are not dictionaries are passed through untouched.

        Args:
            messages: List of message dictionaries

        Returns:
            Filtered list of messages (shallow copies) with image retention applied
        """
        limit = self.only_n_most_recent_images
        if limit is None:
            return messages

        # One reverse pass gathers everything the filter needs:
        #  - paired_ids[i]: call_id each message is judged by (for reasoning
        #    items lacking one, the call_id of the next computer_call)
        #  - image_call_ids: every call_id that owns an image output
        #  - keep_call_ids: the `limit` most recent distinct image call_ids
        paired_ids: List[Any] = [None] * len(messages)
        image_call_ids = set()
        keep_call_ids = set()
        next_computer_call_id = None

        for index in range(len(messages) - 1, -1, -1):
            msg = messages[index]
            if not isinstance(msg, dict):
                continue

            msg_type = msg.get("type")
            call_id = msg.get("call_id")

            if msg_type == "reasoning":
                paired_ids[index] = call_id or next_computer_call_id
                continue

            paired_ids[index] = call_id
            if msg_type == "computer_call" and call_id:
                next_computer_call_id = call_id
            elif self._has_image_output(msg):
                image_call_ids.add(call_id)
                # Walking newest-to-oldest, so the first `limit` distinct ids
                # seen are the most recent ones.
                if call_id and len(keep_call_ids) < limit:
                    keep_call_ids.add(call_id)

        retained: List[Dict[str, Any]] = []
        for msg, paired_id in zip(messages, paired_ids):
            if not isinstance(msg, dict):
                retained.append(msg)
                continue

            msg_type = msg.get("type")
            # "Expired" = belongs to an image-producing call outside the keep window.
            expired = paired_id not in keep_call_ids and paired_id in image_call_ids

            if msg_type == "computer_call" and expired:
                continue
            if (msg_type == "computer_call_output" and
                    paired_id not in keep_call_ids and
                    self._has_image_output(msg)):
                continue
            if msg_type == "reasoning":
                if paired_id and expired:
                    continue
                # Reasoning items are returned without a call_id key.
                retained.append({k: v for k, v in msg.items() if k != "call_id"})
                continue

            retained.append(msg.copy())

        return retained
@@ -0,0 +1,247 @@
1
+ """
2
+ Logging callback for ComputerAgent that provides configurable logging of agent lifecycle events.
3
+ """
4
+
5
+ import json
6
+ import logging
7
+ from typing import Dict, List, Any, Optional, Union
8
+ from .base import AsyncCallbackHandler
9
+
10
+
11
def sanitize_image_urls(data: Any) -> Any:
    """
    Replace the value of every 'image_url' key, at any nesting depth, with '[omitted]'.

    Args:
        data: Any data structure (dict, list, or primitive type)

    Returns:
        Fresh dicts and lists mirroring ``data`` with each 'image_url' value
        swapped for '[omitted]'. Values that are neither dict nor list are
        returned as-is (not copied).
    """
    # Lists: rebuild element by element.
    if isinstance(data, list):
        return [sanitize_image_urls(element) for element in data]

    # Anything that is not a container is handed back unchanged.
    if not isinstance(data, dict):
        return data

    # Dicts: mask image_url, recurse into everything else.
    return {
        field: "[omitted]" if field == "image_url" else sanitize_image_urls(content)
        for field, content in data.items()
    }
39
+
40
+
41
class LoggingCallback(AsyncCallbackHandler):
    """
    Callback handler that logs agent lifecycle events with configurable verbosity.

    Logging levels:
    - DEBUG: All events including API calls, message preprocessing, and detailed outputs
    - INFO: Major lifecycle events (start/end, messages, outputs)
    - WARNING: Only warnings and errors
    - ERROR: Only errors

    Logging is best-effort: payloads that are not JSON-serialisable are
    stringified rather than allowed to raise out of a hook.
    """

    def __init__(self, logger: Optional[logging.Logger] = None, level: int = logging.INFO):
        """
        Initialize the logging callback.

        Args:
            logger: Logger instance to use. If None, creates a logger named 'agent.ComputerAgent'
            level: Logging level (logging.DEBUG, logging.INFO, etc.)
        """
        self.logger = logger or logging.getLogger('agent.ComputerAgent')
        self.level = level
        # Created up front so on_usage/on_run_end work even if on_run_start
        # was never called on this instance.
        self.total_usage: Dict[str, Any] = {}

        # Only configure the logger when nobody else has: a logger that
        # already has handlers keeps its existing handlers and level.
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
            self.logger.setLevel(level)

    @staticmethod
    def _dump(data: Any) -> str:
        """Pretty-print data as JSON, stringifying anything json cannot encode."""
        # default=str is deliberate: this output is only for log lines, and a
        # TypeError here would otherwise propagate out of the callback.
        return json.dumps(data, indent=2, default=str)

    def _update_usage(self, usage: Dict[str, Any]) -> None:
        """
        Merge a usage report into ``self.total_usage``.

        Nested dicts are merged recursively and numeric leaves are summed.
        Leaves that are neither dict nor number (e.g. ``None``) are skipped.
        """
        def add_dicts(target: Dict[str, Any], source: Dict[str, Any]) -> None:
            for key, value in source.items():
                if isinstance(value, dict):
                    nested = target.get(key)
                    if not isinstance(nested, dict):
                        nested = target[key] = {}
                    add_dicts(nested, value)
                elif isinstance(value, (int, float)):
                    current = target.get(key, 0)
                    if not isinstance(current, (int, float)):
                        current = 0
                    target[key] = current + value

        add_dicts(self.total_usage, usage)

    async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
        """Called before the run starts; clears the accumulated usage totals."""
        self.total_usage = {}

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """Called when usage information is received; adds it to the totals."""
        self._update_usage(usage)

    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
        """Called after the run ends; logs the accumulated usage at INFO level."""
        def format_dict(d, indent=0):
            lines = []
            prefix = f" - {' ' * indent}"
            for key, value in d.items():
                if isinstance(value, dict):
                    lines.append(f"{prefix}{key}:")
                    lines.extend(format_dict(value, indent + 1))
                elif isinstance(value, float):
                    # Float leaves are rendered as dollar amounts.
                    lines.append(f"{prefix}{key}: ${value:.4f}")
                else:
                    lines.append(f"{prefix}{key}: {value}")
            return lines

        formatted_output = "\n".join(format_dict(self.total_usage))
        self.logger.info(f"Total usage:\n{formatted_output}")

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Called before LLM processing starts; returns messages unchanged."""
        if self.logger.isEnabledFor(logging.INFO):
            self.logger.info(f"LLM processing started with {len(messages)} messages")
        if self.logger.isEnabledFor(logging.DEBUG):
            sanitized_messages = [sanitize_image_urls(msg) for msg in messages]
            self.logger.debug(f"LLM input messages: {self._dump(sanitized_messages)}")
        return messages

    async def on_llm_end(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Called after LLM processing ends; returns messages unchanged."""
        if self.logger.isEnabledFor(logging.DEBUG):
            sanitized_messages = [sanitize_image_urls(msg) for msg in messages]
            self.logger.debug(f"LLM output: {self._dump(sanitized_messages)}")
        return messages

    async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
        """Called when a computer call starts; logs the action type and arguments."""
        action = item.get("action", {})
        action_type = action.get("type", "unknown")
        action_args = {k: v for k, v in action.items() if k != "type"}

        # INFO level logging for the action
        self.logger.info(f"Computer: {action_type}({action_args})")

        # DEBUG level logging for full details
        if self.logger.isEnabledFor(logging.DEBUG):
            self.logger.debug(f"Computer call started: {self._dump(action)}")

    async def on_computer_call_end(self, item: Dict[str, Any], result: Any) -> None:
        """Called when a computer call ends; logs details at DEBUG level only."""
        if self.logger.isEnabledFor(logging.DEBUG):
            action = item.get("action", "unknown")
            self.logger.debug(f"Computer call completed: {self._dump(action)}")
            if result:
                sanitized_result = sanitize_image_urls(result)
                self.logger.debug(f"Computer call result: {self._dump(sanitized_result)}")

    async def on_function_call_start(self, item: Dict[str, Any]) -> None:
        """Called when a function call starts; logs its name and arguments."""
        name = item.get("name", "unknown")
        arguments = item.get("arguments", "{}")

        # INFO level logging for the function call
        self.logger.info(f"Function: {name}({arguments})")

        # DEBUG level logging for full details
        if self.logger.isEnabledFor(logging.DEBUG):
            self.logger.debug(f"Function call started: {name}")

    async def on_function_call_end(self, item: Dict[str, Any], result: Any) -> None:
        """Called when a function call ends; logs a truncated output preview."""
        # INFO level logging for function output (similar to function_call_output)
        if result:
            # Handle both list and direct result formats
            if isinstance(result, list):
                first = result[0]
                output = first.get("output", str(result)) if isinstance(first, dict) else str(first)
            else:
                output = str(result)

            # The "output" field is not guaranteed to be a string; coerce it
            # so the length check and slicing below cannot fail.
            output = str(output)

            # Truncate long outputs
            if len(output) > 100:
                output = output[:100] + "..."

            self.logger.info(f"Output: {output}")

        # DEBUG level logging for full details
        if self.logger.isEnabledFor(logging.DEBUG):
            name = item.get("name", "unknown")
            self.logger.debug(f"Function call completed: {name}")
            if result:
                self.logger.debug(f"Function call result: {self._dump(result)}")

    async def on_text(self, item: Dict[str, Any]) -> None:
        """
        Called when a text message is encountered.

        Logs assistant and user messages at INFO level and any other role at
        DEBUG level. ``content`` may be a list of content parts, a plain
        string, or missing/None.
        """
        # Get the role to determine if it's Agent or User
        role = item.get("role", "unknown")
        content_items = item.get("content") or []
        if isinstance(content_items, str):
            # Plain-string content is treated as one text part.
            content_items = [{"type": "output_text", "text": content_items}]

        # Process content items to build display text
        text_parts = []
        for content_item in content_items:
            content_type = content_item.get("type", "output_text")
            if content_type == "output_text":
                text_content = content_item.get("text", "") or ""
                if not text_content.strip():
                    text_parts.append("[empty]")
                elif len(text_content) > 2048:
                    # Truncate long text and add ellipsis
                    text_parts.append(text_content[:2048] + "...")
                else:
                    text_parts.append(text_content)
            else:
                # Non-text content, show as [type]
                text_parts.append(f"[{content_type}]")

        # Join all text parts
        display_text = ''.join(text_parts) if text_parts else "[empty]"

        # Log with appropriate level and format
        if role == "assistant":
            self.logger.info(f"Agent: {display_text}")
        elif role == "user":
            self.logger.info(f"User: {display_text}")
        else:
            # Fallback for unknown roles, use debug level
            if self.logger.isEnabledFor(logging.DEBUG):
                self.logger.debug(f"Text message ({role}): {display_text}")

    async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
        """Called when an API call is about to start; logs at DEBUG level only."""
        if self.logger.isEnabledFor(logging.DEBUG):
            model = kwargs.get("model", "unknown")
            self.logger.debug(f"API call starting for model: {model}")
            # Log sanitized messages if present
            if "messages" in kwargs:
                sanitized_messages = sanitize_image_urls(kwargs["messages"])
                self.logger.debug(f"API call messages: {self._dump(sanitized_messages)}")
            elif "input" in kwargs:
                sanitized_input = sanitize_image_urls(kwargs["input"])
                self.logger.debug(f"API call input: {self._dump(sanitized_input)}")

    async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
        """Called when an API call has completed; logs at DEBUG level only."""
        if self.logger.isEnabledFor(logging.DEBUG):
            model = kwargs.get("model", "unknown")
            self.logger.debug(f"API call completed for model: {model}")
            # `result` is typed Any and may not be JSON-encodable; _dump
            # stringifies such objects instead of raising.
            self.logger.debug(f"API call result: {self._dump(sanitize_image_urls(result))}")

    async def on_screenshot(self, item: Union[str, bytes], name: str = "screenshot") -> None:
        """Called when a screenshot is taken; logs its name and size at DEBUG level."""
        if self.logger.isEnabledFor(logging.DEBUG):
            # len() counts bytes for bytes input and characters for str input.
            image_size = len(item) / 1024
            self.logger.debug(f"Screenshot captured: {name} {image_size:.2f} KB")