cua-agent 0.3.1__py3-none-any.whl → 0.4.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (111) hide show
  1. agent/__init__.py +15 -51
  2. agent/__main__.py +21 -0
  3. agent/adapters/__init__.py +9 -0
  4. agent/adapters/huggingfacelocal_adapter.py +216 -0
  5. agent/agent.py +577 -0
  6. agent/callbacks/__init__.py +17 -0
  7. agent/callbacks/base.py +153 -0
  8. agent/callbacks/budget_manager.py +44 -0
  9. agent/callbacks/image_retention.py +139 -0
  10. agent/callbacks/logging.py +247 -0
  11. agent/callbacks/pii_anonymization.py +259 -0
  12. agent/callbacks/trajectory_saver.py +305 -0
  13. agent/cli.py +290 -0
  14. agent/computer_handler.py +107 -0
  15. agent/decorators.py +90 -0
  16. agent/loops/__init__.py +11 -0
  17. agent/loops/anthropic.py +728 -0
  18. agent/loops/omniparser.py +339 -0
  19. agent/loops/openai.py +95 -0
  20. agent/loops/uitars.py +688 -0
  21. agent/responses.py +207 -0
  22. agent/types.py +79 -0
  23. agent/ui/__init__.py +7 -1
  24. agent/ui/gradio/__init__.py +6 -19
  25. agent/ui/gradio/app.py +80 -1299
  26. agent/ui/gradio/ui_components.py +703 -0
  27. cua_agent-0.4.0b1.dist-info/METADATA +424 -0
  28. cua_agent-0.4.0b1.dist-info/RECORD +30 -0
  29. {cua_agent-0.3.1.dist-info → cua_agent-0.4.0b1.dist-info}/WHEEL +1 -1
  30. agent/core/__init__.py +0 -27
  31. agent/core/agent.py +0 -210
  32. agent/core/base.py +0 -217
  33. agent/core/callbacks.py +0 -200
  34. agent/core/experiment.py +0 -249
  35. agent/core/factory.py +0 -122
  36. agent/core/messages.py +0 -332
  37. agent/core/provider_config.py +0 -21
  38. agent/core/telemetry.py +0 -142
  39. agent/core/tools/__init__.py +0 -21
  40. agent/core/tools/base.py +0 -74
  41. agent/core/tools/bash.py +0 -52
  42. agent/core/tools/collection.py +0 -46
  43. agent/core/tools/computer.py +0 -113
  44. agent/core/tools/edit.py +0 -67
  45. agent/core/tools/manager.py +0 -56
  46. agent/core/tools.py +0 -32
  47. agent/core/types.py +0 -88
  48. agent/core/visualization.py +0 -197
  49. agent/providers/__init__.py +0 -4
  50. agent/providers/anthropic/__init__.py +0 -6
  51. agent/providers/anthropic/api/client.py +0 -360
  52. agent/providers/anthropic/api/logging.py +0 -150
  53. agent/providers/anthropic/api_handler.py +0 -140
  54. agent/providers/anthropic/callbacks/__init__.py +0 -5
  55. agent/providers/anthropic/callbacks/manager.py +0 -65
  56. agent/providers/anthropic/loop.py +0 -568
  57. agent/providers/anthropic/prompts.py +0 -23
  58. agent/providers/anthropic/response_handler.py +0 -226
  59. agent/providers/anthropic/tools/__init__.py +0 -33
  60. agent/providers/anthropic/tools/base.py +0 -88
  61. agent/providers/anthropic/tools/bash.py +0 -66
  62. agent/providers/anthropic/tools/collection.py +0 -34
  63. agent/providers/anthropic/tools/computer.py +0 -396
  64. agent/providers/anthropic/tools/edit.py +0 -326
  65. agent/providers/anthropic/tools/manager.py +0 -54
  66. agent/providers/anthropic/tools/run.py +0 -42
  67. agent/providers/anthropic/types.py +0 -16
  68. agent/providers/anthropic/utils.py +0 -367
  69. agent/providers/omni/__init__.py +0 -8
  70. agent/providers/omni/api_handler.py +0 -42
  71. agent/providers/omni/clients/anthropic.py +0 -103
  72. agent/providers/omni/clients/base.py +0 -35
  73. agent/providers/omni/clients/oaicompat.py +0 -195
  74. agent/providers/omni/clients/ollama.py +0 -122
  75. agent/providers/omni/clients/openai.py +0 -155
  76. agent/providers/omni/clients/utils.py +0 -25
  77. agent/providers/omni/image_utils.py +0 -34
  78. agent/providers/omni/loop.py +0 -990
  79. agent/providers/omni/parser.py +0 -307
  80. agent/providers/omni/prompts.py +0 -64
  81. agent/providers/omni/tools/__init__.py +0 -30
  82. agent/providers/omni/tools/base.py +0 -29
  83. agent/providers/omni/tools/bash.py +0 -74
  84. agent/providers/omni/tools/computer.py +0 -179
  85. agent/providers/omni/tools/manager.py +0 -61
  86. agent/providers/omni/utils.py +0 -236
  87. agent/providers/openai/__init__.py +0 -6
  88. agent/providers/openai/api_handler.py +0 -456
  89. agent/providers/openai/loop.py +0 -472
  90. agent/providers/openai/response_handler.py +0 -205
  91. agent/providers/openai/tools/__init__.py +0 -15
  92. agent/providers/openai/tools/base.py +0 -79
  93. agent/providers/openai/tools/computer.py +0 -326
  94. agent/providers/openai/tools/manager.py +0 -106
  95. agent/providers/openai/types.py +0 -36
  96. agent/providers/openai/utils.py +0 -98
  97. agent/providers/uitars/__init__.py +0 -1
  98. agent/providers/uitars/clients/base.py +0 -35
  99. agent/providers/uitars/clients/mlxvlm.py +0 -263
  100. agent/providers/uitars/clients/oaicompat.py +0 -214
  101. agent/providers/uitars/loop.py +0 -660
  102. agent/providers/uitars/prompts.py +0 -63
  103. agent/providers/uitars/tools/__init__.py +0 -1
  104. agent/providers/uitars/tools/computer.py +0 -283
  105. agent/providers/uitars/tools/manager.py +0 -60
  106. agent/providers/uitars/utils.py +0 -264
  107. agent/telemetry.py +0 -21
  108. agent/ui/__main__.py +0 -15
  109. cua_agent-0.3.1.dist-info/METADATA +0 -295
  110. cua_agent-0.3.1.dist-info/RECORD +0 -87
  111. {cua_agent-0.3.1.dist-info → cua_agent-0.4.0b1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,259 @@
1
+ """
2
+ PII anonymization callback handler using Microsoft Presidio for text and image redaction.
3
+ """
4
+
5
+ from typing import List, Dict, Any, Optional, Tuple
6
+ from .base import AsyncCallbackHandler
7
+ import base64
8
+ import io
9
+ import logging
10
+
11
+ try:
12
+ from presidio_analyzer import AnalyzerEngine
13
+ from presidio_anonymizer import AnonymizerEngine, DeanonymizeEngine
14
+ from presidio_anonymizer.entities import RecognizerResult, OperatorConfig
15
+ from presidio_image_redactor import ImageRedactorEngine
16
+ from PIL import Image
17
+ PRESIDIO_AVAILABLE = True
18
+ except ImportError:
19
+ PRESIDIO_AVAILABLE = False
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
class PIIAnonymizationCallback(AsyncCallbackHandler):
    """
    Callback handler that anonymizes PII in text and images using Microsoft Presidio.

    This handler:
    1. Anonymizes PII in message text before the messages are sent to the agent loop
    2. Restores the original text in ``computer_call`` arguments after the agent loop
       (only when an argument exactly equals a previously anonymized string)
    3. Redacts PII from images in computer_call_output messages
    """

    def __init__(
        self,
        anonymize_text: bool = True,
        anonymize_images: bool = True,
        entities_to_anonymize: Optional[List[str]] = None,
        anonymization_operator: str = "replace",
        image_redaction_color: Tuple[int, int, int] = (255, 192, 203)  # Pink
    ):
        """
        Initialize the PII anonymization callback.

        Args:
            anonymize_text: Whether to anonymize text content
            anonymize_images: Whether to redact images
            entities_to_anonymize: List of entity types to anonymize (None for all)
            anonymization_operator: Presidio operator to use ("replace", "mask", "redact", etc.)
            image_redaction_color: RGB color for image redaction

        Raises:
            ImportError: If the Presidio packages (or PIL) could not be imported.
        """
        if not PRESIDIO_AVAILABLE:
            raise ImportError(
                "Presidio is not available. Install with: "
                "pip install presidio-analyzer presidio-anonymizer presidio-image-redactor"
            )

        self.anonymize_text = anonymize_text
        self.anonymize_images = anonymize_images
        self.entities_to_anonymize = entities_to_anonymize
        self.anonymization_operator = anonymization_operator
        self.image_redaction_color = image_redaction_color

        # Initialize Presidio engines
        self.analyzer = AnalyzerEngine()
        self.anonymizer = AnonymizerEngine()
        self.deanonymizer = DeanonymizeEngine()
        self.image_redactor = ImageRedactorEngine()

        # Store anonymization mappings for deanonymization.
        # Keyed by str(hash(original_text)); each value holds the original text,
        # the anonymized text and the analyzer results.
        self.anonymization_mappings: Dict[str, Any] = {}

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Anonymize PII in messages before sending to agent loop.

        Args:
            messages: List of message dictionaries

        Returns:
            A new list of (shallow-copied) messages with PII anonymized. The
            input list is returned unchanged when both text and image
            anonymization are disabled.
        """
        if not self.anonymize_text and not self.anonymize_images:
            return messages

        return [await self._anonymize_message(msg) for msg in messages]

    async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Deanonymize PII in tool calls after agent loop.

        Args:
            output: List of output dictionaries

        Returns:
            List of output items; ``computer_call`` / ``computer_call_output``
            items are passed through ``_deanonymize_item``, all other items are
            returned as-is. The input is returned unchanged when text
            anonymization is disabled.
        """
        if not self.anonymize_text:
            return output

        deanonymized_output = []
        for item in output:
            # Only deanonymize tool calls and computer_call messages
            if item.get("type") in ["computer_call", "computer_call_output"]:
                deanonymized_output.append(await self._deanonymize_item(item))
            else:
                deanonymized_output.append(item)

        return deanonymized_output

    async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
        """Anonymize PII in a single message and return a modified shallow copy."""
        msg_copy = message.copy()

        # Anonymize text content
        if self.anonymize_text:
            msg_copy = await self._anonymize_text_content(msg_copy)

        # Redact images in computer_call_output
        if self.anonymize_images and msg_copy.get("type") == "computer_call_output":
            msg_copy = await self._redact_image_content(msg_copy)

        return msg_copy

    async def _anonymize_text_content(self, message: Dict[str, Any]) -> Dict[str, Any]:
        """
        Anonymize the text found under the message's "content" key.

        "content" may be a plain string or a list of parts; in a list only dict
        parts with ``type == "text"`` are rewritten, every other part is kept.
        """
        msg_copy = message.copy()

        content = msg_copy.get("content", [])
        if isinstance(content, str):
            anonymized_text, _ = await self._anonymize_text(content)
            msg_copy["content"] = anonymized_text
        elif isinstance(content, list):
            anonymized_content = []
            for item in content:
                if isinstance(item, dict) and item.get("type") == "text":
                    anonymized_text, _ = await self._anonymize_text(item.get("text", ""))
                    # Copy the part so the caller's message is not mutated
                    item_copy = item.copy()
                    item_copy["text"] = anonymized_text
                    anonymized_content.append(item_copy)
                else:
                    anonymized_content.append(item)
            msg_copy["content"] = anonymized_content

        return msg_copy

    async def _redact_image_content(self, message: Dict[str, Any]) -> Dict[str, Any]:
        """
        Redact PII from images in computer_call_output messages.

        Only ``output["image_url"]`` values that are ``data:image/...`` URLs are
        processed; the redacted image is re-encoded as a PNG data URL. On any
        failure a warning is logged and the message copy is returned unredacted.
        """
        msg_copy = message.copy()
        output = msg_copy.get("output", {})

        if isinstance(output, dict) and "image_url" in output:
            try:
                # Extract base64 image data
                image_url = output["image_url"]
                if image_url.startswith("data:image/"):
                    # Parse data URL: "<header>,<base64 payload>"
                    _, data = image_url.split(",", 1)
                    image_data = base64.b64decode(data)

                    # Load image with PIL
                    image = Image.open(io.BytesIO(image_data))

                    # Redact PII from image
                    redacted_image = self.image_redactor.redact(image, self.image_redaction_color)

                    # Convert back to base64
                    buffer = io.BytesIO()
                    redacted_image.save(buffer, format="PNG")
                    redacted_data = base64.b64encode(buffer.getvalue()).decode()

                    # Update image URL on a copy so the caller's dict is untouched
                    output_copy = output.copy()
                    output_copy["image_url"] = f"data:image/png;base64,{redacted_data}"
                    msg_copy["output"] = output_copy

            except Exception as e:
                # Best effort: redaction failures must not break the agent loop
                logger.warning("Failed to redact image: %s", e)

        return msg_copy

    async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
        """
        Deanonymize PII in tool calls.

        Only ``computer_call`` items are modified: every string value in their
        "args" dict is passed through ``_deanonymize_text``. Any other item is
        returned as an unchanged shallow copy.
        """
        item_copy = item.copy()

        # Handle computer_call arguments
        if item.get("type") == "computer_call":
            args = item_copy.get("args", {})
            if isinstance(args, dict):
                deanonymized_args = {}
                for key, value in args.items():
                    if isinstance(value, str):
                        deanonymized_value, _ = await self._deanonymize_text(value)
                        deanonymized_args[key] = deanonymized_value
                    else:
                        deanonymized_args[key] = value
                item_copy["args"] = deanonymized_args

        return item_copy

    # NOTE: the element type is a string forward reference on purpose. When the
    # optional Presidio import fails, ``RecognizerResult`` is undefined and an
    # unquoted annotation would raise NameError while the class is being
    # defined, i.e. before __init__ can raise its helpful ImportError.
    async def _anonymize_text(self, text: str) -> Tuple[str, List["RecognizerResult"]]:
        """
        Anonymize PII in text and return the anonymized text and results.

        Returns ``(text, [])`` unchanged when the text is not a string, is
        blank, contains no detected entities, or when Presidio raises.
        """
        # Non-string or blank input has nothing to analyze
        if not isinstance(text, str) or not text.strip():
            return text, []

        try:
            # Analyze text for PII
            analyzer_results = self.analyzer.analyze(
                text=text,
                entities=self.entities_to_anonymize,
                language="en"
            )

            if not analyzer_results:
                return text, []

            # Apply the configured operator to every entity type that was found
            operators = {
                entity_type: OperatorConfig(self.anonymization_operator)
                for entity_type in {result.entity_type for result in analyzer_results}
            }
            anonymized_result = self.anonymizer.anonymize(
                text=text,
                analyzer_results=analyzer_results,
                operators=operators
            )

            # Store mapping for deanonymization
            mapping_key = str(hash(text))
            self.anonymization_mappings[mapping_key] = {
                "original": text,
                "anonymized": anonymized_result.text,
                "results": analyzer_results
            }

            return anonymized_result.text, analyzer_results

        except Exception as e:
            # Best effort: fall back to the original text
            logger.warning("Failed to anonymize text: %s", e)
            return text, []

    async def _deanonymize_text(self, text: str) -> Tuple[str, bool]:
        """
        Attempt to deanonymize text using stored mappings.

        Returns:
            ``(original, True)`` for the first stored mapping whose anonymized
            text equals ``text`` exactly, otherwise ``(text, False)``.
        """
        try:
            # Look for matching anonymized text in mappings
            for mapping in self.anonymization_mappings.values():
                if mapping["anonymized"] == text:
                    return mapping["original"], True

            # If no mapping found, return the text as given
            return text, False

        except Exception as e:
            logger.warning("Failed to deanonymize text: %s", e)
            return text, False
@@ -0,0 +1,305 @@
1
+ """
2
+ Trajectory saving callback handler for ComputerAgent.
3
+ """
4
+
5
+ import os
6
+ import json
7
+ import uuid
8
+ from datetime import datetime
9
+ import base64
10
+ from pathlib import Path
11
+ from typing import List, Dict, Any, Optional, Union, override
12
+ from PIL import Image, ImageDraw
13
+ import io
14
+ from .base import AsyncCallbackHandler
15
+
16
def sanitize_image_urls(data: Any) -> Any:
    """
    Recursively search for 'image_url' keys and set their values to '[omitted]'.

    Dicts, lists and tuples are traversed; the input is never mutated.

    Args:
        data: Any data structure (dict, list, tuple, or primitive type)

    Returns:
        A copy of the containers in ``data`` with all 'image_url' values
        replaced with '[omitted]'. Non-container values are returned as-is.
    """
    if isinstance(data, dict):
        # The value under "image_url" is dropped wholesale, whatever its type
        return {
            key: "[omitted]" if key == "image_url" else sanitize_image_urls(value)
            for key, value in data.items()
        }

    if isinstance(data, list):
        return [sanitize_image_urls(item) for item in data]

    if isinstance(data, tuple):
        # json serializes tuples as arrays, so image data nested in a tuple
        # would otherwise end up in the saved file unsanitized
        return tuple(sanitize_image_urls(item) for item in data)

    # For primitive types (str, int, bool, None, etc.), return as-is
    return data
44
+
45
+
46
class TrajectorySaverCallback(AsyncCallbackHandler):
    """
    Callback handler that saves agent trajectories to disk.

    Saves each run as a separate trajectory with unique ID, and each turn
    within the trajectory gets its own folder with screenshots and responses.
    """

    def __init__(self, trajectory_dir: str):
        """
        Initialize trajectory saver.

        Args:
            trajectory_dir: Base directory to save trajectories
        """
        self.trajectory_dir = Path(trajectory_dir)
        self.trajectory_id: Optional[str] = None
        self.current_turn: int = 0
        self.current_artifact: int = 0
        self.model: Optional[str] = None
        self.total_usage: Dict[str, Any] = {}

        # Ensure trajectory directory exists
        self.trajectory_dir.mkdir(parents=True, exist_ok=True)

    def _get_turn_dir(self) -> Path:
        """
        Get (and create if needed) the directory for the current turn.

        Raises:
            ValueError: If no trajectory has been started yet.
        """
        if not self.trajectory_id:
            raise ValueError("Trajectory not initialized - call _on_run_start first")

        # format: trajectory_id/turn_000
        turn_dir = self.trajectory_dir / self.trajectory_id / f"turn_{self.current_turn:03d}"
        turn_dir.mkdir(parents=True, exist_ok=True)
        return turn_dir

    def _save_artifact(self, name: str, artifact: Union[str, bytes, Dict[str, Any]]) -> None:
        """
        Save an artifact to the current turn directory.

        ``bytes`` are written verbatim to ``NNNN_<name>.png``; anything else is
        dumped as JSON to ``NNNN_<name>.json`` after 'image_url' values have
        been replaced. The artifact counter is incremented afterwards.
        """
        turn_dir = self._get_turn_dir()
        artifact_filename = f"{self.current_artifact:04d}_{name}"
        if isinstance(artifact, bytes):
            # format: turn_000/0000_name.png
            artifact_path = turn_dir / f"{artifact_filename}.png"
            with open(artifact_path, "wb") as f:
                f.write(artifact)
        else:
            # format: turn_000/0000_name.json
            artifact_path = turn_dir / f"{artifact_filename}.json"
            with open(artifact_path, "w", encoding="utf-8") as f:
                json.dump(sanitize_image_urls(artifact), f, indent=2)
        self.current_artifact += 1

    def _update_usage(self, usage: Dict[str, Any]) -> None:
        """
        Add ``usage`` into ``self.total_usage``, recursing into nested dicts.

        Numeric leaves are summed per key. Non-numeric leaves (e.g. ``None`` or
        strings) are skipped instead of raising TypeError.
        """
        def add_dicts(target: Dict[str, Any], source: Dict[str, Any]) -> None:
            for key, value in source.items():
                if isinstance(value, dict):
                    add_dicts(target.setdefault(key, {}), value)
                elif isinstance(value, (int, float)):
                    target[key] = target.get(key, 0) + value
                # anything else cannot be accumulated and is ignored

        add_dicts(self.total_usage, usage)

    @override
    async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
        """
        Initialize trajectory tracking for a new run.

        Resets the counters, derives a new trajectory id from the date, a
        shortened model name, the time and a random suffix, creates the
        trajectory directory and writes the initial ``metadata.json``.
        """
        # A missing or None model falls back to "unknown" so the id can be built
        model = kwargs.get("model") or "unknown"
        model_name_short = model.split("+")[-1].split("/")[-1].lower()[:16]
        if "+" in model:
            # Composed "a+b" model strings: prefix with the first 4 chars of "a"
            model_name_short = model.split("+")[0].lower()[:4] + "_" + model_name_short

        # id format: yyyy-mm-dd_model_hhmmss_uuid[:4]
        now = datetime.now()
        self.trajectory_id = f"{now.strftime('%Y-%m-%d')}_{model_name_short}_{now.strftime('%H%M%S')}_{str(uuid.uuid4())[:4]}"
        self.current_turn = 0
        self.current_artifact = 0
        self.model = model
        self.total_usage = {}

        # Create trajectory directory
        trajectory_path = self.trajectory_dir / self.trajectory_id
        trajectory_path.mkdir(parents=True, exist_ok=True)

        # Save trajectory metadata
        metadata = {
            "trajectory_id": self.trajectory_id,
            # timestamp field of a freshly generated version-1 UUID
            "created_at": str(uuid.uuid1().time),
            "status": "running",
            # Same sanitization as every other saved artifact: keep inline
            # image payloads out of the metadata file
            "kwargs": sanitize_image_urls(kwargs),
        }

        with open(trajectory_path / "metadata.json", "w", encoding="utf-8") as f:
            json.dump(metadata, f, indent=2)

    @override
    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
        """Finalize run tracking by updating metadata with completion status, usage, and new items."""
        if not self.trajectory_id:
            return

        trajectory_path = self.trajectory_dir / self.trajectory_id
        metadata_path = trajectory_path / "metadata.json"

        # Read existing metadata (start from scratch if the file is gone)
        if metadata_path.exists():
            with open(metadata_path, "r", encoding="utf-8") as f:
                metadata = json.load(f)
        else:
            metadata = {}

        # Update metadata with completion info
        metadata.update({
            "status": "completed",
            "completed_at": str(uuid.uuid1().time),
            "total_usage": self.total_usage,
            "new_items": sanitize_image_urls(new_items),
            "total_turns": self.current_turn
        })

        # Save updated metadata
        with open(metadata_path, "w", encoding="utf-8") as f:
            json.dump(metadata, f, indent=2)

    @override
    async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
        """Save the arguments of an API call that is about to be made."""
        if not self.trajectory_id:
            return

        self._save_artifact("api_start", { "kwargs": kwargs })

    @override
    async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
        """Save API call result."""
        if not self.trajectory_id:
            return

        self._save_artifact("api_result", { "kwargs": kwargs, "result": result })

    @override
    async def on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None:
        """
        Save a screenshot.

        Args:
            screenshot: Raw image bytes, or a base64 string (optionally a
                ``data:image/...;base64,`` URL).
            name: Artifact name used in the file name.
        """
        # Like every other hook: do nothing when no run is active instead of
        # raising from _get_turn_dir
        if not self.trajectory_id:
            return

        if isinstance(screenshot, str):
            if screenshot.startswith("data:image/"):
                # Format: data:image/png;base64,<base64_data>
                screenshot = screenshot.split(",", 1)[1]
            screenshot = base64.b64decode(screenshot)
        self._save_artifact(name, screenshot)

    @override
    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """Called when usage information is received."""
        self._update_usage(usage)

    @override
    async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
        """Save responses to the current turn directory and advance the turn counter."""
        if not self.trajectory_id:
            return

        # Save responses
        response_data = {
            "timestamp": str(uuid.uuid1().time),
            "model": self.model,
            "kwargs": kwargs,
            "response": responses
        }

        self._save_artifact("agent_response", response_data)

        # Increment turn counter
        self.current_turn += 1

    def _draw_crosshair_on_image(self, image_bytes: bytes, x: int, y: int) -> bytes:
        """
        Draw a red dot and crosshair at the specified coordinates on the image.

        Args:
            image_bytes: The original image as bytes
            x: X coordinate for the crosshair
            y: Y coordinate for the crosshair

        Returns:
            Modified image as PNG bytes with red dot and crosshair
        """
        # Open the image
        image = Image.open(io.BytesIO(image_bytes))
        draw = ImageDraw.Draw(image)

        # Draw crosshair lines (red, 2px thick)
        crosshair_size = 20
        line_width = 2
        color = "red"

        # Horizontal line
        draw.line([(x - crosshair_size, y), (x + crosshair_size, y)], fill=color, width=line_width)
        # Vertical line
        draw.line([(x, y - crosshair_size), (x, y + crosshair_size)], fill=color, width=line_width)

        # Draw center dot (filled circle)
        dot_radius = 3
        draw.ellipse([(x - dot_radius, y - dot_radius), (x + dot_radius, y + dot_radius)], fill=color)

        # Convert back to bytes
        output = io.BytesIO()
        image.save(output, format='PNG')
        return output.getvalue()

    @override
    async def on_computer_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None:
        """
        Called when a computer call has completed.

        Saves the call and its result as JSON and, when the action carries x/y
        coordinates, an annotated copy of the first screenshot in the result
        with a crosshair at those coordinates.
        """
        if not self.trajectory_id:
            return

        self._save_artifact("computer_call_result", { "item": item, "result": result })

        # Check if action has x/y coordinates and there's a screenshot in the result
        action = item.get("action") or {}
        if isinstance(action, dict) and "x" in action and "y" in action:
            # Look for screenshot in the result
            for result_item in result:
                output = result_item.get("output")
                is_screenshot = (
                    result_item.get("type") == "computer_call_output"
                    and isinstance(output, dict)
                    and output.get("type") == "input_image"
                )
                if not is_screenshot:
                    continue

                image_url = output.get("image_url")
                if isinstance(image_url, str):
                    # Extract base64 image data
                    if image_url.startswith("data:image/"):
                        # Format: data:image/png;base64,<base64_data>
                        base64_data = image_url.split(",", 1)[1]
                    else:
                        # Assume it's just base64 data
                        base64_data = image_url

                    try:
                        # Decode the image
                        image_bytes = base64.b64decode(base64_data)

                        # Draw crosshair at the action coordinates
                        annotated_image = self._draw_crosshair_on_image(
                            image_bytes,
                            int(action["x"]),
                            int(action["y"])
                        )

                        # Save as screenshot_action
                        self._save_artifact("screenshot_action", annotated_image)

                    except Exception as e:
                        # If annotation fails, just log and continue
                        print(f"Failed to annotate screenshot: {e}")

                break  # Only process the first screenshot found

        # Increment turn counter
        # NOTE(review): on_responses also increments current_turn, so a turn
        # with a computer call advances the counter twice - confirm intended.
        self.current_turn += 1