optexity-browser-use 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. browser_use/__init__.py +157 -0
  2. browser_use/actor/__init__.py +11 -0
  3. browser_use/actor/element.py +1175 -0
  4. browser_use/actor/mouse.py +134 -0
  5. browser_use/actor/page.py +561 -0
  6. browser_use/actor/playground/flights.py +41 -0
  7. browser_use/actor/playground/mixed_automation.py +54 -0
  8. browser_use/actor/playground/playground.py +236 -0
  9. browser_use/actor/utils.py +176 -0
  10. browser_use/agent/cloud_events.py +282 -0
  11. browser_use/agent/gif.py +424 -0
  12. browser_use/agent/judge.py +170 -0
  13. browser_use/agent/message_manager/service.py +473 -0
  14. browser_use/agent/message_manager/utils.py +52 -0
  15. browser_use/agent/message_manager/views.py +98 -0
  16. browser_use/agent/prompts.py +413 -0
  17. browser_use/agent/service.py +2316 -0
  18. browser_use/agent/system_prompt.md +185 -0
  19. browser_use/agent/system_prompt_flash.md +10 -0
  20. browser_use/agent/system_prompt_no_thinking.md +183 -0
  21. browser_use/agent/views.py +743 -0
  22. browser_use/browser/__init__.py +41 -0
  23. browser_use/browser/cloud/cloud.py +203 -0
  24. browser_use/browser/cloud/views.py +89 -0
  25. browser_use/browser/events.py +578 -0
  26. browser_use/browser/profile.py +1158 -0
  27. browser_use/browser/python_highlights.py +548 -0
  28. browser_use/browser/session.py +3225 -0
  29. browser_use/browser/session_manager.py +399 -0
  30. browser_use/browser/video_recorder.py +162 -0
  31. browser_use/browser/views.py +200 -0
  32. browser_use/browser/watchdog_base.py +260 -0
  33. browser_use/browser/watchdogs/__init__.py +0 -0
  34. browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
  35. browser_use/browser/watchdogs/crash_watchdog.py +335 -0
  36. browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
  37. browser_use/browser/watchdogs/dom_watchdog.py +817 -0
  38. browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
  39. browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
  40. browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
  41. browser_use/browser/watchdogs/popups_watchdog.py +143 -0
  42. browser_use/browser/watchdogs/recording_watchdog.py +126 -0
  43. browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
  44. browser_use/browser/watchdogs/security_watchdog.py +280 -0
  45. browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
  46. browser_use/cli.py +2359 -0
  47. browser_use/code_use/__init__.py +16 -0
  48. browser_use/code_use/formatting.py +192 -0
  49. browser_use/code_use/namespace.py +665 -0
  50. browser_use/code_use/notebook_export.py +276 -0
  51. browser_use/code_use/service.py +1340 -0
  52. browser_use/code_use/system_prompt.md +574 -0
  53. browser_use/code_use/utils.py +150 -0
  54. browser_use/code_use/views.py +171 -0
  55. browser_use/config.py +505 -0
  56. browser_use/controller/__init__.py +3 -0
  57. browser_use/dom/enhanced_snapshot.py +161 -0
  58. browser_use/dom/markdown_extractor.py +169 -0
  59. browser_use/dom/playground/extraction.py +312 -0
  60. browser_use/dom/playground/multi_act.py +32 -0
  61. browser_use/dom/serializer/clickable_elements.py +200 -0
  62. browser_use/dom/serializer/code_use_serializer.py +287 -0
  63. browser_use/dom/serializer/eval_serializer.py +478 -0
  64. browser_use/dom/serializer/html_serializer.py +212 -0
  65. browser_use/dom/serializer/paint_order.py +197 -0
  66. browser_use/dom/serializer/serializer.py +1170 -0
  67. browser_use/dom/service.py +825 -0
  68. browser_use/dom/utils.py +129 -0
  69. browser_use/dom/views.py +906 -0
  70. browser_use/exceptions.py +5 -0
  71. browser_use/filesystem/__init__.py +0 -0
  72. browser_use/filesystem/file_system.py +619 -0
  73. browser_use/init_cmd.py +376 -0
  74. browser_use/integrations/gmail/__init__.py +24 -0
  75. browser_use/integrations/gmail/actions.py +115 -0
  76. browser_use/integrations/gmail/service.py +225 -0
  77. browser_use/llm/__init__.py +155 -0
  78. browser_use/llm/anthropic/chat.py +242 -0
  79. browser_use/llm/anthropic/serializer.py +312 -0
  80. browser_use/llm/aws/__init__.py +36 -0
  81. browser_use/llm/aws/chat_anthropic.py +242 -0
  82. browser_use/llm/aws/chat_bedrock.py +289 -0
  83. browser_use/llm/aws/serializer.py +257 -0
  84. browser_use/llm/azure/chat.py +91 -0
  85. browser_use/llm/base.py +57 -0
  86. browser_use/llm/browser_use/__init__.py +3 -0
  87. browser_use/llm/browser_use/chat.py +201 -0
  88. browser_use/llm/cerebras/chat.py +193 -0
  89. browser_use/llm/cerebras/serializer.py +109 -0
  90. browser_use/llm/deepseek/chat.py +212 -0
  91. browser_use/llm/deepseek/serializer.py +109 -0
  92. browser_use/llm/exceptions.py +29 -0
  93. browser_use/llm/google/__init__.py +3 -0
  94. browser_use/llm/google/chat.py +542 -0
  95. browser_use/llm/google/serializer.py +120 -0
  96. browser_use/llm/groq/chat.py +229 -0
  97. browser_use/llm/groq/parser.py +158 -0
  98. browser_use/llm/groq/serializer.py +159 -0
  99. browser_use/llm/messages.py +238 -0
  100. browser_use/llm/models.py +271 -0
  101. browser_use/llm/oci_raw/__init__.py +10 -0
  102. browser_use/llm/oci_raw/chat.py +443 -0
  103. browser_use/llm/oci_raw/serializer.py +229 -0
  104. browser_use/llm/ollama/chat.py +97 -0
  105. browser_use/llm/ollama/serializer.py +143 -0
  106. browser_use/llm/openai/chat.py +264 -0
  107. browser_use/llm/openai/like.py +15 -0
  108. browser_use/llm/openai/serializer.py +165 -0
  109. browser_use/llm/openrouter/chat.py +211 -0
  110. browser_use/llm/openrouter/serializer.py +26 -0
  111. browser_use/llm/schema.py +176 -0
  112. browser_use/llm/views.py +48 -0
  113. browser_use/logging_config.py +330 -0
  114. browser_use/mcp/__init__.py +18 -0
  115. browser_use/mcp/__main__.py +12 -0
  116. browser_use/mcp/client.py +544 -0
  117. browser_use/mcp/controller.py +264 -0
  118. browser_use/mcp/server.py +1114 -0
  119. browser_use/observability.py +204 -0
  120. browser_use/py.typed +0 -0
  121. browser_use/sandbox/__init__.py +41 -0
  122. browser_use/sandbox/sandbox.py +637 -0
  123. browser_use/sandbox/views.py +132 -0
  124. browser_use/screenshots/__init__.py +1 -0
  125. browser_use/screenshots/service.py +52 -0
  126. browser_use/sync/__init__.py +6 -0
  127. browser_use/sync/auth.py +357 -0
  128. browser_use/sync/service.py +161 -0
  129. browser_use/telemetry/__init__.py +51 -0
  130. browser_use/telemetry/service.py +112 -0
  131. browser_use/telemetry/views.py +101 -0
  132. browser_use/tokens/__init__.py +0 -0
  133. browser_use/tokens/custom_pricing.py +24 -0
  134. browser_use/tokens/mappings.py +4 -0
  135. browser_use/tokens/service.py +580 -0
  136. browser_use/tokens/views.py +108 -0
  137. browser_use/tools/registry/service.py +572 -0
  138. browser_use/tools/registry/views.py +174 -0
  139. browser_use/tools/service.py +1675 -0
  140. browser_use/tools/utils.py +82 -0
  141. browser_use/tools/views.py +100 -0
  142. browser_use/utils.py +670 -0
  143. optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
  144. optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
  145. optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
  146. optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
  147. optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,548 @@
1
+ """Python-based highlighting system for drawing bounding boxes on screenshots.
2
+
3
+ This module replaces JavaScript-based highlighting with fast Python image processing
4
+ to draw bounding boxes around interactive elements directly on screenshots.
5
+ """
6
+
7
+ import asyncio
8
+ import base64
9
+ import io
10
+ import logging
11
+ import os
12
+
13
+ from PIL import Image, ImageDraw, ImageFont
14
+
15
+ from browser_use.dom.views import DOMSelectorMap, EnhancedDOMTreeNode
16
+ from browser_use.observability import observe_debug
17
+ from browser_use.utils import time_execution_async
18
+
19
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Fonts that have already been looked up, keyed by (label, font size).
# A stored None records that no candidate could be loaded for that size,
# so the search is not repeated on every call.
_FONT_CACHE: dict[tuple[str, int], ImageFont.FreeTypeFont | None] = {}

# Candidate font files, tried in this order until one of them loads.
_FONT_PATHS = [
    '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf',  # Linux (Debian/Ubuntu)
    '/usr/share/fonts/TTF/DejaVuSans-Bold.ttf',  # Linux (Arch/Fedora)
    '/System/Library/Fonts/Arial.ttf',  # macOS
    'C:\\Windows\\Fonts\\arial.ttf',  # Windows
    'arial.ttf',  # Windows (system path)
    'Arial Bold.ttf',  # macOS alternative
    '/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf',  # Linux alternative
]
34
+
35
+
36
def get_cross_platform_font(font_size: int) -> ImageFont.FreeTypeFont | None:
    """Return a TrueType font of the requested size, loading it at most once.

    The paths in ``_FONT_PATHS`` are tried in order and the first one that
    loads is used. The outcome is stored in ``_FONT_CACHE`` either way, so a
    size for which nothing could be loaded is not searched for again.

    Args:
        font_size: Size handed to ``ImageFont.truetype``.

    Returns:
        The loaded font, or None when every candidate path raised ``OSError``.
    """
    key = ('system_font', font_size)

    # Cached answers (including a cached None) short-circuit the search.
    try:
        return _FONT_CACHE[key]
    except KeyError:
        pass

    loaded = None
    for candidate in _FONT_PATHS:
        try:
            loaded = ImageFont.truetype(candidate, font_size)
        except OSError:
            # This candidate is not usable here; move on to the next one.
            continue
        break

    # Remember the result even when it is None to avoid repeated attempts.
    _FONT_CACHE[key] = loaded
    return loaded
64
+
65
+
66
def cleanup_font_cache() -> None:
    """Empty the module's font cache.

    Intended for long-running applications that want to release the cached
    font objects. The cache dict itself is kept and only its entries are
    removed, so later lookups simply repopulate it.
    """
    # clear() mutates the shared dict in place; the name is never rebound,
    # so no `global` declaration is required.
    _FONT_CACHE.clear()
70
+
71
+
72
# Highlight colour (hex string) per HTML tag name. The 'default' entry is the
# colour used for any interactive element whose tag is not listed.
ELEMENT_COLORS = {
    'button': '#FF6B6B',  # Red for buttons
    'input': '#4ECDC4',  # Teal for inputs
    'select': '#45B7D1',  # Blue for dropdowns
    'a': '#96CEB4',  # Green for links
    'textarea': '#FF8C42',  # Orange for text areas (was yellow, now more visible)
    'default': '#DDA0DD',  # Light purple for other interactive elements
}

# Element type mappings: each known tag name maps to itself.
ELEMENT_TYPE_MAP = {tag: tag for tag in ('button', 'input', 'select', 'a', 'textarea')}
90
+
91
+
92
+ def get_element_color(tag_name: str, element_type: str | None = None) -> str:
93
+ """Get color for element based on tag name and type."""
94
+ # Check input type first
95
+ if tag_name == 'input' and element_type:
96
+ if element_type in ['button', 'submit']:
97
+ return ELEMENT_COLORS['button']
98
+
99
+ # Use tag-based color
100
+ return ELEMENT_COLORS.get(tag_name.lower(), ELEMENT_COLORS['default'])
101
+
102
+
103
+ def should_show_index_overlay(backend_node_id: int | None) -> bool:
104
+ """Determine if index overlay should be shown."""
105
+ return backend_node_id is not None
106
+
107
+
108
def draw_enhanced_bounding_box_with_text(
    draw,  # ImageDraw.Draw - avoiding type annotation due to PIL typing issues
    bbox: tuple[int, int, int, int],
    color: str,
    text: str | None = None,
    font: ImageFont.FreeTypeFont | None = None,
    element_type: str = 'div',
    image_size: tuple[int, int] = (2000, 1500),
    device_pixel_ratio: float = 1.0,
) -> None:
    """Draw a dashed bounding box and, when ``text`` is given, an index label.

    All four edges of ``bbox`` are drawn as dashed lines in ``color``. If
    ``text`` is truthy, a filled label with white text is drawn, horizontally
    centred on the box: just inside the top edge for regular elements, or
    above the box for small ones so the label does not cover their content.
    The label is shifted as needed to stay inside ``image_size``.

    Args:
        draw: Drawing context providing ``line``, ``textbbox``, ``rectangle``
            and ``text`` (a PIL ``ImageDraw`` instance in this module).
        bbox: ``(x1, y1, x2, y2)`` of the element in image pixels.
        color: Colour used for the dashes and the label background.
        text: Label text; no label is drawn when None or empty.
        font: Fallback font used when no system font could be loaded.
        element_type: Accepted for interface compatibility; not used here.
        image_size: ``(width, height)`` of the image being drawn on.
        device_pixel_ratio: Accepted for interface compatibility; label sizing
            is derived from the image width as-is and does not apply it.

    Returns:
        None. Failures while drawing the label are logged at debug level and
        swallowed; the dashed box has already been drawn at that point.
    """
    x1, y1, x2, y2 = bbox

    # Dash pattern: 4px dash followed by an 8px gap, 2px thick.
    dash_length = 4
    gap_length = 8
    line_width = 2
    step = dash_length + gap_length

    def draw_dashed_line(start_x, start_y, end_x, end_y):
        """Draw one axis-aligned dashed segment; endpoint order is irrelevant."""
        if start_x == end_x:  # Vertical line
            # Always walk from the smaller to the larger coordinate, otherwise
            # a segment given in descending order would draw nothing.
            y, stop = min(start_y, end_y), max(start_y, end_y)
            while y < stop:
                dash_end = min(y + dash_length, stop)
                draw.line([(start_x, y), (start_x, dash_end)], fill=color, width=line_width)
                y += step
        else:  # Horizontal line
            x, stop = min(start_x, end_x), max(start_x, end_x)
            while x < stop:
                dash_end = min(x + dash_length, stop)
                draw.line([(x, start_y), (dash_end, start_y)], fill=color, width=line_width)
                x += step

    # Draw dashed rectangle
    draw_dashed_line(x1, y1, x2, y1)  # Top
    draw_dashed_line(x2, y1, x2, y2)  # Right
    draw_dashed_line(x1, y2, x2, y2)  # Bottom
    draw_dashed_line(x1, y1, x1, y2)  # Left

    if not text:
        return

    try:
        img_width, img_height = image_size

        # Sizes are scaled from the image width directly (not divided by the
        # device pixel ratio).
        css_width = img_width
        # Font size: 1% of the width, clamped to the range 10..20.
        base_font_size = max(10, min(20, int(css_width * 0.01)))
        # Prefer a cached system font; fall back to the caller's font.
        big_font = get_cross_platform_font(base_font_size)
        if big_font is None:
            big_font = font

        # Measure the label text (PIL's default font is used when none is set).
        if big_font:
            bbox_text = draw.textbbox((0, 0), text, font=big_font)
        else:
            bbox_text = draw.textbbox((0, 0), text)
        text_width = bbox_text[2] - bbox_text[0]
        text_height = bbox_text[3] - bbox_text[1]

        # Padding: 0.5% of the width, clamped to the range 4..10.
        padding = max(4, min(10, int(css_width * 0.005)))
        element_width = x2 - x1
        element_height = y2 - y1

        container_width = text_width + padding * 2
        container_height = text_height + padding * 2

        # Centre the label horizontally on the element.
        bg_x1 = x1 + (element_width - container_width) // 2

        if element_width < 60 or element_height < 30:
            # Small element: place the label above it to avoid covering content.
            bg_y1 = max(0, y1 - container_height - 5)
        else:
            # Regular element: place the label inside, just below the top edge.
            bg_y1 = y1 + 2

        bg_x2 = bg_x1 + container_width
        bg_y2 = bg_y1 + container_height

        # Centre the text in the label. textbbox may report a non-zero top
        # offset, which is subtracted so the glyphs are not pushed down.
        text_x = bg_x1 + (container_width - text_width) // 2
        text_y = bg_y1 + (container_height - text_height) // 2 - bbox_text[1]

        # Shift the label (and its text) back inside the image if it overflows.
        if bg_x1 < 0:
            offset = -bg_x1
            bg_x1 += offset
            bg_x2 += offset
            text_x += offset
        if bg_y1 < 0:
            offset = -bg_y1
            bg_y1 += offset
            bg_y2 += offset
            text_y += offset
        if bg_x2 > img_width:
            offset = bg_x2 - img_width
            bg_x1 -= offset
            bg_x2 -= offset
            text_x -= offset
        if bg_y2 > img_height:
            offset = bg_y2 - img_height
            bg_y1 -= offset
            bg_y2 -= offset
            text_y -= offset

        # Label background in the element colour with a white border.
        draw.rectangle([bg_x1, bg_y1, bg_x2, bg_y2], fill=color, outline='white', width=2)

        # White label text; big_font already holds the fallback font if needed.
        draw.text((text_x, text_y), text, fill='white', font=big_font)

    except Exception as e:
        logger.debug(f'Failed to draw enhanced text overlay: {e}')
232
+
233
+
234
def draw_bounding_box_with_text(
    draw,  # ImageDraw.Draw - avoiding type annotation due to PIL typing issues
    bbox: tuple[int, int, int, int],
    color: str,
    text: str | None = None,
    font: ImageFont.FreeTypeFont | None = None,
    image_size: tuple[int, int] = (1200, 800),
) -> None:
    """Draw a dashed bounding box with an optional index label.

    Each edge is drawn as short dashes, doubled one pixel towards the inside
    of the box. When ``text`` is given, a white label with a black border and
    black text is placed according to how large the element is compared with
    the label: outside the bottom-right corner for very small elements, inside
    the bottom-right corner for medium ones, and centred for large ones.

    Args:
        draw: Drawing context providing ``line``, ``textbbox``, ``rectangle``
            and ``text`` (a PIL ``ImageDraw`` instance in this module).
        bbox: ``(x1, y1, x2, y2)`` of the element in image pixels.
        color: Colour of the dashed outline.
        text: Label text; no label is drawn when None or empty.
        font: Font for the label; PIL's default font is used when None.
        image_size: ``(width, height)`` used to keep the label text inside the
            image. Defaults to 1200x800, the bounds this function previously
            hard-coded.

    Returns:
        None. Failures while drawing the label are logged at debug level and
        swallowed.
    """
    x1, y1, x2, y2 = bbox

    # Dash pattern: 2px dash followed by a 6px gap.
    dash_length = 2
    gap_length = 6
    step = dash_length + gap_length

    # Top and bottom edges, each paired with a second line one pixel inwards.
    for edge_y, inner_y in ((y1, y1 + 1), (y2, y2 - 1)):
        x = x1
        while x < x2:
            end_x = min(x + dash_length, x2)
            draw.line([(x, edge_y), (end_x, edge_y)], fill=color, width=2)
            draw.line([(x, inner_y), (end_x, inner_y)], fill=color, width=2)
            x += step

    # Left and right edges, each paired with a second line one pixel inwards.
    for edge_x, inner_x in ((x1, x1 + 1), (x2, x2 - 1)):
        y = y1
        while y < y2:
            end_y = min(y + dash_length, y2)
            draw.line([(edge_x, y), (edge_x, end_y)], fill=color, width=2)
            draw.line([(inner_x, y), (inner_x, end_y)], fill=color, width=2)
            y += step

    if not text:
        return

    try:
        img_width, img_height = image_size

        # Measure the label text (PIL's default font is used when none is set).
        if font:
            bbox_text = draw.textbbox((0, 0), text, font=font)
        else:
            bbox_text = draw.textbbox((0, 0), text)
        text_width = bbox_text[2] - bbox_text[0]
        text_height = bbox_text[3] - bbox_text[1]

        padding = 5
        element_width = x2 - x1
        element_height = y2 - y1
        element_area = element_width * element_height
        index_box_area = (text_width + padding * 2) * (text_height + padding * 2)

        # How many label-sized boxes fit in the element decides the placement.
        size_ratio = element_area / max(index_box_area, 1)

        if size_ratio < 4:
            # Very small elements: place outside, next to the bottom-right corner.
            text_x = x2 + padding
            text_y = y2 - text_height
            # Leave room for the padded background on the right-hand side.
            text_x = min(text_x, img_width - text_width - padding)
            text_y = max(text_y, 0)
        elif size_ratio < 16:
            # Medium elements: place inside the bottom-right corner.
            text_x = x2 - text_width - padding
            text_y = y2 - text_height - padding
        else:
            # Large elements: place in the centre.
            text_x = x1 + (element_width - text_width) // 2
            text_y = y1 + (element_height - text_height) // 2

        # Keep the text itself inside the image.
        text_x = max(0, min(text_x, img_width - text_width))
        text_y = max(0, min(text_y, img_height - text_height))

        # Background rectangle around the text.
        bg_x1 = text_x - padding
        bg_y1 = text_y - padding
        bg_x2 = text_x + text_width + padding
        bg_y2 = text_y + text_height + padding

        # White background with a black border, then black text, for contrast.
        draw.rectangle([bg_x1, bg_y1, bg_x2, bg_y2], fill='white', outline='black', width=2)
        draw.text((text_x, text_y), text, fill='black', font=font)

    except Exception as e:
        logger.debug(f'Failed to draw text overlay: {e}')
338
+
339
+
340
def process_element_highlight(
    element_id: int,
    element: EnhancedDOMTreeNode,
    draw,
    device_pixel_ratio: float,
    font,
    filter_highlight_ids: bool,
    image_size: tuple[int, int],
) -> None:
    """Draw the highlight for one element onto the screenshot.

    The element's ``absolute_position`` is scaled by ``device_pixel_ratio``,
    clamped to the image, and passed to
    ``draw_enhanced_bounding_box_with_text`` together with a colour chosen
    from its tag and ``type`` attribute. Elements without a position, or whose
    clamped box is under 2 pixels in either dimension, are skipped.

    Args:
        element_id: Key of the element in the selector map; only used in the
            debug log message on failure.
        element: Node to highlight.
        draw: Drawing context for the screenshot.
        device_pixel_ratio: Factor applied to the element's coordinates.
        font: Fallback font forwarded to the drawing helper.
        filter_highlight_ids: When True, the backend node id is shown only for
            elements whose LLM-visible text is shorter than 3 characters; when
            False, it is always shown.
        image_size: ``(width, height)`` of the screenshot.

    Returns:
        None. Any exception is logged at debug level and swallowed so that one
        bad element does not abort the remaining highlights.
    """
    try:
        bounds = element.absolute_position
        if not bounds:
            return

        # Scale the box corners from CSS pixels to the screenshot's pixels.
        left, top, right, bottom = (
            int(value * device_pixel_ratio)
            for value in (bounds.x, bounds.y, bounds.x + bounds.width, bounds.y + bounds.height)
        )

        # Clamp to the image; the far corner never ends up before the near one.
        img_width, img_height = image_size
        left = max(0, min(left, img_width))
        top = max(0, min(top, img_height))
        right = max(left, min(right, img_width))
        bottom = max(top, min(bottom, img_height))

        # Nothing sensible can be drawn for a box thinner than 2 pixels.
        if right - left < 2 or bottom - top < 2:
            return

        tag_name = getattr(element, 'tag_name', 'div')
        attributes = getattr(element, 'attributes', None)
        element_type = attributes.get('type') if attributes else None

        color = get_element_color(tag_name, element_type)

        # Decide whether the backend node id is shown as the index label.
        backend_node_id = getattr(element, 'backend_node_id', None)
        index_text = None
        if backend_node_id is not None:
            # With filtering on, only elements with very short text get an id.
            if not filter_highlight_ids or len(element.get_meaningful_text_for_llm()) < 3:
                index_text = str(backend_node_id)

        draw_enhanced_bounding_box_with_text(
            draw, (left, top, right, bottom), color, index_text, font, tag_name, image_size, device_pixel_ratio
        )

    except Exception as e:
        logger.debug(f'Failed to draw highlight for element {element_id}: {e}')
405
+
406
+
407
@observe_debug(ignore_input=True, ignore_output=True, name='create_highlighted_screenshot')
@time_execution_async('create_highlighted_screenshot')
async def create_highlighted_screenshot(
    screenshot_b64: str,
    selector_map: DOMSelectorMap,
    device_pixel_ratio: float = 1.0,
    viewport_offset_x: int = 0,
    viewport_offset_y: int = 0,
    filter_highlight_ids: bool = True,
) -> str:
    """Create a highlighted screenshot with bounding boxes around interactive elements.

    Args:
        screenshot_b64: Base64 encoded screenshot
        selector_map: Map of interactive elements with their positions
        device_pixel_ratio: Device pixel ratio for scaling coordinates
        viewport_offset_x: X offset for viewport positioning (accepted but not
            used by this function)
        viewport_offset_y: Y offset for viewport positioning (accepted but not
            used by this function)
        filter_highlight_ids: Forwarded to ``process_element_highlight``; when
            True, element IDs are only drawn for elements with very short text

    Returns:
        Base64 encoded highlighted screenshot (PNG). If anything fails, the
        error is logged and the original ``screenshot_b64`` is returned.
    """
    # Bound up front so the cleanup in `finally` can tell whether it was created.
    image = None
    try:
        # Decode screenshot
        screenshot_data = base64.b64decode(screenshot_b64)
        # convert() returns a new image, so the one returned by Image.open can
        # be closed as soon as the RGBA copy exists.
        with Image.open(io.BytesIO(screenshot_data)) as source:
            image = source.convert('RGBA')

        # Create drawing context
        draw = ImageDraw.Draw(image)

        # Load font using shared function with caching.
        # If no system fonts are found, font stays None and PIL's default is used.
        font = get_cross_platform_font(12)

        # Process elements sequentially to avoid ImageDraw thread safety issues
        # PIL ImageDraw is not thread-safe, so we process elements one by one
        for element_id, element in selector_map.items():
            process_element_highlight(element_id, element, draw, device_pixel_ratio, font, filter_highlight_ids, image.size)

        # Convert back to base64; getvalue() returns the whole buffer
        # regardless of the current stream position.
        with io.BytesIO() as output_buffer:
            image.save(output_buffer, format='PNG')
            highlighted_b64 = base64.b64encode(output_buffer.getvalue()).decode('utf-8')

        logger.debug(f'Successfully created highlighted screenshot with {len(selector_map)} elements')
        return highlighted_b64

    except Exception as e:
        logger.error(f'Failed to create highlighted screenshot: {e}')
        # Return original screenshot on error
        return screenshot_b64

    finally:
        # Single cleanup point for both the success and the error path.
        if image is not None:
            image.close()
468
+
469
+
470
async def get_viewport_info_from_cdp(cdp_session) -> tuple[float, int, int]:
    """Query ``Page.getLayoutMetrics`` and derive scaling and scroll values.

    The device pixel ratio is computed as the ``clientWidth`` of
    ``visualViewport`` divided by the CSS width, where the CSS width comes
    from ``cssVisualViewport``, then ``cssLayoutViewport``, then 1280.0. The
    scroll position is ``pageX``/``pageY`` of ``cssVisualViewport``, truncated
    to ints.

    Args:
        cdp_session: Object exposing ``cdp_client`` and ``session_id``.

    Returns:
        Tuple of (device_pixel_ratio, scroll_x, scroll_y). On any failure the
        error is logged at debug level and ``(1.0, 0, 0)`` is returned.
    """
    try:
        metrics = await cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=cdp_session.session_id)

        device_viewport = metrics.get('visualViewport', {})
        css_viewport = metrics.get('cssVisualViewport', {})
        layout_viewport = metrics.get('cssLayoutViewport', {})

        # CSS width: visual viewport first, then layout viewport, then 1280.
        fallback_css_width = layout_viewport.get('clientWidth', 1280.0)
        css_width = css_viewport.get('clientWidth', fallback_css_width)
        # Without a device width the ratio comes out as exactly 1.
        device_width = device_viewport.get('clientWidth', css_width)

        if css_width > 0:
            ratio = device_width / css_width
        else:
            # Guard against dividing by a zero or negative width.
            ratio = 1.0

        # Scroll position in CSS pixels.
        scroll_x = int(css_viewport.get('pageX', 0))
        scroll_y = int(css_viewport.get('pageY', 0))

        return float(ratio), scroll_x, scroll_y

    except Exception as e:
        logger.debug(f'Failed to get viewport info from CDP: {e}')
        return 1.0, 0, 0
499
+
500
+
501
@time_execution_async('create_highlighted_screenshot_async')
async def create_highlighted_screenshot_async(
    screenshot_b64: str, selector_map: DOMSelectorMap, cdp_session=None, filter_highlight_ids: bool = True
) -> str:
    """Highlight a screenshot, taking viewport details from CDP when possible.

    When the ``BROWSER_USE_SCREENSHOT_FILE`` environment variable is set, the
    resulting image is also written to that path from a worker thread.

    Args:
        screenshot_b64: Base64 encoded screenshot
        selector_map: Map of interactive elements
        cdp_session: CDP session for getting viewport info; when falsy, a
            device pixel ratio of 1.0 and zero offsets are used
        filter_highlight_ids: Whether to filter element IDs based on meaningful text

    Returns:
        Base64 encoded highlighted screenshot
    """
    # Defaults used when there is no CDP session or the lookup fails.
    device_pixel_ratio, viewport_offset_x, viewport_offset_y = 1.0, 0, 0

    if cdp_session:
        try:
            device_pixel_ratio, viewport_offset_x, viewport_offset_y = await get_viewport_info_from_cdp(cdp_session)
        except Exception as e:
            logger.debug(f'Failed to get viewport info from CDP: {e}')

    final_screenshot = await create_highlighted_screenshot(
        screenshot_b64, selector_map, device_pixel_ratio, viewport_offset_x, viewport_offset_y, filter_highlight_ids
    )

    filename = os.getenv('BROWSER_USE_SCREENSHOT_FILE')
    if not filename:
        return final_screenshot

    def _dump_to_disk():
        # Best effort: a failed write is logged and never propagated.
        try:
            with open(filename, 'wb') as handle:
                handle.write(base64.b64decode(final_screenshot))
            logger.debug('Saved screenshot to ' + str(filename))
        except Exception as e:
            logger.warning(f'Failed to save screenshot to {filename}: {e}')

    # Run the blocking file write off the event loop.
    await asyncio.to_thread(_dump_to_disk)
    return final_screenshot
545
+
546
+
547
# Public API of this module. cleanup_font_cache is exported so that
# long-running applications can release the cached fonts.
__all__ = [
    'create_highlighted_screenshot',
    'create_highlighted_screenshot_async',
    'cleanup_font_cache',
]