optexity-browser-use 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use/__init__.py +157 -0
- browser_use/actor/__init__.py +11 -0
- browser_use/actor/element.py +1175 -0
- browser_use/actor/mouse.py +134 -0
- browser_use/actor/page.py +561 -0
- browser_use/actor/playground/flights.py +41 -0
- browser_use/actor/playground/mixed_automation.py +54 -0
- browser_use/actor/playground/playground.py +236 -0
- browser_use/actor/utils.py +176 -0
- browser_use/agent/cloud_events.py +282 -0
- browser_use/agent/gif.py +424 -0
- browser_use/agent/judge.py +170 -0
- browser_use/agent/message_manager/service.py +473 -0
- browser_use/agent/message_manager/utils.py +52 -0
- browser_use/agent/message_manager/views.py +98 -0
- browser_use/agent/prompts.py +413 -0
- browser_use/agent/service.py +2316 -0
- browser_use/agent/system_prompt.md +185 -0
- browser_use/agent/system_prompt_flash.md +10 -0
- browser_use/agent/system_prompt_no_thinking.md +183 -0
- browser_use/agent/views.py +743 -0
- browser_use/browser/__init__.py +41 -0
- browser_use/browser/cloud/cloud.py +203 -0
- browser_use/browser/cloud/views.py +89 -0
- browser_use/browser/events.py +578 -0
- browser_use/browser/profile.py +1158 -0
- browser_use/browser/python_highlights.py +548 -0
- browser_use/browser/session.py +3225 -0
- browser_use/browser/session_manager.py +399 -0
- browser_use/browser/video_recorder.py +162 -0
- browser_use/browser/views.py +200 -0
- browser_use/browser/watchdog_base.py +260 -0
- browser_use/browser/watchdogs/__init__.py +0 -0
- browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
- browser_use/browser/watchdogs/crash_watchdog.py +335 -0
- browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
- browser_use/browser/watchdogs/dom_watchdog.py +817 -0
- browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
- browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
- browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
- browser_use/browser/watchdogs/popups_watchdog.py +143 -0
- browser_use/browser/watchdogs/recording_watchdog.py +126 -0
- browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
- browser_use/browser/watchdogs/security_watchdog.py +280 -0
- browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
- browser_use/cli.py +2359 -0
- browser_use/code_use/__init__.py +16 -0
- browser_use/code_use/formatting.py +192 -0
- browser_use/code_use/namespace.py +665 -0
- browser_use/code_use/notebook_export.py +276 -0
- browser_use/code_use/service.py +1340 -0
- browser_use/code_use/system_prompt.md +574 -0
- browser_use/code_use/utils.py +150 -0
- browser_use/code_use/views.py +171 -0
- browser_use/config.py +505 -0
- browser_use/controller/__init__.py +3 -0
- browser_use/dom/enhanced_snapshot.py +161 -0
- browser_use/dom/markdown_extractor.py +169 -0
- browser_use/dom/playground/extraction.py +312 -0
- browser_use/dom/playground/multi_act.py +32 -0
- browser_use/dom/serializer/clickable_elements.py +200 -0
- browser_use/dom/serializer/code_use_serializer.py +287 -0
- browser_use/dom/serializer/eval_serializer.py +478 -0
- browser_use/dom/serializer/html_serializer.py +212 -0
- browser_use/dom/serializer/paint_order.py +197 -0
- browser_use/dom/serializer/serializer.py +1170 -0
- browser_use/dom/service.py +825 -0
- browser_use/dom/utils.py +129 -0
- browser_use/dom/views.py +906 -0
- browser_use/exceptions.py +5 -0
- browser_use/filesystem/__init__.py +0 -0
- browser_use/filesystem/file_system.py +619 -0
- browser_use/init_cmd.py +376 -0
- browser_use/integrations/gmail/__init__.py +24 -0
- browser_use/integrations/gmail/actions.py +115 -0
- browser_use/integrations/gmail/service.py +225 -0
- browser_use/llm/__init__.py +155 -0
- browser_use/llm/anthropic/chat.py +242 -0
- browser_use/llm/anthropic/serializer.py +312 -0
- browser_use/llm/aws/__init__.py +36 -0
- browser_use/llm/aws/chat_anthropic.py +242 -0
- browser_use/llm/aws/chat_bedrock.py +289 -0
- browser_use/llm/aws/serializer.py +257 -0
- browser_use/llm/azure/chat.py +91 -0
- browser_use/llm/base.py +57 -0
- browser_use/llm/browser_use/__init__.py +3 -0
- browser_use/llm/browser_use/chat.py +201 -0
- browser_use/llm/cerebras/chat.py +193 -0
- browser_use/llm/cerebras/serializer.py +109 -0
- browser_use/llm/deepseek/chat.py +212 -0
- browser_use/llm/deepseek/serializer.py +109 -0
- browser_use/llm/exceptions.py +29 -0
- browser_use/llm/google/__init__.py +3 -0
- browser_use/llm/google/chat.py +542 -0
- browser_use/llm/google/serializer.py +120 -0
- browser_use/llm/groq/chat.py +229 -0
- browser_use/llm/groq/parser.py +158 -0
- browser_use/llm/groq/serializer.py +159 -0
- browser_use/llm/messages.py +238 -0
- browser_use/llm/models.py +271 -0
- browser_use/llm/oci_raw/__init__.py +10 -0
- browser_use/llm/oci_raw/chat.py +443 -0
- browser_use/llm/oci_raw/serializer.py +229 -0
- browser_use/llm/ollama/chat.py +97 -0
- browser_use/llm/ollama/serializer.py +143 -0
- browser_use/llm/openai/chat.py +264 -0
- browser_use/llm/openai/like.py +15 -0
- browser_use/llm/openai/serializer.py +165 -0
- browser_use/llm/openrouter/chat.py +211 -0
- browser_use/llm/openrouter/serializer.py +26 -0
- browser_use/llm/schema.py +176 -0
- browser_use/llm/views.py +48 -0
- browser_use/logging_config.py +330 -0
- browser_use/mcp/__init__.py +18 -0
- browser_use/mcp/__main__.py +12 -0
- browser_use/mcp/client.py +544 -0
- browser_use/mcp/controller.py +264 -0
- browser_use/mcp/server.py +1114 -0
- browser_use/observability.py +204 -0
- browser_use/py.typed +0 -0
- browser_use/sandbox/__init__.py +41 -0
- browser_use/sandbox/sandbox.py +637 -0
- browser_use/sandbox/views.py +132 -0
- browser_use/screenshots/__init__.py +1 -0
- browser_use/screenshots/service.py +52 -0
- browser_use/sync/__init__.py +6 -0
- browser_use/sync/auth.py +357 -0
- browser_use/sync/service.py +161 -0
- browser_use/telemetry/__init__.py +51 -0
- browser_use/telemetry/service.py +112 -0
- browser_use/telemetry/views.py +101 -0
- browser_use/tokens/__init__.py +0 -0
- browser_use/tokens/custom_pricing.py +24 -0
- browser_use/tokens/mappings.py +4 -0
- browser_use/tokens/service.py +580 -0
- browser_use/tokens/views.py +108 -0
- browser_use/tools/registry/service.py +572 -0
- browser_use/tools/registry/views.py +174 -0
- browser_use/tools/service.py +1675 -0
- browser_use/tools/utils.py +82 -0
- browser_use/tools/views.py +100 -0
- browser_use/utils.py +670 -0
- optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
- optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
- optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
- optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
- optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,548 @@
|
|
|
1
|
+
"""Python-based highlighting system for drawing bounding boxes on screenshots.
|
|
2
|
+
|
|
3
|
+
This module replaces JavaScript-based highlighting with fast Python image processing
|
|
4
|
+
to draw bounding boxes around interactive elements directly on screenshots.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import base64
|
|
9
|
+
import io
|
|
10
|
+
import logging
|
|
11
|
+
import os
|
|
12
|
+
|
|
13
|
+
from PIL import Image, ImageDraw, ImageFont
|
|
14
|
+
|
|
15
|
+
from browser_use.dom.views import DOMSelectorMap, EnhancedDOMTreeNode
|
|
16
|
+
from browser_use.observability import observe_debug
|
|
17
|
+
from browser_use.utils import time_execution_async
|
|
18
|
+
|
|
19
|
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
# Memoised fonts keyed by (label, size). A None value records that no font
# from _FONT_PATHS could be loaded for that size, so the search is not retried.
_FONT_CACHE: dict[tuple[str, int], ImageFont.FreeTypeFont | None] = {}

# Candidate font files, tried in this order until one loads.
_FONT_PATHS = [
    '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf',  # Linux (Debian/Ubuntu)
    '/usr/share/fonts/TTF/DejaVuSans-Bold.ttf',  # Linux (Arch/Fedora)
    '/System/Library/Fonts/Arial.ttf',  # macOS
    'C:\\Windows\\Fonts\\arial.ttf',  # Windows
    'arial.ttf',  # Windows (system path)
    'Arial Bold.ttf',  # macOS alternative
    '/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf',  # Linux alternative
]


def get_cross_platform_font(font_size: int) -> ImageFont.FreeTypeFont | None:
    """Return a TrueType font of the requested size, loading it at most once.

    The first path in ``_FONT_PATHS`` that ``ImageFont.truetype`` can open wins.
    The outcome is stored in ``_FONT_CACHE`` (including a ``None`` outcome) so
    later calls with the same size skip the filesystem probing.

    Args:
        font_size: Size passed to ``ImageFont.truetype``.

    Returns:
        The loaded font, or ``None`` when every candidate raised ``OSError``.
    """
    key = ('system_font', font_size)

    # Cache hit (a cached None is a valid answer and is returned as-is).
    try:
        return _FONT_CACHE[key]
    except KeyError:
        pass

    loaded = None
    for candidate in _FONT_PATHS:
        try:
            loaded = ImageFont.truetype(candidate, font_size)
        except OSError:
            # This candidate is not available on this system; try the next one.
            continue
        break

    _FONT_CACHE[key] = loaded
    return loaded
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def cleanup_font_cache() -> None:
    """Empty the module-level font cache (intended for long-running applications)."""
    # The dict is mutated in place, so no ``global`` declaration is required.
    _FONT_CACHE.clear()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Color scheme for different element types
|
|
73
|
+
ELEMENT_COLORS = {
|
|
74
|
+
'button': '#FF6B6B', # Red for buttons
|
|
75
|
+
'input': '#4ECDC4', # Teal for inputs
|
|
76
|
+
'select': '#45B7D1', # Blue for dropdowns
|
|
77
|
+
'a': '#96CEB4', # Green for links
|
|
78
|
+
'textarea': '#FF8C42', # Orange for text areas (was yellow, now more visible)
|
|
79
|
+
'default': '#DDA0DD', # Light purple for other interactive elements
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
# Element type mappings
|
|
83
|
+
ELEMENT_TYPE_MAP = {
|
|
84
|
+
'button': 'button',
|
|
85
|
+
'input': 'input',
|
|
86
|
+
'select': 'select',
|
|
87
|
+
'a': 'a',
|
|
88
|
+
'textarea': 'textarea',
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def get_element_color(tag_name: str, element_type: str | None = None) -> str:
|
|
93
|
+
"""Get color for element based on tag name and type."""
|
|
94
|
+
# Check input type first
|
|
95
|
+
if tag_name == 'input' and element_type:
|
|
96
|
+
if element_type in ['button', 'submit']:
|
|
97
|
+
return ELEMENT_COLORS['button']
|
|
98
|
+
|
|
99
|
+
# Use tag-based color
|
|
100
|
+
return ELEMENT_COLORS.get(tag_name.lower(), ELEMENT_COLORS['default'])
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def should_show_index_overlay(backend_node_id: int | None) -> bool:
|
|
104
|
+
"""Determine if index overlay should be shown."""
|
|
105
|
+
return backend_node_id is not None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def draw_enhanced_bounding_box_with_text(
    draw,  # ImageDraw.Draw - avoiding type annotation due to PIL typing issues
    bbox: tuple[int, int, int, int],
    color: str,
    text: str | None = None,
    font: ImageFont.FreeTypeFont | None = None,
    element_type: str = 'div',
    image_size: tuple[int, int] = (2000, 1500),
    device_pixel_ratio: float = 1.0,
) -> None:
    """Draw a dashed bounding box and, when ``text`` is given, an index label.

    All four edges of ``bbox`` are drawn as dashes (4px dash, 8px gap, 2px wide).
    The label is a filled rectangle in ``color`` with a white outline and white
    text. It is centred horizontally on the element and placed above small
    elements (narrower than 60px or shorter than 30px) or just inside the top
    edge otherwise, then shifted so it stays inside ``image_size``.

    Args:
        draw: Drawing context providing ``line``, ``textbbox``, ``rectangle`` and ``text``.
        bbox: ``(x1, y1, x2, y2)`` of the element in image pixels.
        color: Color used for the dashes and the label background.
        text: Label text; nothing but the box is drawn when empty or ``None``.
        font: Fallback font used when no system font can be loaded.
        element_type: Accepted for interface compatibility; not used by this function.
        image_size: ``(width, height)`` of the target image.
        device_pixel_ratio: Accepted for interface compatibility; not used by this function.

    Label drawing is best-effort: any exception raised while drawing the label
    is logged at debug level and swallowed.
    """
    x1, y1, x2, y2 = bbox

    dash_length = 4
    gap_length = 8
    line_width = 2

    def draw_dashed_line(start_x, start_y, end_x, end_y):
        """Draw an axis-aligned dashed line between two points, in either direction."""
        if start_x == end_x:  # Vertical line
            # Normalise direction: callers may pass the endpoints bottom-to-top.
            low, high = sorted((start_y, end_y))
            y = low
            while y < high:
                dash_end = min(y + dash_length, high)
                draw.line([(start_x, y), (start_x, dash_end)], fill=color, width=line_width)
                y += dash_length + gap_length
        else:  # Horizontal line
            # Normalise direction: callers may pass the endpoints right-to-left.
            low, high = sorted((start_x, end_x))
            x = low
            while x < high:
                dash_end = min(x + dash_length, high)
                draw.line([(x, start_y), (dash_end, start_y)], fill=color, width=line_width)
                x += dash_length + gap_length

    # Draw dashed rectangle
    draw_dashed_line(x1, y1, x2, y1)  # Top
    draw_dashed_line(x2, y1, x2, y2)  # Right
    draw_dashed_line(x2, y2, x1, y2)  # Bottom
    draw_dashed_line(x1, y2, x1, y1)  # Left

    if not text:
        return

    try:
        img_width, img_height = image_size

        # Division by device_pixel_ratio is disabled, so this is the raw image width.
        css_width = img_width

        # Font size is 1% of the width, clamped to the range 10..20.
        base_font_size = max(10, min(20, int(css_width * 0.01)))
        big_font = get_cross_platform_font(base_font_size)
        if big_font is None:
            big_font = font  # Fallback to the caller's font if no system font was found

        # Measure the label text (default font when no font object is available).
        if big_font:
            bbox_text = draw.textbbox((0, 0), text, font=big_font)
        else:
            bbox_text = draw.textbbox((0, 0), text)
        text_width = bbox_text[2] - bbox_text[0]
        text_height = bbox_text[3] - bbox_text[1]

        # Padding is 0.5% of the width, clamped to the range 4..10.
        padding = max(4, min(10, int(css_width * 0.005)))
        element_width = x2 - x1
        element_height = y2 - y1

        container_width = text_width + padding * 2
        container_height = text_height + padding * 2

        # Centre the label horizontally on the element.
        bg_x1 = x1 + (element_width - container_width) // 2

        if element_width < 60 or element_height < 30:
            # Small element: place the label above it so the content stays visible.
            bg_y1 = max(0, y1 - container_height - 5)
        else:
            # Regular element: place the label inside, just below the top edge.
            bg_y1 = y1 + 2

        bg_x2 = bg_x1 + container_width
        bg_y2 = bg_y1 + container_height

        text_x = bg_x1 + (container_width - text_width) // 2
        # Subtract the glyph's top offset so the text is vertically centred in the box.
        text_y = bg_y1 + (container_height - text_height) // 2 - bbox_text[1]

        # Shift the label back inside the image, one side at a time.
        if bg_x1 < 0:
            offset = -bg_x1
            bg_x1 += offset
            bg_x2 += offset
            text_x += offset
        if bg_y1 < 0:
            offset = -bg_y1
            bg_y1 += offset
            bg_y2 += offset
            text_y += offset
        if bg_x2 > img_width:
            offset = bg_x2 - img_width
            bg_x1 -= offset
            bg_x2 -= offset
            text_x -= offset
        if bg_y2 > img_height:
            offset = bg_y2 - img_height
            bg_y1 -= offset
            bg_y2 -= offset
            text_y -= offset

        draw.rectangle([bg_x1, bg_y1, bg_x2, bg_y2], fill=color, outline='white', width=2)
        draw.text((text_x, text_y), text, fill='white', font=big_font or font)

    except Exception as e:
        logger.debug(f'Failed to draw enhanced text overlay: {e}')
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def draw_bounding_box_with_text(
    draw,  # ImageDraw.Draw - avoiding type annotation due to PIL typing issues
    bbox: tuple[int, int, int, int],
    color: str,
    text: str | None = None,
    font: ImageFont.FreeTypeFont | None = None,
    image_size: tuple[int, int] = (1200, 800),
) -> None:
    """Draw a dashed bounding box with an optional index label.

    Each edge is drawn as 2px dashes separated by 6px gaps; every dash is
    stroked twice (on the edge and one pixel inward). The label is black text
    on a white rectangle with a black outline, positioned according to how
    large the element is compared with the label box:

    * ratio < 4: outside the element, next to its bottom-right corner
    * ratio < 16: inside the bottom-right corner
    * otherwise: centred in the element

    Args:
        draw: Drawing context providing ``line``, ``textbbox``, ``rectangle`` and ``text``.
        bbox: ``(x1, y1, x2, y2)`` of the element in image pixels.
        color: Color of the dashed outline.
        text: Label text; only the box is drawn when empty or ``None``.
        font: Font for the label; the drawing context's default is used when ``None``.
        image_size: ``(width, height)`` the label is clamped to. Defaults to
            ``(1200, 800)``, the bounds this function previously hard-coded.

    Label drawing is best-effort: any exception raised while drawing the label
    is logged at debug level and swallowed.
    """
    x1, y1, x2, y2 = bbox
    img_width, img_height = image_size

    dash_length = 2
    gap_length = 6

    def dash_spans(begin, end):
        """Yield (start, stop) of each dash along one axis from begin up to end."""
        pos = begin
        while pos < end:
            yield pos, min(pos + dash_length, end)
            pos += dash_length + gap_length

    # Top edge, then bottom edge; the second stroke of each dash sits one pixel inward.
    for edge_y, inward in ((y1, 1), (y2, -1)):
        for x, end_x in dash_spans(x1, x2):
            draw.line([(x, edge_y), (end_x, edge_y)], fill=color, width=2)
            draw.line([(x, edge_y + inward), (end_x, edge_y + inward)], fill=color, width=2)

    # Left edge, then right edge.
    for edge_x, inward in ((x1, 1), (x2, -1)):
        for y, end_y in dash_spans(y1, y2):
            draw.line([(edge_x, y), (edge_x, end_y)], fill=color, width=2)
            draw.line([(edge_x + inward, y), (edge_x + inward, end_y)], fill=color, width=2)

    if not text:
        return

    try:
        # Measure the label text (default font when none was supplied).
        if font:
            bbox_text = draw.textbbox((0, 0), text, font=font)
        else:
            bbox_text = draw.textbbox((0, 0), text)
        text_width = bbox_text[2] - bbox_text[0]
        text_height = bbox_text[3] - bbox_text[1]

        padding = 5
        element_width = x2 - x1
        element_height = y2 - y1
        element_area = element_width * element_height
        index_box_area = (text_width + padding * 2) * (text_height + padding * 2)

        # How many label boxes fit into the element decides the placement strategy.
        size_ratio = element_area / max(index_box_area, 1)

        if size_ratio < 4:
            # Very small elements: place outside, next to the bottom-right corner.
            text_x = x2 + padding
            text_y = y2 - text_height
            text_x = min(text_x, img_width - text_width - padding)
            text_y = max(text_y, 0)
        elif size_ratio < 16:
            # Medium elements: place inside the bottom-right corner.
            text_x = x2 - text_width - padding
            text_y = y2 - text_height - padding
        else:
            # Large elements: place in the centre.
            text_x = x1 + (element_width - text_width) // 2
            text_y = y1 + (element_height - text_height) // 2

        # Keep the text inside the image.
        text_x = max(0, min(text_x, img_width - text_width))
        text_y = max(0, min(text_y, img_height - text_height))

        bg_x1 = text_x - padding
        bg_y1 = text_y - padding
        bg_x2 = text_x + text_width + padding
        bg_y2 = text_y + text_height + padding

        # White background with a black border, then black text, for contrast.
        draw.rectangle([bg_x1, bg_y1, bg_x2, bg_y2], fill='white', outline='black', width=2)
        draw.text((text_x, text_y), text, fill='black', font=font)

    except Exception as e:
        logger.debug(f'Failed to draw text overlay: {e}')
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def process_element_highlight(
    element_id: int,
    element: EnhancedDOMTreeNode,
    draw,
    device_pixel_ratio: float,
    font,
    filter_highlight_ids: bool,
    image_size: tuple[int, int],
) -> None:
    """Draw the highlight for one element onto ``draw``.

    The element's ``absolute_position`` is scaled by ``device_pixel_ratio``,
    clamped to ``image_size`` and skipped when the resulting box is less than
    2px wide or tall. The label is the element's ``backend_node_id``; with
    ``filter_highlight_ids`` enabled it is only shown when
    ``get_meaningful_text_for_llm()`` returns fewer than 3 characters.

    Args:
        element_id: Key of the element in the selector map; only used in the error log.
        element: Node to highlight.
        draw: Drawing context forwarded to ``draw_enhanced_bounding_box_with_text``.
        device_pixel_ratio: Factor applied to the element's coordinates.
        font: Fallback font forwarded to the drawing helper.
        filter_highlight_ids: Whether to hide the label on elements with enough text.
        image_size: ``(width, height)`` of the screenshot.

    Any exception is logged at debug level and swallowed.
    """
    try:
        position = element.absolute_position
        if not position:
            return

        # Scale the element's coordinates by the device pixel ratio.
        scale = device_pixel_ratio
        left = int(position.x * scale)
        top = int(position.y * scale)
        right = int((position.x + position.width) * scale)
        bottom = int((position.y + position.height) * scale)

        # Clamp to the image; right/bottom never end up before left/top.
        max_x, max_y = image_size
        left = max(0, min(left, max_x))
        top = max(0, min(top, max_y))
        right = max(left, min(right, max_x))
        bottom = max(top, min(bottom, max_y))

        # Nothing sensible can be drawn for a box under 2px in either dimension.
        if right - left < 2 or bottom - top < 2:
            return

        tag_name = getattr(element, 'tag_name', 'div')
        attrs = getattr(element, 'attributes', None)
        element_type = attrs.get('type') if attrs else None
        color = get_element_color(tag_name, element_type)

        node_id = getattr(element, 'backend_node_id', None)
        index_text = None
        if node_id is not None:
            # Without filtering every id is shown; with filtering only elements
            # whose meaningful text is shorter than 3 characters get a label.
            if not filter_highlight_ids or len(element.get_meaningful_text_for_llm()) < 3:
                index_text = str(node_id)

        draw_enhanced_bounding_box_with_text(
            draw, (left, top, right, bottom), color, index_text, font, tag_name, image_size, device_pixel_ratio
        )

    except Exception as e:
        logger.debug(f'Failed to draw highlight for element {element_id}: {e}')
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
@observe_debug(ignore_input=True, ignore_output=True, name='create_highlighted_screenshot')
@time_execution_async('create_highlighted_screenshot')
async def create_highlighted_screenshot(
    screenshot_b64: str,
    selector_map: DOMSelectorMap,
    device_pixel_ratio: float = 1.0,
    viewport_offset_x: int = 0,
    viewport_offset_y: int = 0,
    filter_highlight_ids: bool = True,
) -> str:
    """Create a highlighted screenshot with bounding boxes around interactive elements.

    Args:
        screenshot_b64: Base64 encoded screenshot
        selector_map: Map of interactive elements with their positions
        device_pixel_ratio: Device pixel ratio for scaling coordinates
        viewport_offset_x: X offset for viewport positioning (accepted but not used here)
        viewport_offset_y: Y offset for viewport positioning (accepted but not used here)
        filter_highlight_ids: Forwarded to ``process_element_highlight`` to control
            which elements get an index label

    Returns:
        Base64 encoded highlighted screenshot (PNG). On any failure the error is
        logged and the original ``screenshot_b64`` is returned unchanged.
    """
    # Explicit sentinel so cleanup does not have to inspect locals().
    image = None
    try:
        # Decode screenshot; the decoded source image is closed as soon as the
        # RGBA working copy exists.
        screenshot_data = base64.b64decode(screenshot_b64)
        with Image.open(io.BytesIO(screenshot_data)) as source_image:
            image = source_image.convert('RGBA')

        # Create drawing context
        draw = ImageDraw.Draw(image)

        # If no system fonts are found, font is None and the default font is used.
        font = get_cross_platform_font(12)

        # Elements are processed one by one on a single drawing context.
        for element_id, element in selector_map.items():
            process_element_highlight(element_id, element, draw, device_pixel_ratio, font, filter_highlight_ids, image.size)

        # Convert back to base64
        with io.BytesIO() as output_buffer:
            image.save(output_buffer, format='PNG')
            highlighted_b64 = base64.b64encode(output_buffer.getvalue()).decode('utf-8')

        logger.debug(f'Successfully created highlighted screenshot with {len(selector_map)} elements')
        return highlighted_b64

    except Exception as e:
        logger.error(f'Failed to create highlighted screenshot: {e}')
        # Return original screenshot on error
        return screenshot_b64

    finally:
        # Runs on both the success and the error path.
        if image is not None:
            image.close()
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
async def get_viewport_info_from_cdp(cdp_session) -> tuple[float, int, int]:
    """Query ``Page.getLayoutMetrics`` and derive the pixel ratio and scroll offset.

    The ratio is ``visualViewport.clientWidth / cssVisualViewport.clientWidth``.
    The CSS width falls back to ``cssLayoutViewport.clientWidth`` and then to
    1280.0; the device width falls back to the CSS width. A non-positive CSS
    width yields a ratio of 1.0.

    Returns:
        Tuple of (device_pixel_ratio, scroll_x, scroll_y). ``(1.0, 0, 0)`` is
        returned, after a debug log, if anything raises.
    """
    try:
        layout = await cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=cdp_session.session_id)

        device_viewport = layout.get('visualViewport', {})
        css_viewport = layout.get('cssVisualViewport', {})
        css_layout = layout.get('cssLayoutViewport', {})

        fallback_width = css_layout.get('clientWidth', 1280.0)
        css_width = css_viewport.get('clientWidth', fallback_width)
        device_width = device_viewport.get('clientWidth', css_width)

        if css_width > 0:
            ratio = device_width / css_width
        else:
            ratio = 1.0

        # Scroll position is read from the CSS visual viewport.
        return float(ratio), int(css_viewport.get('pageX', 0)), int(css_viewport.get('pageY', 0))

    except Exception as e:
        logger.debug(f'Failed to get viewport info from CDP: {e}')
        return 1.0, 0, 0
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
@time_execution_async('create_highlighted_screenshot_async')
async def create_highlighted_screenshot_async(
    screenshot_b64: str, selector_map: DOMSelectorMap, cdp_session=None, filter_highlight_ids: bool = True
) -> str:
    """Highlight a screenshot, reading viewport info from CDP when a session is given.

    If the ``BROWSER_USE_SCREENSHOT_FILE`` environment variable is set, the
    resulting image is also written to that path in a worker thread; a failed
    write is logged as a warning and does not affect the return value.

    Args:
        screenshot_b64: Base64 encoded screenshot
        selector_map: Map of interactive elements
        cdp_session: CDP session for getting viewport info
        filter_highlight_ids: Whether to filter element IDs based on meaningful text

    Returns:
        Base64 encoded highlighted screenshot
    """
    # Defaults used when there is no session or the viewport lookup fails.
    ratio, offset_x, offset_y = 1.0, 0, 0
    if cdp_session:
        try:
            ratio, offset_x, offset_y = await get_viewport_info_from_cdp(cdp_session)
        except Exception as e:
            logger.debug(f'Failed to get viewport info from CDP: {e}')

    final_screenshot = await create_highlighted_screenshot(
        screenshot_b64, selector_map, ratio, offset_x, offset_y, filter_highlight_ids
    )

    output_path = os.getenv('BROWSER_USE_SCREENSHOT_FILE')
    if not output_path:
        return final_screenshot

    def _write_screenshot():
        try:
            with open(output_path, 'wb') as f:
                f.write(base64.b64decode(final_screenshot))
            logger.debug('Saved screenshot to ' + str(output_path))
        except Exception as e:
            logger.warning(f'Failed to save screenshot to {output_path}: {e}')

    # File I/O is pushed to a thread so the event loop is not blocked.
    await asyncio.to_thread(_write_screenshot)
    return final_screenshot
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
# Export the cleanup function for external use in long-running applications
|
|
548
|
+
# Public names of this module: the two screenshot entry points plus the font-cache reset.
__all__ = ['create_highlighted_screenshot', 'create_highlighted_screenshot_async', 'cleanup_font_cache']
|