optexity-browser-use 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use/__init__.py +157 -0
- browser_use/actor/__init__.py +11 -0
- browser_use/actor/element.py +1175 -0
- browser_use/actor/mouse.py +134 -0
- browser_use/actor/page.py +561 -0
- browser_use/actor/playground/flights.py +41 -0
- browser_use/actor/playground/mixed_automation.py +54 -0
- browser_use/actor/playground/playground.py +236 -0
- browser_use/actor/utils.py +176 -0
- browser_use/agent/cloud_events.py +282 -0
- browser_use/agent/gif.py +424 -0
- browser_use/agent/judge.py +170 -0
- browser_use/agent/message_manager/service.py +473 -0
- browser_use/agent/message_manager/utils.py +52 -0
- browser_use/agent/message_manager/views.py +98 -0
- browser_use/agent/prompts.py +413 -0
- browser_use/agent/service.py +2316 -0
- browser_use/agent/system_prompt.md +185 -0
- browser_use/agent/system_prompt_flash.md +10 -0
- browser_use/agent/system_prompt_no_thinking.md +183 -0
- browser_use/agent/views.py +743 -0
- browser_use/browser/__init__.py +41 -0
- browser_use/browser/cloud/cloud.py +203 -0
- browser_use/browser/cloud/views.py +89 -0
- browser_use/browser/events.py +578 -0
- browser_use/browser/profile.py +1158 -0
- browser_use/browser/python_highlights.py +548 -0
- browser_use/browser/session.py +3225 -0
- browser_use/browser/session_manager.py +399 -0
- browser_use/browser/video_recorder.py +162 -0
- browser_use/browser/views.py +200 -0
- browser_use/browser/watchdog_base.py +260 -0
- browser_use/browser/watchdogs/__init__.py +0 -0
- browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
- browser_use/browser/watchdogs/crash_watchdog.py +335 -0
- browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
- browser_use/browser/watchdogs/dom_watchdog.py +817 -0
- browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
- browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
- browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
- browser_use/browser/watchdogs/popups_watchdog.py +143 -0
- browser_use/browser/watchdogs/recording_watchdog.py +126 -0
- browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
- browser_use/browser/watchdogs/security_watchdog.py +280 -0
- browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
- browser_use/cli.py +2359 -0
- browser_use/code_use/__init__.py +16 -0
- browser_use/code_use/formatting.py +192 -0
- browser_use/code_use/namespace.py +665 -0
- browser_use/code_use/notebook_export.py +276 -0
- browser_use/code_use/service.py +1340 -0
- browser_use/code_use/system_prompt.md +574 -0
- browser_use/code_use/utils.py +150 -0
- browser_use/code_use/views.py +171 -0
- browser_use/config.py +505 -0
- browser_use/controller/__init__.py +3 -0
- browser_use/dom/enhanced_snapshot.py +161 -0
- browser_use/dom/markdown_extractor.py +169 -0
- browser_use/dom/playground/extraction.py +312 -0
- browser_use/dom/playground/multi_act.py +32 -0
- browser_use/dom/serializer/clickable_elements.py +200 -0
- browser_use/dom/serializer/code_use_serializer.py +287 -0
- browser_use/dom/serializer/eval_serializer.py +478 -0
- browser_use/dom/serializer/html_serializer.py +212 -0
- browser_use/dom/serializer/paint_order.py +197 -0
- browser_use/dom/serializer/serializer.py +1170 -0
- browser_use/dom/service.py +825 -0
- browser_use/dom/utils.py +129 -0
- browser_use/dom/views.py +906 -0
- browser_use/exceptions.py +5 -0
- browser_use/filesystem/__init__.py +0 -0
- browser_use/filesystem/file_system.py +619 -0
- browser_use/init_cmd.py +376 -0
- browser_use/integrations/gmail/__init__.py +24 -0
- browser_use/integrations/gmail/actions.py +115 -0
- browser_use/integrations/gmail/service.py +225 -0
- browser_use/llm/__init__.py +155 -0
- browser_use/llm/anthropic/chat.py +242 -0
- browser_use/llm/anthropic/serializer.py +312 -0
- browser_use/llm/aws/__init__.py +36 -0
- browser_use/llm/aws/chat_anthropic.py +242 -0
- browser_use/llm/aws/chat_bedrock.py +289 -0
- browser_use/llm/aws/serializer.py +257 -0
- browser_use/llm/azure/chat.py +91 -0
- browser_use/llm/base.py +57 -0
- browser_use/llm/browser_use/__init__.py +3 -0
- browser_use/llm/browser_use/chat.py +201 -0
- browser_use/llm/cerebras/chat.py +193 -0
- browser_use/llm/cerebras/serializer.py +109 -0
- browser_use/llm/deepseek/chat.py +212 -0
- browser_use/llm/deepseek/serializer.py +109 -0
- browser_use/llm/exceptions.py +29 -0
- browser_use/llm/google/__init__.py +3 -0
- browser_use/llm/google/chat.py +542 -0
- browser_use/llm/google/serializer.py +120 -0
- browser_use/llm/groq/chat.py +229 -0
- browser_use/llm/groq/parser.py +158 -0
- browser_use/llm/groq/serializer.py +159 -0
- browser_use/llm/messages.py +238 -0
- browser_use/llm/models.py +271 -0
- browser_use/llm/oci_raw/__init__.py +10 -0
- browser_use/llm/oci_raw/chat.py +443 -0
- browser_use/llm/oci_raw/serializer.py +229 -0
- browser_use/llm/ollama/chat.py +97 -0
- browser_use/llm/ollama/serializer.py +143 -0
- browser_use/llm/openai/chat.py +264 -0
- browser_use/llm/openai/like.py +15 -0
- browser_use/llm/openai/serializer.py +165 -0
- browser_use/llm/openrouter/chat.py +211 -0
- browser_use/llm/openrouter/serializer.py +26 -0
- browser_use/llm/schema.py +176 -0
- browser_use/llm/views.py +48 -0
- browser_use/logging_config.py +330 -0
- browser_use/mcp/__init__.py +18 -0
- browser_use/mcp/__main__.py +12 -0
- browser_use/mcp/client.py +544 -0
- browser_use/mcp/controller.py +264 -0
- browser_use/mcp/server.py +1114 -0
- browser_use/observability.py +204 -0
- browser_use/py.typed +0 -0
- browser_use/sandbox/__init__.py +41 -0
- browser_use/sandbox/sandbox.py +637 -0
- browser_use/sandbox/views.py +132 -0
- browser_use/screenshots/__init__.py +1 -0
- browser_use/screenshots/service.py +52 -0
- browser_use/sync/__init__.py +6 -0
- browser_use/sync/auth.py +357 -0
- browser_use/sync/service.py +161 -0
- browser_use/telemetry/__init__.py +51 -0
- browser_use/telemetry/service.py +112 -0
- browser_use/telemetry/views.py +101 -0
- browser_use/tokens/__init__.py +0 -0
- browser_use/tokens/custom_pricing.py +24 -0
- browser_use/tokens/mappings.py +4 -0
- browser_use/tokens/service.py +580 -0
- browser_use/tokens/views.py +108 -0
- browser_use/tools/registry/service.py +572 -0
- browser_use/tools/registry/views.py +174 -0
- browser_use/tools/service.py +1675 -0
- browser_use/tools/utils.py +82 -0
- browser_use/tools/views.py +100 -0
- browser_use/utils.py +670 -0
- optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
- optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
- optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
- optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
- optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,825 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
import time
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from cdp_use.cdp.accessibility.commands import GetFullAXTreeReturns
|
|
7
|
+
from cdp_use.cdp.accessibility.types import AXNode
|
|
8
|
+
from cdp_use.cdp.dom.types import Node
|
|
9
|
+
from cdp_use.cdp.target import TargetID
|
|
10
|
+
|
|
11
|
+
from browser_use.dom.enhanced_snapshot import (
|
|
12
|
+
REQUIRED_COMPUTED_STYLES,
|
|
13
|
+
build_snapshot_lookup,
|
|
14
|
+
)
|
|
15
|
+
from browser_use.dom.serializer.serializer import DOMTreeSerializer
|
|
16
|
+
from browser_use.dom.views import (
|
|
17
|
+
CurrentPageTargets,
|
|
18
|
+
DOMRect,
|
|
19
|
+
EnhancedAXNode,
|
|
20
|
+
EnhancedAXProperty,
|
|
21
|
+
EnhancedDOMTreeNode,
|
|
22
|
+
NodeType,
|
|
23
|
+
SerializedDOMState,
|
|
24
|
+
TargetAllTrees,
|
|
25
|
+
)
|
|
26
|
+
from browser_use.observability import observe_debug
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from browser_use.browser.session import BrowserSession
|
|
30
|
+
|
|
31
|
+
# Note: iframe limits are now configurable via BrowserProfile.max_iframes and BrowserProfile.max_iframe_depth
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class DomService:
|
|
35
|
+
"""
|
|
36
|
+
Service for getting the DOM tree and other DOM-related information.
|
|
37
|
+
|
|
38
|
+
Either browser or page must be provided.
|
|
39
|
+
|
|
40
|
+
TODO: currently we start a new websocket connection PER STEP, we should definitely keep this persistent
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
logger: logging.Logger
|
|
44
|
+
|
|
45
|
+
def __init__(
|
|
46
|
+
self,
|
|
47
|
+
browser_session: 'BrowserSession',
|
|
48
|
+
logger: logging.Logger | None = None,
|
|
49
|
+
cross_origin_iframes: bool = False,
|
|
50
|
+
paint_order_filtering: bool = True,
|
|
51
|
+
max_iframes: int = 100,
|
|
52
|
+
max_iframe_depth: int = 5,
|
|
53
|
+
):
|
|
54
|
+
self.browser_session = browser_session
|
|
55
|
+
self.logger = logger or browser_session.logger
|
|
56
|
+
self.cross_origin_iframes = cross_origin_iframes
|
|
57
|
+
self.paint_order_filtering = paint_order_filtering
|
|
58
|
+
self.max_iframes = max_iframes
|
|
59
|
+
self.max_iframe_depth = max_iframe_depth
|
|
60
|
+
|
|
61
|
+
async def __aenter__(self):
|
|
62
|
+
return self
|
|
63
|
+
|
|
64
|
+
async def __aexit__(self, exc_type, exc_value, traceback):
|
|
65
|
+
pass # no need to cleanup anything, browser_session auto handles cleaning up session cache
|
|
66
|
+
|
|
67
|
+
async def _get_targets_for_page(self, target_id: TargetID | None = None) -> CurrentPageTargets:
|
|
68
|
+
"""Get the target info for a specific page.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
target_id: The target ID to get info for. If None, uses current_target_id.
|
|
72
|
+
"""
|
|
73
|
+
targets = await self.browser_session.cdp_client.send.Target.getTargets()
|
|
74
|
+
|
|
75
|
+
# Use provided target_id or fall back to current_target_id
|
|
76
|
+
if target_id is None:
|
|
77
|
+
target_id = self.browser_session.current_target_id
|
|
78
|
+
if not target_id:
|
|
79
|
+
raise ValueError('No current target ID set in browser session')
|
|
80
|
+
|
|
81
|
+
# Find main page target by ID
|
|
82
|
+
main_target = next((t for t in targets['targetInfos'] if t['targetId'] == target_id), None)
|
|
83
|
+
|
|
84
|
+
if not main_target:
|
|
85
|
+
raise ValueError(f'No target found for target ID: {target_id}')
|
|
86
|
+
|
|
87
|
+
# Get all frames using the new method to find iframe targets for this page
|
|
88
|
+
all_frames, _ = await self.browser_session.get_all_frames()
|
|
89
|
+
|
|
90
|
+
# Find iframe targets that are children of this target
|
|
91
|
+
iframe_targets = []
|
|
92
|
+
for frame_info in all_frames.values():
|
|
93
|
+
# Check if this frame is a cross-origin iframe with its own target
|
|
94
|
+
if frame_info.get('isCrossOrigin') and frame_info.get('frameTargetId'):
|
|
95
|
+
# Check if this frame belongs to our target
|
|
96
|
+
parent_target = frame_info.get('parentTargetId', frame_info.get('frameTargetId'))
|
|
97
|
+
if parent_target == target_id:
|
|
98
|
+
# Find the target info for this iframe
|
|
99
|
+
iframe_target = next(
|
|
100
|
+
(t for t in targets['targetInfos'] if t['targetId'] == frame_info['frameTargetId']), None
|
|
101
|
+
)
|
|
102
|
+
if iframe_target:
|
|
103
|
+
iframe_targets.append(iframe_target)
|
|
104
|
+
|
|
105
|
+
return CurrentPageTargets(
|
|
106
|
+
page_session=main_target,
|
|
107
|
+
iframe_sessions=iframe_targets,
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
def _build_enhanced_ax_node(self, ax_node: AXNode) -> EnhancedAXNode:
    """Convert a raw CDP accessibility node into an ``EnhancedAXNode``.

    Args:
        ax_node: Accessibility node mapping as returned by CDP. ``nodeId`` and
            ``ignored`` are read unconditionally; the other keys are optional.

    Returns:
        The enhanced node. ``properties`` is None when the raw node has no
        (or an empty) ``properties`` list, and ``child_ids`` is None when the
        raw node has no (or an empty) ``childIds`` list.
    """
    raw_properties = ax_node.get('properties')

    converted: list[EnhancedAXProperty] | None = None
    if raw_properties:
        converted = []
        for raw in raw_properties:
            # Building the property validates its name; entries that raise
            # ValueError (sometimes Chrome returns some random properties)
            # are dropped.
            try:
                entry = EnhancedAXProperty(
                    name=raw['name'],
                    value=raw.get('value', {}).get('value', None),
                    # related_nodes=[],  # TODO: add related nodes
                )
            except ValueError:
                continue
            converted.append(entry)

    return EnhancedAXNode(
        ax_node_id=ax_node['nodeId'],
        ignored=ax_node['ignored'],
        role=ax_node.get('role', {}).get('value', None),
        name=ax_node.get('name', {}).get('value', None),
        description=ax_node.get('description', {}).get('value', None),
        properties=converted,
        # Missing and empty child lists are both reported as None.
        child_ids=ax_node.get('childIds') or None,
    )
|
|
137
|
+
|
|
138
|
+
async def _get_viewport_ratio(self, target_id: TargetID) -> float:
|
|
139
|
+
"""Get viewport dimensions, device pixel ratio, and scroll position using CDP."""
|
|
140
|
+
cdp_session = await self.browser_session.get_or_create_cdp_session(target_id=target_id, focus=False)
|
|
141
|
+
|
|
142
|
+
try:
|
|
143
|
+
# Get the layout metrics which includes the visual viewport
|
|
144
|
+
metrics = await cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=cdp_session.session_id)
|
|
145
|
+
|
|
146
|
+
visual_viewport = metrics.get('visualViewport', {})
|
|
147
|
+
|
|
148
|
+
# IMPORTANT: Use CSS viewport instead of device pixel viewport
|
|
149
|
+
# This fixes the coordinate mismatch on high-DPI displays
|
|
150
|
+
css_visual_viewport = metrics.get('cssVisualViewport', {})
|
|
151
|
+
css_layout_viewport = metrics.get('cssLayoutViewport', {})
|
|
152
|
+
|
|
153
|
+
# Use CSS pixels (what JavaScript sees) instead of device pixels
|
|
154
|
+
width = css_visual_viewport.get('clientWidth', css_layout_viewport.get('clientWidth', 1920.0))
|
|
155
|
+
|
|
156
|
+
# Calculate device pixel ratio
|
|
157
|
+
device_width = visual_viewport.get('clientWidth', width)
|
|
158
|
+
css_width = css_visual_viewport.get('clientWidth', width)
|
|
159
|
+
device_pixel_ratio = device_width / css_width if css_width > 0 else 1.0
|
|
160
|
+
|
|
161
|
+
return float(device_pixel_ratio)
|
|
162
|
+
except Exception as e:
|
|
163
|
+
self.logger.debug(f'Viewport size detection failed: {e}')
|
|
164
|
+
# Fallback to default viewport size
|
|
165
|
+
return 1.0
|
|
166
|
+
|
|
167
|
+
@classmethod
|
|
168
|
+
def is_element_visible_according_to_all_parents(
|
|
169
|
+
cls, node: EnhancedDOMTreeNode, html_frames: list[EnhancedDOMTreeNode]
|
|
170
|
+
) -> bool:
|
|
171
|
+
"""Check if the element is visible according to all its parent HTML frames."""
|
|
172
|
+
|
|
173
|
+
if not node.snapshot_node:
|
|
174
|
+
return False
|
|
175
|
+
|
|
176
|
+
computed_styles = node.snapshot_node.computed_styles or {}
|
|
177
|
+
|
|
178
|
+
display = computed_styles.get('display', '').lower()
|
|
179
|
+
visibility = computed_styles.get('visibility', '').lower()
|
|
180
|
+
opacity = computed_styles.get('opacity', '1')
|
|
181
|
+
|
|
182
|
+
if display == 'none' or visibility == 'hidden':
|
|
183
|
+
return False
|
|
184
|
+
|
|
185
|
+
try:
|
|
186
|
+
if float(opacity) <= 0:
|
|
187
|
+
return False
|
|
188
|
+
except (ValueError, TypeError):
|
|
189
|
+
pass
|
|
190
|
+
|
|
191
|
+
# Start with the element's local bounds (in its own frame's coordinate system)
|
|
192
|
+
current_bounds = node.snapshot_node.bounds
|
|
193
|
+
|
|
194
|
+
if not current_bounds:
|
|
195
|
+
return False # If there are no bounds, the element is not visible
|
|
196
|
+
|
|
197
|
+
"""
|
|
198
|
+
Reverse iterate through the html frames (that can be either iframe or document -> if it's a document frame compare if the current bounds interest with it (taking scroll into account) otherwise move the current bounds by the iframe offset)
|
|
199
|
+
"""
|
|
200
|
+
for frame in reversed(html_frames):
|
|
201
|
+
if (
|
|
202
|
+
frame.node_type == NodeType.ELEMENT_NODE
|
|
203
|
+
and (frame.node_name.upper() == 'IFRAME' or frame.node_name.upper() == 'FRAME')
|
|
204
|
+
and frame.snapshot_node
|
|
205
|
+
and frame.snapshot_node.bounds
|
|
206
|
+
):
|
|
207
|
+
iframe_bounds = frame.snapshot_node.bounds
|
|
208
|
+
|
|
209
|
+
# negate the values added in `_construct_enhanced_node`
|
|
210
|
+
current_bounds.x += iframe_bounds.x
|
|
211
|
+
current_bounds.y += iframe_bounds.y
|
|
212
|
+
|
|
213
|
+
if (
|
|
214
|
+
frame.node_type == NodeType.ELEMENT_NODE
|
|
215
|
+
and frame.node_name == 'HTML'
|
|
216
|
+
and frame.snapshot_node
|
|
217
|
+
and frame.snapshot_node.scrollRects
|
|
218
|
+
and frame.snapshot_node.clientRects
|
|
219
|
+
):
|
|
220
|
+
# For iframe content, we need to check visibility within the iframe's viewport
|
|
221
|
+
# The scrollRects represent the current scroll position
|
|
222
|
+
# The clientRects represent the viewport size
|
|
223
|
+
# Elements are visible if they fall within the viewport after accounting for scroll
|
|
224
|
+
|
|
225
|
+
# The viewport of the frame (what's actually visible)
|
|
226
|
+
viewport_left = 0 # Viewport always starts at 0 in frame coordinates
|
|
227
|
+
viewport_top = 0
|
|
228
|
+
viewport_right = frame.snapshot_node.clientRects.width
|
|
229
|
+
viewport_bottom = frame.snapshot_node.clientRects.height
|
|
230
|
+
|
|
231
|
+
# Adjust element bounds by the scroll offset to get position relative to viewport
|
|
232
|
+
# When scrolled down, scrollRects.y is positive, so we subtract it from element's y
|
|
233
|
+
adjusted_x = current_bounds.x - frame.snapshot_node.scrollRects.x
|
|
234
|
+
adjusted_y = current_bounds.y - frame.snapshot_node.scrollRects.y
|
|
235
|
+
|
|
236
|
+
frame_intersects = (
|
|
237
|
+
adjusted_x < viewport_right
|
|
238
|
+
and adjusted_x + current_bounds.width > viewport_left
|
|
239
|
+
and adjusted_y < viewport_bottom + 1000
|
|
240
|
+
and adjusted_y + current_bounds.height > viewport_top - 1000
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
if not frame_intersects:
|
|
244
|
+
return False
|
|
245
|
+
|
|
246
|
+
# Keep the original coordinate adjustment to maintain consistency
|
|
247
|
+
# This adjustment is needed for proper coordinate transformation
|
|
248
|
+
current_bounds.x -= frame.snapshot_node.scrollRects.x
|
|
249
|
+
current_bounds.y -= frame.snapshot_node.scrollRects.y
|
|
250
|
+
|
|
251
|
+
# If we reach here, element is visible in main viewport and all containing iframes
|
|
252
|
+
return True
|
|
253
|
+
|
|
254
|
+
async def _get_ax_tree_for_all_frames(self, target_id: TargetID) -> GetFullAXTreeReturns:
|
|
255
|
+
"""Recursively collect all frames and merge their accessibility trees into a single array."""
|
|
256
|
+
|
|
257
|
+
cdp_session = await self.browser_session.get_or_create_cdp_session(target_id=target_id, focus=False)
|
|
258
|
+
frame_tree = await cdp_session.cdp_client.send.Page.getFrameTree(session_id=cdp_session.session_id)
|
|
259
|
+
|
|
260
|
+
def collect_all_frame_ids(frame_tree_node) -> list[str]:
|
|
261
|
+
"""Recursively collect all frame IDs from the frame tree."""
|
|
262
|
+
frame_ids = [frame_tree_node['frame']['id']]
|
|
263
|
+
|
|
264
|
+
if 'childFrames' in frame_tree_node and frame_tree_node['childFrames']:
|
|
265
|
+
for child_frame in frame_tree_node['childFrames']:
|
|
266
|
+
frame_ids.extend(collect_all_frame_ids(child_frame))
|
|
267
|
+
|
|
268
|
+
return frame_ids
|
|
269
|
+
|
|
270
|
+
# Collect all frame IDs recursively
|
|
271
|
+
all_frame_ids = collect_all_frame_ids(frame_tree['frameTree'])
|
|
272
|
+
|
|
273
|
+
# Get accessibility tree for each frame
|
|
274
|
+
ax_tree_requests = []
|
|
275
|
+
for frame_id in all_frame_ids:
|
|
276
|
+
ax_tree_request = cdp_session.cdp_client.send.Accessibility.getFullAXTree(
|
|
277
|
+
params={'frameId': frame_id}, session_id=cdp_session.session_id
|
|
278
|
+
)
|
|
279
|
+
ax_tree_requests.append(ax_tree_request)
|
|
280
|
+
|
|
281
|
+
# Wait for all requests to complete
|
|
282
|
+
ax_trees = await asyncio.gather(*ax_tree_requests)
|
|
283
|
+
|
|
284
|
+
# Merge all AX nodes into a single array
|
|
285
|
+
merged_nodes: list[AXNode] = []
|
|
286
|
+
for ax_tree in ax_trees:
|
|
287
|
+
merged_nodes.extend(ax_tree['nodes'])
|
|
288
|
+
|
|
289
|
+
return {'nodes': merged_nodes}
|
|
290
|
+
|
|
291
|
+
async def _get_all_trees(self, target_id: TargetID) -> TargetAllTrees:
    """Collect the DOM snapshot, DOM tree, AX tree and pixel ratio of a target.

    Four requests run concurrently with a 10 second budget:
    ``DOMSnapshot.captureSnapshot``, ``DOM.getDocument``, the merged
    accessibility tree, and the device pixel ratio. Requests that have not
    finished in time are cancelled and re-issued once with a 2 second budget.
    Requests that finished with an exception are not retried.

    Before that, two best-effort ``Runtime.evaluate`` calls are made (a
    ``document.readyState`` probe and a read of same-origin iframe scroll
    positions). Their results are only logged; failures are logged at debug
    level and never abort the collection.

    If the snapshot holds more than ``self.max_iframes`` documents, it is
    truncated in place to the first ``self.max_iframes``.

    Args:
        target_id: Target to collect the trees for.

    Returns:
        A ``TargetAllTrees`` with the four results and a ``cdp_timing`` dict
        whose ``cdp_calls_total`` is the elapsed time, in seconds, of the
        concurrent phase (including the retry).

    Raises:
        TimeoutError: if any of the four requests raised or did not finish,
            after the retry. The message lists the affected request names.
    """
    cdp_session = await self.browser_session.get_or_create_cdp_session(target_id=target_id, focus=False)

    # Probe the page with a trivial evaluation first. The value is not used;
    # a failure just means the page might not be ready yet, so it is logged
    # instead of being silently dropped.
    try:
        await cdp_session.cdp_client.send.Runtime.evaluate(
            params={'expression': 'document.readyState'}, session_id=cdp_session.session_id
        )
    except Exception as e:
        self.logger.debug(f'document.readyState probe failed for target {target_id}: {e}')

    # DEBUG: Log before capturing snapshot
    self.logger.debug(f'🔍 DEBUG: Capturing DOM snapshot for target {target_id}')

    # Read actual scroll positions of the iframes before capturing the
    # snapshot. Only used for debug logging below.
    iframe_scroll_positions = {}
    try:
        scroll_result = await cdp_session.cdp_client.send.Runtime.evaluate(
            params={
                'expression': """
                (() => {
                    const scrollData = {};
                    const iframes = document.querySelectorAll('iframe');
                    iframes.forEach((iframe, index) => {
                        try {
                            const doc = iframe.contentDocument || iframe.contentWindow.document;
                            if (doc) {
                                scrollData[index] = {
                                    scrollTop: doc.documentElement.scrollTop || doc.body.scrollTop || 0,
                                    scrollLeft: doc.documentElement.scrollLeft || doc.body.scrollLeft || 0
                                };
                            }
                        } catch (e) {
                            // Cross-origin iframe, can't access
                        }
                    });
                    return scrollData;
                })()
                """,
                'returnByValue': True,
            },
            session_id=cdp_session.session_id,
        )
        if scroll_result and 'result' in scroll_result and 'value' in scroll_result['result']:
            iframe_scroll_positions = scroll_result['result']['value']
            for idx, scroll_data in iframe_scroll_positions.items():
                self.logger.debug(
                    f'🔍 DEBUG: Iframe {idx} actual scroll position - scrollTop={scroll_data.get("scrollTop", 0)}, scrollLeft={scroll_data.get("scrollLeft", 0)}'
                )
    except Exception as e:
        self.logger.debug(f'Failed to get iframe scroll positions: {e}')

    def create_snapshot_request():
        return cdp_session.cdp_client.send.DOMSnapshot.captureSnapshot(
            params={
                'computedStyles': REQUIRED_COMPUTED_STYLES,
                'includePaintOrder': True,
                'includeDOMRects': True,
                'includeBlendedBackgroundColors': False,
                'includeTextColorOpacities': False,
            },
            session_id=cdp_session.session_id,
        )

    def create_dom_tree_request():
        return cdp_session.cdp_client.send.DOM.getDocument(
            params={'depth': -1, 'pierce': True}, session_id=cdp_session.session_id
        )

    # One factory per request name, shared by the first attempt and the
    # retry so the two can never drift apart.
    request_factories = {
        'snapshot': create_snapshot_request,
        'dom_tree': create_dom_tree_request,
        'ax_tree': lambda: self._get_ax_tree_for_all_frames(target_id),
        'device_pixel_ratio': lambda: self._get_viewport_ratio(target_id),
    }

    # Monotonic clock: the elapsed time must not be affected by wall-clock
    # adjustments.
    start = time.perf_counter()

    tasks = {key: asyncio.create_task(factory()) for key, factory in request_factories.items()}

    # First attempt: wait for all tasks with timeout
    _, pending = await asyncio.wait(tasks.values(), timeout=10.0)

    # Retry the requests that timed out (and only those).
    if pending:
        for task in pending:
            task.cancel()

        for key, task in list(tasks.items()):
            if task in pending:
                tasks[key] = asyncio.create_task(request_factories[key]())

        # Wait again with shorter timeout. asyncio.wait() rejects an empty
        # collection, so only call it when something is actually outstanding.
        outstanding = [task for task in tasks.values() if not task.done()]
        if outstanding:
            _, still_pending = await asyncio.wait(outstanding, timeout=2.0)
            for task in still_pending:
                task.cancel()

    # Extract results, tracking which ones failed
    results = {}
    failed = []
    for key, task in tasks.items():
        if task.done() and not task.cancelled():
            try:
                results[key] = task.result()
            except Exception as e:
                self.logger.warning(f'CDP request {key} failed with exception: {e}')
                failed.append(key)
        else:
            self.logger.warning(f'CDP request {key} timed out')
            failed.append(key)

    # All four results are required; callers get a single TimeoutError for
    # both failures and timeouts.
    if failed:
        raise TimeoutError(f'CDP requests failed or timed out: {", ".join(failed)}')

    snapshot = results['snapshot']
    dom_tree = results['dom_tree']
    ax_tree = results['ax_tree']
    device_pixel_ratio = results['device_pixel_ratio']
    cdp_timing = {'cdp_calls_total': time.perf_counter() - start}

    # DEBUG: Log snapshot info and limit documents to prevent explosion
    if snapshot and 'documents' in snapshot:
        original_doc_count = len(snapshot['documents'])
        # Limit to max_iframes documents to prevent iframe explosion
        if original_doc_count > self.max_iframes:
            self.logger.warning(
                f'⚠️ Limiting processing of {original_doc_count} iframes on page to only first {self.max_iframes} to prevent crashes!'
            )
            snapshot['documents'] = snapshot['documents'][: self.max_iframes]

        total_nodes = sum(len(doc.get('nodes', [])) for doc in snapshot['documents'])
        self.logger.debug(f'🔍 DEBUG: Snapshot contains {len(snapshot["documents"])} frames with {total_nodes} total nodes')
        # Log iframe-specific info
        for doc_idx, doc in enumerate(snapshot['documents']):
            if doc_idx > 0:  # Not the main document
                self.logger.debug(
                    f'🔍 DEBUG: Iframe #{doc_idx} {doc.get("frameId", "no-frame-id")} {doc.get("url", "no-url")} has {len(doc.get("nodes", []))} nodes'
                )

    return TargetAllTrees(
        snapshot=snapshot,
        dom_tree=dom_tree,
        ax_tree=ax_tree,
        device_pixel_ratio=device_pixel_ratio,
        cdp_timing=cdp_timing,
    )
|
|
449
|
+
|
|
450
|
+
@observe_debug(ignore_input=True, ignore_output=True, name='get_dom_tree')
async def get_dom_tree(
    self,
    target_id: TargetID,
    initial_html_frames: list[EnhancedDOMTreeNode] | None = None,
    initial_total_frame_offset: DOMRect | None = None,
    iframe_depth: int = 0,
) -> EnhancedDOMTreeNode:
    """Get the DOM tree for a specific target.

    Fetches the snapshot, DOM tree and accessibility tree through
    ``self._get_all_trees`` and merges them into a tree of
    ``EnhancedDOMTreeNode`` objects rooted at ``dom_tree['root']``.

    Args:
        target_id: Target ID of the page to get the DOM tree for.
        initial_html_frames: List of HTML frame nodes encountered so far
        initial_total_frame_offset: Accumulated coordinate offset
        iframe_depth: Current depth of iframe nesting to prevent infinite recursion

    Returns:
        The enhanced node built for the root of the target's DOM tree.
    """

    trees = await self._get_all_trees(target_id)

    dom_tree = trees.dom_tree
    ax_tree = trees.ax_tree
    snapshot = trees.snapshot
    device_pixel_ratio = trees.device_pixel_ratio

    # backendDOMNodeId -> accessibility node (AX nodes without a backend id are skipped)
    ax_tree_lookup: dict[int, AXNode] = {
        ax_node['backendDOMNodeId']: ax_node for ax_node in ax_tree['nodes'] if 'backendDOMNodeId' in ax_node
    }

    # NodeId (NOT backend node id) -> enhanced dom tree node; way to get the parent/content node
    enhanced_dom_tree_node_lookup: dict[int, EnhancedDOMTreeNode] = {}

    # Parse snapshot data with everything calculated upfront
    snapshot_lookup = build_snapshot_lookup(snapshot, device_pixel_ratio)

    async def _construct_enhanced_node(
        node: Node, html_frames: list[EnhancedDOMTreeNode] | None, total_frame_offset: DOMRect | None
    ) -> EnhancedDOMTreeNode:
        """
        Recursively construct enhanced DOM tree nodes.

        Args:
            node: The DOM node to construct
            html_frames: List of HTML frame nodes encountered so far
            total_frame_offset: Accumulated coordinate translation from parent iframes (includes scroll corrections)

        Returns:
            The enhanced node for ``node``; a node whose ``nodeId`` was already built is returned from the lookup.
        """

        # Initialize lists if not provided
        if html_frames is None:
            html_frames = []

        # Always work on a private copy: the offset is mutated below and must not leak back to the caller
        if total_frame_offset is None:
            total_frame_offset = DOMRect(x=0.0, y=0.0, width=0.0, height=0.0)
        else:
            total_frame_offset = DOMRect(
                total_frame_offset.x, total_frame_offset.y, total_frame_offset.width, total_frame_offset.height
            )

        # Memoize by nodeId in case a node is reachable more than once
        if node['nodeId'] in enhanced_dom_tree_node_lookup:
            return enhanced_dom_tree_node_lookup[node['nodeId']]

        ax_node = ax_tree_lookup.get(node['backendNodeId'])
        enhanced_ax_node = self._build_enhanced_ax_node(ax_node) if ax_node else None

        # The attributes arrive as a flat [name, value, name, value, ...] list; pair them up.
        # zip() drops a dangling trailing name instead of raising IndexError on a malformed list.
        raw_attributes = node.get('attributes')
        attributes: dict[str, str] = dict(zip(raw_attributes[::2], raw_attributes[1::2])) if raw_attributes else {}

        # Missing or empty shadowRootType is normalised to None
        shadow_root_type = node.get('shadowRootType') or None

        # Get snapshot data and calculate absolute position
        snapshot_data = snapshot_lookup.get(node['backendNodeId'], None)
        absolute_position = None
        if snapshot_data and snapshot_data.bounds:
            absolute_position = DOMRect(
                x=snapshot_data.bounds.x + total_frame_offset.x,
                y=snapshot_data.bounds.y + total_frame_offset.y,
                width=snapshot_data.bounds.width,
                height=snapshot_data.bounds.height,
            )

        dom_tree_node = EnhancedDOMTreeNode(
            node_id=node['nodeId'],
            backend_node_id=node['backendNodeId'],
            node_type=NodeType(node['nodeType']),
            node_name=node['nodeName'],
            node_value=node['nodeValue'],
            attributes=attributes,
            is_scrollable=node.get('isScrollable', None),
            frame_id=node.get('frameId', None),
            session_id=self.browser_session.agent_focus.session_id if self.browser_session.agent_focus else None,
            target_id=target_id,
            content_document=None,
            shadow_root_type=shadow_root_type,
            shadow_roots=None,
            parent_node=None,
            children_nodes=None,
            ax_node=enhanced_ax_node,
            snapshot_node=snapshot_data,
            is_visible=None,
            absolute_position=absolute_position,
        )

        # Register before recursing so descendants can resolve this node as their parent
        enhanced_dom_tree_node_lookup[node['nodeId']] = dom_tree_node

        if node.get('parentId'):
            # parents should always be in the lookup
            dom_tree_node.parent_node = enhanced_dom_tree_node_lookup[node['parentId']]

        # Check if this is an HTML frame node and add it to the list
        updated_html_frames = html_frames.copy()
        if node['nodeType'] == NodeType.ELEMENT_NODE.value and node['nodeName'] == 'HTML' and node.get('frameId') is not None:
            updated_html_frames.append(dom_tree_node)

            # and adjust the total frame offset by scroll
            if snapshot_data and snapshot_data.scrollRects:
                total_frame_offset.x -= snapshot_data.scrollRects.x
                total_frame_offset.y -= snapshot_data.scrollRects.y
                # DEBUG: Log iframe scroll information
                self.logger.debug(
                    f'🔍 DEBUG: HTML frame scroll - scrollY={snapshot_data.scrollRects.y}, scrollX={snapshot_data.scrollRects.x}, frameId={node.get("frameId")}, nodeId={node["nodeId"]}'
                )

        # Calculate new iframe offset for content documents, accounting for iframe scroll
        if node['nodeName'].upper() in ('IFRAME', 'FRAME') and snapshot_data and snapshot_data.bounds:
            updated_html_frames.append(dom_tree_node)

            total_frame_offset.x += snapshot_data.bounds.x
            total_frame_offset.y += snapshot_data.bounds.y

        if node.get('contentDocument'):
            dom_tree_node.content_document = await _construct_enhanced_node(
                node['contentDocument'], updated_html_frames, total_frame_offset
            )
            # forcefully set the parent node to the content document node (helps traverse the tree)
            dom_tree_node.content_document.parent_node = dom_tree_node

        shadow_roots = node.get('shadowRoots')
        if shadow_roots:
            dom_tree_node.shadow_roots = []
            for shadow_root in shadow_roots:
                shadow_root_node = await _construct_enhanced_node(shadow_root, updated_html_frames, total_frame_offset)
                # forcefully set the parent node to the shadow root node (helps traverse the tree)
                shadow_root_node.parent_node = dom_tree_node
                dom_tree_node.shadow_roots.append(shadow_root_node)

        if node.get('children'):
            dom_tree_node.children_nodes = []
            # Build set of shadow root node IDs to filter them out from children
            shadow_root_node_ids = {shadow_root['nodeId'] for shadow_root in (shadow_roots or [])}

            for child in node['children']:
                # Skip shadow roots - they should only be in shadow_roots list
                if child['nodeId'] in shadow_root_node_ids:
                    continue
                dom_tree_node.children_nodes.append(
                    await _construct_enhanced_node(child, updated_html_frames, total_frame_offset)
                )

        # Set visibility using the collected HTML frames
        dom_tree_node.is_visible = self.is_element_visible_according_to_all_parents(dom_tree_node, updated_html_frames)

        # DEBUG: Log visibility info for form elements in iframes
        if dom_tree_node.tag_name and dom_tree_node.tag_name.upper() in ['INPUT', 'SELECT', 'TEXTAREA', 'LABEL']:
            attrs = dom_tree_node.attributes or {}
            elem_id = attrs.get('id', '')
            elem_name = attrs.get('name', '')
            identifiers = (elem_id.lower(), elem_name.lower())
            if any(keyword in identifier for keyword in ('city', 'state', 'zip') for identifier in identifiers):
                self.logger.debug(
                    f"🔍 DEBUG: Form element {dom_tree_node.tag_name} id='{elem_id}' name='{elem_name}' - visible={dom_tree_node.is_visible}, bounds={dom_tree_node.snapshot_node.bounds if dom_tree_node.snapshot_node else 'NO_SNAPSHOT'}"
                )

        # handle cross origin iframe (just recursively call the main function with the proper target if it exists in iframes)
        # only do this if the iframe is visible (otherwise it's not worth it)

        if (
            # TODO: hacky way to disable cross origin iframes for now
            self.cross_origin_iframes and node['nodeName'].upper() == 'IFRAME' and node.get('contentDocument', None) is None
        ):  # None meaning there is no content
            # Check iframe depth to prevent infinite recursion
            if iframe_depth >= self.max_iframe_depth:
                self.logger.debug(
                    f'Skipping iframe at depth {iframe_depth} to prevent infinite recursion (max depth: {self.max_iframe_depth})'
                )
            else:
                # Check if iframe is visible and large enough (>= 50px in both dimensions)
                should_process_iframe = False

                if not dom_tree_node.is_visible:
                    self.logger.debug('Skipping invisible cross-origin iframe')
                elif not (dom_tree_node.snapshot_node and dom_tree_node.snapshot_node.bounds):
                    self.logger.debug('Skipping cross-origin iframe: no bounds available')
                else:
                    bounds = dom_tree_node.snapshot_node.bounds
                    width = bounds.width
                    height = bounds.height

                    # Only process if iframe is at least 50px in both dimensions
                    if width >= 50 and height >= 50:
                        should_process_iframe = True
                        self.logger.debug(f'Processing cross-origin iframe: visible=True, width={width}, height={height}')
                    else:
                        self.logger.debug(
                            f'Skipping small cross-origin iframe: width={width}, height={height} (needs >= 50px)'
                        )

                if should_process_iframe:
                    # Use get_all_frames to find the iframe's target
                    iframe_document_target = None
                    frame_id = node.get('frameId', None)
                    if frame_id:
                        all_frames, _ = await self.browser_session.get_all_frames()
                        frame_info = all_frames.get(frame_id)
                        if frame_info and frame_info.get('frameTargetId'):
                            # Get the target info for this iframe
                            targets = await self.browser_session.cdp_client.send.Target.getTargets()
                            iframe_document_target = next(
                                (t for t in targets['targetInfos'] if t['targetId'] == frame_info['frameTargetId']), None
                            )

                    # if target actually exists in one of the frames, just recursively build the dom tree for it
                    if iframe_document_target:
                        self.logger.debug(
                            f'Getting content document for iframe {node.get("frameId", None)} at depth {iframe_depth + 1}'
                        )
                        content_document = await self.get_dom_tree(
                            target_id=iframe_document_target.get('targetId'),
                            # TODO: experiment with this values -> not sure whether the whole cross origin iframe should be ALWAYS included as soon as some part of it is visible or not.
                            # Current config: if the cross origin iframe is AT ALL visible, then just include everything inside of it!
                            # initial_html_frames=updated_html_frames,
                            initial_total_frame_offset=total_frame_offset,
                            iframe_depth=iframe_depth + 1,
                        )

                        dom_tree_node.content_document = content_document
                        dom_tree_node.content_document.parent_node = dom_tree_node

        return dom_tree_node

    enhanced_dom_tree_node = await _construct_enhanced_node(dom_tree['root'], initial_html_frames, initial_total_frame_offset)

    return enhanced_dom_tree_node
|
|
723
|
+
|
|
724
|
+
@observe_debug(ignore_input=True, ignore_output=True, name='get_serialized_dom_tree')
async def get_serialized_dom_tree(
    self, previous_cached_state: SerializedDOMState | None = None
) -> tuple[SerializedDOMState, EnhancedDOMTreeNode, dict[str, float]]:
    """Build the enhanced DOM tree of the current target and serialize it for LLM consumption.

    Args:
        previous_cached_state: Earlier serialized state, handed unchanged to ``DOMTreeSerializer``.

    Returns:
        Tuple of (serialized_dom_state, enhanced_dom_tree_root, timing_info), where
        timing_info is the serializer's own timing dict plus a
        ``serialize_dom_tree_total`` entry holding the wall-clock seconds spent
        constructing and running the serializer.
    """

    # A current target must already be selected on the browser session
    assert self.browser_session.current_target_id is not None
    dom_root = await self.get_dom_tree(target_id=self.browser_session.current_target_id)

    # Time only the serialization step (tree construction above is excluded)
    started_at = time.time()
    serializer = DOMTreeSerializer(dom_root, previous_cached_state, paint_order_filtering=self.paint_order_filtering)
    dom_state, serializer_timing = serializer.serialize_accessible_elements()
    elapsed = time.time() - started_at

    # Combine all timing info; the total is written last so it wins on a key clash
    timing_info = {**serializer_timing, 'serialize_dom_tree_total': elapsed}

    return dom_state, dom_root, timing_info
|
|
750
|
+
|
|
751
|
+
@staticmethod
|
|
752
|
+
def detect_pagination_buttons(selector_map: dict[int, EnhancedDOMTreeNode]) -> list[dict[str, str | int | bool]]:
|
|
753
|
+
"""Detect pagination buttons from the selector map.
|
|
754
|
+
|
|
755
|
+
Args:
|
|
756
|
+
selector_map: Map of element indices to EnhancedDOMTreeNode
|
|
757
|
+
|
|
758
|
+
Returns:
|
|
759
|
+
List of pagination button information dicts with:
|
|
760
|
+
- button_type: 'next', 'prev', 'first', 'last', 'page_number'
|
|
761
|
+
- backend_node_id: Backend node ID for clicking
|
|
762
|
+
- text: Button text/label
|
|
763
|
+
- selector: XPath selector
|
|
764
|
+
- is_disabled: Whether the button appears disabled
|
|
765
|
+
"""
|
|
766
|
+
pagination_buttons: list[dict[str, str | int | bool]] = []
|
|
767
|
+
|
|
768
|
+
# Common pagination patterns to look for
|
|
769
|
+
next_patterns = ['next', '>', '»', '→', 'siguiente', 'suivant', 'weiter', 'volgende']
|
|
770
|
+
prev_patterns = ['prev', 'previous', '<', '«', '←', 'anterior', 'précédent', 'zurück', 'vorige']
|
|
771
|
+
first_patterns = ['first', '⇤', '«', 'primera', 'première', 'erste', 'eerste']
|
|
772
|
+
last_patterns = ['last', '⇥', '»', 'última', 'dernier', 'letzte', 'laatste']
|
|
773
|
+
|
|
774
|
+
for index, node in selector_map.items():
|
|
775
|
+
# Skip non-clickable elements
|
|
776
|
+
if not node.snapshot_node or not node.snapshot_node.is_clickable:
|
|
777
|
+
continue
|
|
778
|
+
|
|
779
|
+
# Get element text and attributes
|
|
780
|
+
text = node.get_all_children_text().lower().strip()
|
|
781
|
+
aria_label = node.attributes.get('aria-label', '').lower()
|
|
782
|
+
title = node.attributes.get('title', '').lower()
|
|
783
|
+
class_name = node.attributes.get('class', '').lower()
|
|
784
|
+
role = node.attributes.get('role', '').lower()
|
|
785
|
+
|
|
786
|
+
# Combine all text sources for pattern matching
|
|
787
|
+
all_text = f'{text} {aria_label} {title} {class_name}'.strip()
|
|
788
|
+
|
|
789
|
+
# Check if it's disabled
|
|
790
|
+
is_disabled = (
|
|
791
|
+
node.attributes.get('disabled') == 'true'
|
|
792
|
+
or node.attributes.get('aria-disabled') == 'true'
|
|
793
|
+
or 'disabled' in class_name
|
|
794
|
+
)
|
|
795
|
+
|
|
796
|
+
button_type: str | None = None
|
|
797
|
+
|
|
798
|
+
# Check for next button
|
|
799
|
+
if any(pattern in all_text for pattern in next_patterns):
|
|
800
|
+
button_type = 'next'
|
|
801
|
+
# Check for previous button
|
|
802
|
+
elif any(pattern in all_text for pattern in prev_patterns):
|
|
803
|
+
button_type = 'prev'
|
|
804
|
+
# Check for first button
|
|
805
|
+
elif any(pattern in all_text for pattern in first_patterns):
|
|
806
|
+
button_type = 'first'
|
|
807
|
+
# Check for last button
|
|
808
|
+
elif any(pattern in all_text for pattern in last_patterns):
|
|
809
|
+
button_type = 'last'
|
|
810
|
+
# Check for numeric page buttons (single or double digit)
|
|
811
|
+
elif text.isdigit() and len(text) <= 2 and role in ['button', 'link', '']:
|
|
812
|
+
button_type = 'page_number'
|
|
813
|
+
|
|
814
|
+
if button_type:
|
|
815
|
+
pagination_buttons.append(
|
|
816
|
+
{
|
|
817
|
+
'button_type': button_type,
|
|
818
|
+
'backend_node_id': index,
|
|
819
|
+
'text': node.get_all_children_text().strip() or aria_label or title,
|
|
820
|
+
'selector': node.xpath,
|
|
821
|
+
'is_disabled': is_disabled,
|
|
822
|
+
}
|
|
823
|
+
)
|
|
824
|
+
|
|
825
|
+
return pagination_buttons
|