optexity-browser-use 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use/__init__.py +157 -0
- browser_use/actor/__init__.py +11 -0
- browser_use/actor/element.py +1175 -0
- browser_use/actor/mouse.py +134 -0
- browser_use/actor/page.py +561 -0
- browser_use/actor/playground/flights.py +41 -0
- browser_use/actor/playground/mixed_automation.py +54 -0
- browser_use/actor/playground/playground.py +236 -0
- browser_use/actor/utils.py +176 -0
- browser_use/agent/cloud_events.py +282 -0
- browser_use/agent/gif.py +424 -0
- browser_use/agent/judge.py +170 -0
- browser_use/agent/message_manager/service.py +473 -0
- browser_use/agent/message_manager/utils.py +52 -0
- browser_use/agent/message_manager/views.py +98 -0
- browser_use/agent/prompts.py +413 -0
- browser_use/agent/service.py +2316 -0
- browser_use/agent/system_prompt.md +185 -0
- browser_use/agent/system_prompt_flash.md +10 -0
- browser_use/agent/system_prompt_no_thinking.md +183 -0
- browser_use/agent/views.py +743 -0
- browser_use/browser/__init__.py +41 -0
- browser_use/browser/cloud/cloud.py +203 -0
- browser_use/browser/cloud/views.py +89 -0
- browser_use/browser/events.py +578 -0
- browser_use/browser/profile.py +1158 -0
- browser_use/browser/python_highlights.py +548 -0
- browser_use/browser/session.py +3225 -0
- browser_use/browser/session_manager.py +399 -0
- browser_use/browser/video_recorder.py +162 -0
- browser_use/browser/views.py +200 -0
- browser_use/browser/watchdog_base.py +260 -0
- browser_use/browser/watchdogs/__init__.py +0 -0
- browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
- browser_use/browser/watchdogs/crash_watchdog.py +335 -0
- browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
- browser_use/browser/watchdogs/dom_watchdog.py +817 -0
- browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
- browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
- browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
- browser_use/browser/watchdogs/popups_watchdog.py +143 -0
- browser_use/browser/watchdogs/recording_watchdog.py +126 -0
- browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
- browser_use/browser/watchdogs/security_watchdog.py +280 -0
- browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
- browser_use/cli.py +2359 -0
- browser_use/code_use/__init__.py +16 -0
- browser_use/code_use/formatting.py +192 -0
- browser_use/code_use/namespace.py +665 -0
- browser_use/code_use/notebook_export.py +276 -0
- browser_use/code_use/service.py +1340 -0
- browser_use/code_use/system_prompt.md +574 -0
- browser_use/code_use/utils.py +150 -0
- browser_use/code_use/views.py +171 -0
- browser_use/config.py +505 -0
- browser_use/controller/__init__.py +3 -0
- browser_use/dom/enhanced_snapshot.py +161 -0
- browser_use/dom/markdown_extractor.py +169 -0
- browser_use/dom/playground/extraction.py +312 -0
- browser_use/dom/playground/multi_act.py +32 -0
- browser_use/dom/serializer/clickable_elements.py +200 -0
- browser_use/dom/serializer/code_use_serializer.py +287 -0
- browser_use/dom/serializer/eval_serializer.py +478 -0
- browser_use/dom/serializer/html_serializer.py +212 -0
- browser_use/dom/serializer/paint_order.py +197 -0
- browser_use/dom/serializer/serializer.py +1170 -0
- browser_use/dom/service.py +825 -0
- browser_use/dom/utils.py +129 -0
- browser_use/dom/views.py +906 -0
- browser_use/exceptions.py +5 -0
- browser_use/filesystem/__init__.py +0 -0
- browser_use/filesystem/file_system.py +619 -0
- browser_use/init_cmd.py +376 -0
- browser_use/integrations/gmail/__init__.py +24 -0
- browser_use/integrations/gmail/actions.py +115 -0
- browser_use/integrations/gmail/service.py +225 -0
- browser_use/llm/__init__.py +155 -0
- browser_use/llm/anthropic/chat.py +242 -0
- browser_use/llm/anthropic/serializer.py +312 -0
- browser_use/llm/aws/__init__.py +36 -0
- browser_use/llm/aws/chat_anthropic.py +242 -0
- browser_use/llm/aws/chat_bedrock.py +289 -0
- browser_use/llm/aws/serializer.py +257 -0
- browser_use/llm/azure/chat.py +91 -0
- browser_use/llm/base.py +57 -0
- browser_use/llm/browser_use/__init__.py +3 -0
- browser_use/llm/browser_use/chat.py +201 -0
- browser_use/llm/cerebras/chat.py +193 -0
- browser_use/llm/cerebras/serializer.py +109 -0
- browser_use/llm/deepseek/chat.py +212 -0
- browser_use/llm/deepseek/serializer.py +109 -0
- browser_use/llm/exceptions.py +29 -0
- browser_use/llm/google/__init__.py +3 -0
- browser_use/llm/google/chat.py +542 -0
- browser_use/llm/google/serializer.py +120 -0
- browser_use/llm/groq/chat.py +229 -0
- browser_use/llm/groq/parser.py +158 -0
- browser_use/llm/groq/serializer.py +159 -0
- browser_use/llm/messages.py +238 -0
- browser_use/llm/models.py +271 -0
- browser_use/llm/oci_raw/__init__.py +10 -0
- browser_use/llm/oci_raw/chat.py +443 -0
- browser_use/llm/oci_raw/serializer.py +229 -0
- browser_use/llm/ollama/chat.py +97 -0
- browser_use/llm/ollama/serializer.py +143 -0
- browser_use/llm/openai/chat.py +264 -0
- browser_use/llm/openai/like.py +15 -0
- browser_use/llm/openai/serializer.py +165 -0
- browser_use/llm/openrouter/chat.py +211 -0
- browser_use/llm/openrouter/serializer.py +26 -0
- browser_use/llm/schema.py +176 -0
- browser_use/llm/views.py +48 -0
- browser_use/logging_config.py +330 -0
- browser_use/mcp/__init__.py +18 -0
- browser_use/mcp/__main__.py +12 -0
- browser_use/mcp/client.py +544 -0
- browser_use/mcp/controller.py +264 -0
- browser_use/mcp/server.py +1114 -0
- browser_use/observability.py +204 -0
- browser_use/py.typed +0 -0
- browser_use/sandbox/__init__.py +41 -0
- browser_use/sandbox/sandbox.py +637 -0
- browser_use/sandbox/views.py +132 -0
- browser_use/screenshots/__init__.py +1 -0
- browser_use/screenshots/service.py +52 -0
- browser_use/sync/__init__.py +6 -0
- browser_use/sync/auth.py +357 -0
- browser_use/sync/service.py +161 -0
- browser_use/telemetry/__init__.py +51 -0
- browser_use/telemetry/service.py +112 -0
- browser_use/telemetry/views.py +101 -0
- browser_use/tokens/__init__.py +0 -0
- browser_use/tokens/custom_pricing.py +24 -0
- browser_use/tokens/mappings.py +4 -0
- browser_use/tokens/service.py +580 -0
- browser_use/tokens/views.py +108 -0
- browser_use/tools/registry/service.py +572 -0
- browser_use/tools/registry/views.py +174 -0
- browser_use/tools/service.py +1675 -0
- browser_use/tools/utils.py +82 -0
- browser_use/tools/views.py +100 -0
- browser_use/utils.py +670 -0
- optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
- optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
- optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
- optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
- optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
browser_use/dom/views.py
ADDED
|
@@ -0,0 +1,906 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
from dataclasses import asdict, dataclass, field
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from cdp_use.cdp.accessibility.commands import GetFullAXTreeReturns
|
|
7
|
+
from cdp_use.cdp.accessibility.types import AXPropertyName
|
|
8
|
+
from cdp_use.cdp.dom.commands import GetDocumentReturns
|
|
9
|
+
from cdp_use.cdp.dom.types import ShadowRootType
|
|
10
|
+
from cdp_use.cdp.domsnapshot.commands import CaptureSnapshotReturns
|
|
11
|
+
from cdp_use.cdp.target.types import SessionID, TargetID, TargetInfo
|
|
12
|
+
from uuid_extensions import uuid7str
|
|
13
|
+
|
|
14
|
+
from browser_use.dom.utils import cap_text_length
|
|
15
|
+
from browser_use.observability import observe_debug
|
|
16
|
+
|
|
17
|
+
# Serializer types
|
|
18
|
+
# Attribute names included by default when an element is serialized.
# Order is preserved from the original definition; every name appears exactly once.
DEFAULT_INCLUDE_ATTRIBUTES = [
    'title',
    'type',
    'checked',  # also the name of an AX property; listed only here to avoid a duplicate entry
    # 'class',
    'id',
    'name',
    'role',
    'value',
    'placeholder',
    'data-date-format',
    'alt',
    'aria-label',
    'aria-expanded',
    'data-state',
    'aria-checked',
    # ARIA value attributes for datetime/range inputs
    'aria-valuemin',
    'aria-valuemax',
    'aria-valuenow',
    'aria-placeholder',
    # Validation attributes - help agents avoid brute force attempts
    'pattern',
    'min',
    'max',
    'minlength',
    'maxlength',
    'step',
    'accept',  # File input types (e.g., accept="image/*" or accept=".pdf")
    'multiple',  # Whether multiple files/selections are allowed
    'inputmode',  # Virtual keyboard hint (numeric, tel, email, url, etc.)
    'autocomplete',  # Autocomplete behavior hint
    'data-mask',  # Input mask format (e.g., phone numbers, credit cards)
    'data-inputmask',  # Alternative input mask attribute
    'data-datepicker',  # jQuery datepicker indicator
    'format',  # Synthetic attribute for date/time input format (e.g., MM/dd/yyyy)
    'expected_format',  # Synthetic attribute for explicit expected format (e.g., AngularJS datepickers)
    'contenteditable',  # Rich text editor detection
    # Webkit shadow DOM identifiers
    'pseudo',
    # Accessibility properties from ax_node (ordered by importance for automation).
    # 'checked' belongs to this group as well but is already listed near the top.
    'selected',
    'expanded',
    'pressed',
    'disabled',
    'invalid',  # Current validation state from AX node
    'valuemin',  # Min value from AX node (for datetime/range)
    'valuemax',  # Max value from AX node (for datetime/range)
    'valuenow',
    'keyshortcuts',
    'haspopup',
    'multiselectable',
    # Less commonly needed AX properties ('readonly' is currently disabled):
    # 'readonly',
    'required',
    'valuetext',
    'level',
    'busy',
    'live',
    # Accessibility name (contains text content for StaticText elements)
    'ax_name',
    'data-qtip',  # presumably a tooltip attribute (e.g. Ext JS quick tips) - TODO confirm
]
|
|
82
|
+
|
|
83
|
+
# Attribute names treated as static. Being a set, only membership matters;
# the entries are grouped by purpose purely for readability.
STATIC_ATTRIBUTES = {
    # Identity and labelling
    'id',
    'class',
    'name',
    'type',
    'role',
    'title',
    'placeholder',
    'alt',
    'for',
    'lang',
    'tabindex',
    # Test-automation hooks
    'data-testid',
    'data-test',
    'data-cy',
    'data-selenium',
    # Form control flags
    'required',
    'disabled',
    'readonly',
    'checked',
    'selected',
    'multiple',
    'accept',
    # Links and media
    'href',
    'target',
    'rel',
    'src',
    # ARIA labelling, relationships and state
    'aria-label',
    # 'aria-expanded',
    'aria-describedby',
    'aria-labelledby',
    'aria-controls',
    'aria-owns',
    'aria-live',
    'aria-atomic',
    'aria-busy',
    'aria-disabled',
    'aria-hidden',
    'aria-pressed',
    'aria-checked',
    'aria-selected',
    # ARIA range values
    'aria-valuemin',
    'aria-valuemax',
    'aria-valuenow',
    'aria-placeholder',
    # Microdata
    'itemscope',
    'itemtype',
    'itemprop',
    # Webkit shadow DOM attributes
    'pseudo',
}
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@dataclass
class CurrentPageTargets:
    """Target info of a page together with the iframe targets known alongside it."""

    # CDP target info describing the page itself.
    page_session: TargetInfo
    # CDP target infos of iframes; see the note below for which iframes are included.
    iframe_sessions: list[TargetInfo]
    """
    Iframe sessions are ALL the iframes sessions of all the pages (not just the current page)
    """
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@dataclass
class TargetAllTrees:
    """Bundle of the raw CDP tree results collected for one target."""

    # Return value of the DOMSnapshot capture-snapshot command.
    snapshot: CaptureSnapshotReturns
    # Return value of the DOM get-document command.
    dom_tree: GetDocumentReturns
    # Return value of the Accessibility get-full-AX-tree command.
    ax_tree: GetFullAXTreeReturns
    # Device pixel ratio reported for the target.
    device_pixel_ratio: float
    # Named timing measurements - presumably one entry per CDP call; units not visible here, TODO confirm.
    cdp_timing: dict[str, float]
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@dataclass(slots=True)
class PropagatingBounds:
    """Track bounds that propagate from parent elements to filter children.

    `bounds` is annotated with a forward reference because `DOMRect` is defined
    further down in this module.
    """

    tag: str  # The tag that started propagation ('a' or 'button')
    bounds: 'DOMRect'  # The bounding box
    node_id: int  # Node ID for debugging
    depth: int  # How deep in tree this started (for debugging)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@dataclass(slots=True)
|
|
165
|
+
class SimplifiedNode:
|
|
166
|
+
"""Simplified tree node for optimization."""
|
|
167
|
+
|
|
168
|
+
original_node: 'EnhancedDOMTreeNode'
|
|
169
|
+
children: list['SimplifiedNode']
|
|
170
|
+
should_display: bool = True
|
|
171
|
+
is_interactive: bool = False # True if element is in selector_map
|
|
172
|
+
|
|
173
|
+
is_new: bool = False
|
|
174
|
+
|
|
175
|
+
ignored_by_paint_order: bool = False # More info in dom/serializer/paint_order.py
|
|
176
|
+
excluded_by_parent: bool = False # New field for bbox filtering
|
|
177
|
+
is_shadow_host: bool = False # New field for shadow DOM hosts
|
|
178
|
+
is_compound_component: bool = False # True for virtual components of compound controls
|
|
179
|
+
|
|
180
|
+
def _clean_original_node_json(self, node_json: dict) -> dict:
|
|
181
|
+
"""Recursively remove children_nodes and shadow_roots from original_node JSON."""
|
|
182
|
+
# Remove the fields we don't want in SimplifiedNode serialization
|
|
183
|
+
if 'children_nodes' in node_json:
|
|
184
|
+
del node_json['children_nodes']
|
|
185
|
+
if 'shadow_roots' in node_json:
|
|
186
|
+
del node_json['shadow_roots']
|
|
187
|
+
|
|
188
|
+
# Clean nested content_document if it exists
|
|
189
|
+
if node_json.get('content_document'):
|
|
190
|
+
node_json['content_document'] = self._clean_original_node_json(node_json['content_document'])
|
|
191
|
+
|
|
192
|
+
return node_json
|
|
193
|
+
|
|
194
|
+
def __json__(self) -> dict:
|
|
195
|
+
original_node_json = self.original_node.__json__()
|
|
196
|
+
# Remove children_nodes and shadow_roots to avoid duplication with SimplifiedNode.children
|
|
197
|
+
cleaned_original_node_json = self._clean_original_node_json(original_node_json)
|
|
198
|
+
return {
|
|
199
|
+
'should_display': self.should_display,
|
|
200
|
+
'is_interactive': self.is_interactive,
|
|
201
|
+
'ignored_by_paint_order': self.ignored_by_paint_order,
|
|
202
|
+
'excluded_by_parent': self.excluded_by_parent,
|
|
203
|
+
'original_node': cleaned_original_node_json,
|
|
204
|
+
'children': [c.__json__() for c in self.children],
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
class NodeType(int, Enum):
    """DOM node types based on the DOM specification.

    The class mixes in `int`, so each member compares equal to its numeric value.
    """

    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11  # treated as a shadow root by EnhancedDOMTreeNode.xpath
    NOTATION_NODE = 12
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
@dataclass(slots=True)
|
|
226
|
+
class DOMRect:
|
|
227
|
+
x: float
|
|
228
|
+
y: float
|
|
229
|
+
width: float
|
|
230
|
+
height: float
|
|
231
|
+
|
|
232
|
+
def to_dict(self) -> dict[str, Any]:
|
|
233
|
+
return {
|
|
234
|
+
'x': self.x,
|
|
235
|
+
'y': self.y,
|
|
236
|
+
'width': self.width,
|
|
237
|
+
'height': self.height,
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
def __json__(self) -> dict:
|
|
241
|
+
return self.to_dict()
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@dataclass(slots=True)
class EnhancedAXProperty:
    """A single accessibility (AX) property stored as a name/value pair.

    `sources` and `related_nodes` are not stored for now, because it is not yet
    clear how they would be used.

    TODO: find a reliable way to determine whether a property carries a value or
    related nodes; the current handling is good enough for now.
    """

    # Property name, typed with the CDP AXPropertyName type.
    name: AXPropertyName
    # Property value; None when no value is available.
    value: str | bool | None
    # related_nodes: list[EnhancedAXRelatedNode] | None
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
@dataclass(slots=True)
class EnhancedAXNode:
    """Subset of an accessibility-tree (AX) node that is attached to a DOM node."""

    ax_node_id: str
    """Not to be confused the DOM node_id. Only useful for AX node tree"""
    ignored: bool
    # we don't need ignored_reasons as we anyway ignore the node otherwise
    role: str | None
    name: str | None
    description: str | None

    # AX properties of the node, or None when there are none.
    properties: list[EnhancedAXProperty] | None
    # AX node ids of the children (same id space as `ax_node_id`), or None.
    child_ids: list[str] | None
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@dataclass(slots=True)
class EnhancedSnapshotNode:
    """Snapshot data extracted from DOMSnapshot for enhanced functionality."""

    is_clickable: bool | None
    cursor_style: str | None
    bounds: DOMRect | None
    """
    Document coordinates (origin = top-left of the page, ignores current scroll).
    Equivalent JS API: layoutNode.boundingBox in the older API.
    Typical use: Quick hit-test that doesn't care about scroll position.
    """

    # Compared against `scrollRects` by EnhancedDOMTreeNode.is_actually_scrollable.
    clientRects: DOMRect | None
    """
    Viewport coordinates (origin = top-left of the visible scrollport).
    Equivalent JS API: element.getClientRects() / getBoundingClientRect().
    Typical use: Pixel-perfect hit-testing on screen, taking current scroll into account.
    """

    # Content larger than `clientRects` (by more than 1 unit) is treated as overflowing.
    scrollRects: DOMRect | None
    """
    Scrollable area of the element.
    """

    # The `overflow`, `overflow-x` and `overflow-y` entries are read for scroll detection.
    computed_styles: dict[str, str] | None
    """Computed styles from the layout tree"""
    paint_order: int | None
    """Paint order from the layout tree"""
    stacking_contexts: int | None
    """Stacking contexts from the layout tree"""
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
# @dataclass(slots=True)
|
|
304
|
+
# class SuperSelector:
|
|
305
|
+
# node_id: int
|
|
306
|
+
# backend_node_id: int
|
|
307
|
+
# frame_id: str | None
|
|
308
|
+
# target_id: TargetID
|
|
309
|
+
|
|
310
|
+
# node_type: NodeType
|
|
311
|
+
# node_name: str
|
|
312
|
+
|
|
313
|
+
# # is_visible: bool | None
|
|
314
|
+
# # is_scrollable: bool | None
|
|
315
|
+
|
|
316
|
+
# element_index: int | None
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
@dataclass(slots=True)
|
|
320
|
+
class EnhancedDOMTreeNode:
|
|
321
|
+
"""
|
|
322
|
+
Enhanced DOM tree node that contains information from AX, DOM, and Snapshot trees. It's mostly based on the types on DOM node type with enhanced data from AX and Snapshot trees.
|
|
323
|
+
|
|
324
|
+
@dev when serializing check if the value is a valid value first!
|
|
325
|
+
|
|
326
|
+
Learn more about the fields:
|
|
327
|
+
- (DOM node) https://chromedevtools.github.io/devtools-protocol/tot/DOM/#type-BackendNode
|
|
328
|
+
- (AX node) https://chromedevtools.github.io/devtools-protocol/tot/Accessibility/#type-AXNode
|
|
329
|
+
- (Snapshot node) https://chromedevtools.github.io/devtools-protocol/tot/DOMSnapshot/#type-DOMNode
|
|
330
|
+
"""
|
|
331
|
+
|
|
332
|
+
# region - DOM Node data
|
|
333
|
+
|
|
334
|
+
node_id: int
|
|
335
|
+
backend_node_id: int
|
|
336
|
+
|
|
337
|
+
node_type: NodeType
|
|
338
|
+
"""Node types, defined in `NodeType` enum."""
|
|
339
|
+
node_name: str
|
|
340
|
+
"""Only applicable for `NodeType.ELEMENT_NODE`"""
|
|
341
|
+
node_value: str
|
|
342
|
+
"""this is where the value from `NodeType.TEXT_NODE` is stored usually"""
|
|
343
|
+
attributes: dict[str, str]
|
|
344
|
+
"""slightly changed from the original attributes to be more readable"""
|
|
345
|
+
is_scrollable: bool | None
|
|
346
|
+
"""
|
|
347
|
+
Whether the node is scrollable.
|
|
348
|
+
"""
|
|
349
|
+
is_visible: bool | None
|
|
350
|
+
"""
|
|
351
|
+
Whether the node is visible according to the upper most frame node.
|
|
352
|
+
"""
|
|
353
|
+
|
|
354
|
+
absolute_position: DOMRect | None
|
|
355
|
+
"""
|
|
356
|
+
Absolute position of the node in the document according to the top-left of the page.
|
|
357
|
+
"""
|
|
358
|
+
|
|
359
|
+
# frames
|
|
360
|
+
target_id: TargetID
|
|
361
|
+
frame_id: str | None
|
|
362
|
+
session_id: SessionID | None
|
|
363
|
+
content_document: 'EnhancedDOMTreeNode | None'
|
|
364
|
+
"""
|
|
365
|
+
Content document is the document inside a new iframe.
|
|
366
|
+
"""
|
|
367
|
+
# Shadow DOM
|
|
368
|
+
shadow_root_type: ShadowRootType | None
|
|
369
|
+
shadow_roots: list['EnhancedDOMTreeNode'] | None
|
|
370
|
+
"""
|
|
371
|
+
Shadow roots are the shadow DOMs of the element.
|
|
372
|
+
"""
|
|
373
|
+
|
|
374
|
+
# Navigation
|
|
375
|
+
parent_node: 'EnhancedDOMTreeNode | None'
|
|
376
|
+
children_nodes: list['EnhancedDOMTreeNode'] | None
|
|
377
|
+
|
|
378
|
+
# endregion - DOM Node data
|
|
379
|
+
|
|
380
|
+
# region - AX Node data
|
|
381
|
+
ax_node: EnhancedAXNode | None
|
|
382
|
+
|
|
383
|
+
# endregion - AX Node data
|
|
384
|
+
|
|
385
|
+
# region - Snapshot Node data
|
|
386
|
+
snapshot_node: EnhancedSnapshotNode | None
|
|
387
|
+
|
|
388
|
+
# endregion - Snapshot Node data
|
|
389
|
+
|
|
390
|
+
# Compound control child components information
|
|
391
|
+
_compound_children: list[dict[str, Any]] = field(default_factory=list)
|
|
392
|
+
|
|
393
|
+
uuid: str = field(default_factory=uuid7str)
|
|
394
|
+
|
|
395
|
+
@property
def parent(self) -> 'EnhancedDOMTreeNode | None':
    """Alias for `parent_node`."""
    return self.parent_node
|
|
398
|
+
|
|
399
|
+
@property
def children(self) -> list['EnhancedDOMTreeNode']:
    """Return `children_nodes`, or a new empty list when it is None or empty.

    A non-empty `children_nodes` list is returned as-is (not copied).
    """
    return self.children_nodes or []
|
|
402
|
+
|
|
403
|
+
@property
|
|
404
|
+
def children_and_shadow_roots(self) -> list['EnhancedDOMTreeNode']:
|
|
405
|
+
"""
|
|
406
|
+
Returns all children nodes, including shadow roots
|
|
407
|
+
"""
|
|
408
|
+
# IMPORTANT: Make a copy to avoid mutating the original children_nodes list!
|
|
409
|
+
children = list(self.children_nodes) if self.children_nodes else []
|
|
410
|
+
if self.shadow_roots:
|
|
411
|
+
children.extend(self.shadow_roots)
|
|
412
|
+
return children
|
|
413
|
+
|
|
414
|
+
@property
def tag_name(self) -> str:
    """Return `node_name` converted to lower case."""
    return self.node_name.lower()
|
|
417
|
+
|
|
418
|
+
@property
def xpath(self) -> str:
    """Build a '/'-joined XPath for this node by walking up through `parent_node`.

    Shadow roots (document-fragment nodes) are skipped without contributing a
    segment. The walk ends at an element whose parent is an iframe, or at the
    first ancestor that is neither an element nor a document fragment.
    """
    walkable_types = (NodeType.ELEMENT_NODE, NodeType.DOCUMENT_FRAGMENT_NODE)
    collected: list[str] = []  # segments in leaf-to-root order
    node = self

    while node and node.node_type in walkable_types:
        ancestor = node.parent_node

        # Shadow roots are transparent: step over them.
        if node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE:
            node = ancestor
            continue

        # The only hard stop is the iframe boundary.
        if ancestor and ancestor.node_name.lower() == 'iframe':
            break

        index = self._get_element_position(node)
        label = node.node_name.lower()
        collected.append(f'{label}[{index}]' if index > 0 else label)

        node = ancestor

    return '/'.join(reversed(collected))
|
|
444
|
+
|
|
445
|
+
def _get_element_position(self, element: 'EnhancedDOMTreeNode') -> int:
    """Return the 1-based position of `element` among its same-tag element siblings.

    Args:
        element: The node whose position is wanted.

    Returns:
        0 when no index is needed or none can be determined: the element has no
        parent, the parent has no children, the element is the only sibling with
        its tag, or the element is not among the parent's children. Otherwise the
        1-based index, as used in XPath.
    """
    parent = element.parent_node
    if not parent or not parent.children_nodes:
        return 0

    wanted_tag = element.node_name.lower()
    same_tag_siblings = [
        child
        for child in parent.children_nodes
        if child.node_type == NodeType.ELEMENT_NODE and child.node_name.lower() == wanted_tag
    ]

    if len(same_tag_siblings) <= 1:
        return 0  # No index needed if it's the only one

    # Match by identity rather than equality: `list.index` would use the dataclass-generated
    # `__eq__`, which compares field by field (including child lists), and could
    # report an earlier sibling that merely compares equal.
    for position, sibling in enumerate(same_tag_siblings, start=1):  # XPath is 1-indexed
        if sibling is element:
            return position
    return 0
|
|
466
|
+
|
|
467
|
+
def __json__(self) -> dict:
|
|
468
|
+
"""Serializes the node and its descendants to a dictionary, omitting parent references."""
|
|
469
|
+
return {
|
|
470
|
+
'node_id': self.node_id,
|
|
471
|
+
'backend_node_id': self.backend_node_id,
|
|
472
|
+
'node_type': self.node_type.name,
|
|
473
|
+
'node_name': self.node_name,
|
|
474
|
+
'node_value': self.node_value,
|
|
475
|
+
'is_visible': self.is_visible,
|
|
476
|
+
'attributes': self.attributes,
|
|
477
|
+
'is_scrollable': self.is_scrollable,
|
|
478
|
+
'session_id': self.session_id,
|
|
479
|
+
'target_id': self.target_id,
|
|
480
|
+
'frame_id': self.frame_id,
|
|
481
|
+
'content_document': self.content_document.__json__() if self.content_document else None,
|
|
482
|
+
'shadow_root_type': self.shadow_root_type,
|
|
483
|
+
'ax_node': asdict(self.ax_node) if self.ax_node else None,
|
|
484
|
+
'snapshot_node': asdict(self.snapshot_node) if self.snapshot_node else None,
|
|
485
|
+
# these two in the end, so it's easier to read json
|
|
486
|
+
'shadow_roots': [r.__json__() for r in self.shadow_roots] if self.shadow_roots else [],
|
|
487
|
+
'children_nodes': [c.__json__() for c in self.children_nodes] if self.children_nodes else [],
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
def get_all_children_text(self, max_depth: int = -1) -> str:
    """Collect the values of all text nodes below this node, in document order.

    Only element nodes are descended into. `max_depth` limits how many levels
    below this node are visited (this node is depth 0); -1 means no limit.
    The pieces are joined with newlines and the result is stripped.
    """
    pieces: list[str] = []
    # Explicit stack instead of recursion; children are pushed in reverse so
    # they are popped - and therefore visited - in their original order.
    pending = [(self, 0)]

    while pending:
        node, depth = pending.pop()
        if max_depth != -1 and depth > max_depth:
            continue

        # TODO: decide whether text should only be collected up to the next
        # clickable (highlighted) element instead of from every descendant.

        if node.node_type == NodeType.TEXT_NODE:
            pieces.append(node.node_value)
        elif node.node_type == NodeType.ELEMENT_NODE:
            pending.extend((child, depth + 1) for child in reversed(node.children))

    return '\n'.join(pieces).strip()
|
|
511
|
+
|
|
512
|
+
def __repr__(self) -> str:
|
|
513
|
+
"""
|
|
514
|
+
@DEV ! don't display this to the LLM, it's SUPER long
|
|
515
|
+
"""
|
|
516
|
+
attributes = ', '.join([f'{k}={v}' for k, v in self.attributes.items()])
|
|
517
|
+
is_scrollable = getattr(self, 'is_scrollable', False)
|
|
518
|
+
num_children = len(self.children_nodes or [])
|
|
519
|
+
return (
|
|
520
|
+
f'<{self.tag_name} {attributes} is_scrollable={is_scrollable} '
|
|
521
|
+
f'num_children={num_children} >{self.node_value}</{self.tag_name}>'
|
|
522
|
+
)
|
|
523
|
+
|
|
524
|
+
def llm_representation(self, max_text_length: int = 100) -> str:
    """
    Compact, token-friendly form of the node for the LLM: the opening tag
    followed by the node's descendant text, capped via `cap_text_length`.
    """
    tag = self.tag_name
    capped_text = cap_text_length(self.get_all_children_text(), max_text_length) or ''
    return f'<{tag}>{capped_text}'
|
|
530
|
+
|
|
531
|
+
def get_meaningful_text_for_llm(self) -> str:
|
|
532
|
+
"""
|
|
533
|
+
Get the meaningful text content that the LLM actually sees for this element.
|
|
534
|
+
This matches exactly what goes into the DOMTreeSerializer output.
|
|
535
|
+
"""
|
|
536
|
+
meaningful_text = ''
|
|
537
|
+
if hasattr(self, 'attributes') and self.attributes:
|
|
538
|
+
# Priority order: value, aria-label, title, placeholder, alt, text content
|
|
539
|
+
for attr in ['value', 'aria-label', 'title', 'placeholder', 'alt']:
|
|
540
|
+
if attr in self.attributes and self.attributes[attr]:
|
|
541
|
+
meaningful_text = self.attributes[attr]
|
|
542
|
+
break
|
|
543
|
+
|
|
544
|
+
# Fallback to text content if no meaningful attributes
|
|
545
|
+
if not meaningful_text:
|
|
546
|
+
meaningful_text = self.get_all_children_text()
|
|
547
|
+
|
|
548
|
+
return meaningful_text.strip()
|
|
549
|
+
|
|
550
|
+
@property
|
|
551
|
+
def is_actually_scrollable(self) -> bool:
|
|
552
|
+
"""
|
|
553
|
+
Enhanced scroll detection that combines CDP detection with CSS analysis.
|
|
554
|
+
|
|
555
|
+
This detects scrollable elements that Chrome's CDP might miss, which is common
|
|
556
|
+
in iframes and dynamically sized containers.
|
|
557
|
+
"""
|
|
558
|
+
# First check if CDP already detected it as scrollable
|
|
559
|
+
if self.is_scrollable:
|
|
560
|
+
return True
|
|
561
|
+
|
|
562
|
+
# Enhanced detection for elements CDP missed
|
|
563
|
+
if not self.snapshot_node:
|
|
564
|
+
return False
|
|
565
|
+
|
|
566
|
+
# Check scroll vs client rects - this is the most reliable indicator
|
|
567
|
+
scroll_rects = self.snapshot_node.scrollRects
|
|
568
|
+
client_rects = self.snapshot_node.clientRects
|
|
569
|
+
|
|
570
|
+
if scroll_rects and client_rects:
|
|
571
|
+
# Content is larger than visible area = scrollable
|
|
572
|
+
has_vertical_scroll = scroll_rects.height > client_rects.height + 1 # +1 for rounding
|
|
573
|
+
has_horizontal_scroll = scroll_rects.width > client_rects.width + 1
|
|
574
|
+
|
|
575
|
+
if has_vertical_scroll or has_horizontal_scroll:
|
|
576
|
+
# Also check CSS to make sure scrolling is allowed
|
|
577
|
+
if self.snapshot_node.computed_styles:
|
|
578
|
+
styles = self.snapshot_node.computed_styles
|
|
579
|
+
|
|
580
|
+
overflow = styles.get('overflow', 'visible').lower()
|
|
581
|
+
overflow_x = styles.get('overflow-x', overflow).lower()
|
|
582
|
+
overflow_y = styles.get('overflow-y', overflow).lower()
|
|
583
|
+
|
|
584
|
+
# Only allow scrolling if overflow is explicitly set to auto, scroll, or overlay
|
|
585
|
+
# Do NOT consider 'visible' overflow as scrollable - this was causing the issue
|
|
586
|
+
allows_scroll = (
|
|
587
|
+
overflow in ['auto', 'scroll', 'overlay']
|
|
588
|
+
or overflow_x in ['auto', 'scroll', 'overlay']
|
|
589
|
+
or overflow_y in ['auto', 'scroll', 'overlay']
|
|
590
|
+
)
|
|
591
|
+
|
|
592
|
+
return allows_scroll
|
|
593
|
+
else:
|
|
594
|
+
# No CSS info, but content overflows - be more conservative
|
|
595
|
+
# Only consider it scrollable if it's a common scrollable container element
|
|
596
|
+
scrollable_tags = {'div', 'main', 'section', 'article', 'aside', 'body', 'html'}
|
|
597
|
+
return self.tag_name.lower() in scrollable_tags
|
|
598
|
+
|
|
599
|
+
return False
|
|
600
|
+
|
|
601
|
+
@property
|
|
602
|
+
def should_show_scroll_info(self) -> bool:
|
|
603
|
+
"""
|
|
604
|
+
Simple check: show scroll info only if this element is scrollable
|
|
605
|
+
and doesn't have a scrollable parent (to avoid nested scroll spam).
|
|
606
|
+
|
|
607
|
+
Special case for iframes: Always show scroll info since Chrome might not
|
|
608
|
+
always detect iframe scrollability correctly (scrollHeight: 0 issue).
|
|
609
|
+
"""
|
|
610
|
+
# Special case: Always show scroll info for iframe elements
|
|
611
|
+
# Even if not detected as scrollable, they might have scrollable content
|
|
612
|
+
if self.tag_name.lower() == 'iframe':
|
|
613
|
+
return True
|
|
614
|
+
|
|
615
|
+
# Must be scrollable first for non-iframe elements
|
|
616
|
+
if not (self.is_scrollable or self.is_actually_scrollable):
|
|
617
|
+
return False
|
|
618
|
+
|
|
619
|
+
# Always show for iframe content documents (body/html)
|
|
620
|
+
if self.tag_name.lower() in {'body', 'html'}:
|
|
621
|
+
return True
|
|
622
|
+
|
|
623
|
+
# Don't show if parent is already scrollable (avoid nested spam)
|
|
624
|
+
if self.parent_node and (self.parent_node.is_scrollable or self.parent_node.is_actually_scrollable):
|
|
625
|
+
return False
|
|
626
|
+
|
|
627
|
+
return True
|
|
628
|
+
|
|
629
|
+
def _find_html_in_content_document(self) -> 'EnhancedDOMTreeNode | None':
|
|
630
|
+
"""Find HTML element in iframe content document."""
|
|
631
|
+
if not self.content_document:
|
|
632
|
+
return None
|
|
633
|
+
|
|
634
|
+
# Check if content document itself is HTML
|
|
635
|
+
if self.content_document.tag_name.lower() == 'html':
|
|
636
|
+
return self.content_document
|
|
637
|
+
|
|
638
|
+
# Look through children for HTML element
|
|
639
|
+
if self.content_document.children_nodes:
|
|
640
|
+
for child in self.content_document.children_nodes:
|
|
641
|
+
if child.tag_name.lower() == 'html':
|
|
642
|
+
return child
|
|
643
|
+
|
|
644
|
+
return None
|
|
645
|
+
|
|
646
|
+
@property
|
|
647
|
+
def scroll_info(self) -> dict[str, Any] | None:
|
|
648
|
+
"""Calculate scroll information for this element if it's scrollable."""
|
|
649
|
+
if not self.is_actually_scrollable or not self.snapshot_node:
|
|
650
|
+
return None
|
|
651
|
+
|
|
652
|
+
# Get scroll and client rects from snapshot data
|
|
653
|
+
scroll_rects = self.snapshot_node.scrollRects
|
|
654
|
+
client_rects = self.snapshot_node.clientRects
|
|
655
|
+
bounds = self.snapshot_node.bounds
|
|
656
|
+
|
|
657
|
+
if not scroll_rects or not client_rects:
|
|
658
|
+
return None
|
|
659
|
+
|
|
660
|
+
# Calculate scroll position and percentages
|
|
661
|
+
scroll_top = scroll_rects.y
|
|
662
|
+
scroll_left = scroll_rects.x
|
|
663
|
+
|
|
664
|
+
# Total scrollable height and width
|
|
665
|
+
scrollable_height = scroll_rects.height
|
|
666
|
+
scrollable_width = scroll_rects.width
|
|
667
|
+
|
|
668
|
+
# Visible (client) dimensions
|
|
669
|
+
visible_height = client_rects.height
|
|
670
|
+
visible_width = client_rects.width
|
|
671
|
+
|
|
672
|
+
# Calculate how much content is above/below/left/right of current view
|
|
673
|
+
content_above = max(0, scroll_top)
|
|
674
|
+
content_below = max(0, scrollable_height - visible_height - scroll_top)
|
|
675
|
+
content_left = max(0, scroll_left)
|
|
676
|
+
content_right = max(0, scrollable_width - visible_width - scroll_left)
|
|
677
|
+
|
|
678
|
+
# Calculate scroll percentages
|
|
679
|
+
vertical_scroll_percentage = 0
|
|
680
|
+
horizontal_scroll_percentage = 0
|
|
681
|
+
|
|
682
|
+
if scrollable_height > visible_height:
|
|
683
|
+
max_scroll_top = scrollable_height - visible_height
|
|
684
|
+
vertical_scroll_percentage = (scroll_top / max_scroll_top) * 100 if max_scroll_top > 0 else 0
|
|
685
|
+
|
|
686
|
+
if scrollable_width > visible_width:
|
|
687
|
+
max_scroll_left = scrollable_width - visible_width
|
|
688
|
+
horizontal_scroll_percentage = (scroll_left / max_scroll_left) * 100 if max_scroll_left > 0 else 0
|
|
689
|
+
|
|
690
|
+
# Calculate pages equivalent (using visible height as page unit)
|
|
691
|
+
pages_above = content_above / visible_height if visible_height > 0 else 0
|
|
692
|
+
pages_below = content_below / visible_height if visible_height > 0 else 0
|
|
693
|
+
total_pages = scrollable_height / visible_height if visible_height > 0 else 1
|
|
694
|
+
|
|
695
|
+
return {
|
|
696
|
+
'scroll_top': scroll_top,
|
|
697
|
+
'scroll_left': scroll_left,
|
|
698
|
+
'scrollable_height': scrollable_height,
|
|
699
|
+
'scrollable_width': scrollable_width,
|
|
700
|
+
'visible_height': visible_height,
|
|
701
|
+
'visible_width': visible_width,
|
|
702
|
+
'content_above': content_above,
|
|
703
|
+
'content_below': content_below,
|
|
704
|
+
'content_left': content_left,
|
|
705
|
+
'content_right': content_right,
|
|
706
|
+
'vertical_scroll_percentage': round(vertical_scroll_percentage, 1),
|
|
707
|
+
'horizontal_scroll_percentage': round(horizontal_scroll_percentage, 1),
|
|
708
|
+
'pages_above': round(pages_above, 1),
|
|
709
|
+
'pages_below': round(pages_below, 1),
|
|
710
|
+
'total_pages': round(total_pages, 1),
|
|
711
|
+
'can_scroll_up': content_above > 0,
|
|
712
|
+
'can_scroll_down': content_below > 0,
|
|
713
|
+
'can_scroll_left': content_left > 0,
|
|
714
|
+
'can_scroll_right': content_right > 0,
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
def get_scroll_info_text(self) -> str:
    """
    Get human-readable scroll information text for this element.

    For iframes the text is derived from the HTML element of the content
    document: 'scroll: <above>↑ <below>↓ <pct>%' when there is content above
    or below, otherwise the bare marker 'scroll'. For any other element the
    text lists pages above/below and the horizontal percentage for whichever
    axes overflow, or '' when no scroll info is available.
    """
    # Special case for iframes: the scroll state lives in the content document.
    if self.tag_name.lower() == 'iframe':
        html_element = self._find_html_in_content_document() if self.content_document else None
        # scroll_info is a computed property, so read it once and reuse the result.
        info = html_element.scroll_info if html_element else None
        if info:
            pages_below = info.get('pages_below', 0)
            pages_above = info.get('pages_above', 0)
            v_pct = int(info.get('vertical_scroll_percentage', 0))

            if pages_below > 0 or pages_above > 0:
                return f'scroll: {pages_above:.1f}↑ {pages_below:.1f}↓ {v_pct}%'

        # Iframes always get at least the bare marker.
        return 'scroll'

    scroll_info = self.scroll_info
    if not scroll_info:
        return ''

    parts = []

    # Vertical scroll info (concise format), only when content overflows vertically.
    if scroll_info['scrollable_height'] > scroll_info['visible_height']:
        parts.append(f'{scroll_info["pages_above"]:.1f} pages above, {scroll_info["pages_below"]:.1f} pages below')

    # Horizontal scroll info (concise format), only when content overflows horizontally.
    if scroll_info['scrollable_width'] > scroll_info['visible_width']:
        parts.append(f'horizontal {scroll_info["horizontal_scroll_percentage"]:.0f}%')

    return ' '.join(parts)
|
|
752
|
+
|
|
753
|
+
@property
def element_hash(self) -> int:
    """Expose the result of the built-in hash() of this element as a property."""
    value = hash(self)
    return value
|
|
756
|
+
|
|
757
|
+
def __str__(self) -> str:
|
|
758
|
+
return f'[<{self.tag_name}>#{self.frame_id[-4:] if self.frame_id else "?"}:{self.backend_node_id}]'
|
|
759
|
+
|
|
760
|
+
def __hash__(self) -> int:
    """
    Hash the element from its parent branch path and its static attributes.

    The digest input is '<tag>/<tag>/...|<k=v><k=v>...', where the attribute
    pairs are restricted to STATIC_ATTRIBUTES and sorted. The first 16 hex
    characters of its SHA-256 are returned as an integer.

    TODO: migrate this to use only backendNodeId + current SessionId
    """
    # Tag names from the root down to this element, joined with '/'.
    branch = '/'.join(self._get_parent_branch_path())

    # Only static attributes take part; sorting makes the result order-independent.
    static_pairs = sorted((name, value) for name, value in self.attributes.items() if name in STATIC_ATTRIBUTES)
    attributes_part = ''.join(f'{name}={value}' for name, value in static_pairs)

    digest = hashlib.sha256(f'{branch}|{attributes_part}'.encode()).hexdigest()

    # __hash__ must return an int: take the first 16 hex chars and parse them as base 16.
    return int(digest[:16], 16)
|
|
781
|
+
|
|
782
|
+
def parent_branch_hash(self) -> int:
    """
    Hash the element's parent branch path only.

    The tag names returned by `_get_parent_branch_path()` are joined with '/',
    and the first 16 hex characters of the SHA-256 digest are returned as an
    integer. Attributes do not take part in this hash.
    """
    branch = '/'.join(self._get_parent_branch_path())
    digest = hashlib.sha256(branch.encode()).hexdigest()
    return int(digest[:16], 16)
|
|
791
|
+
|
|
792
|
+
def _get_parent_branch_path(self) -> list[str]:
    """
    Return the tag names on the branch from the root down to this element.

    The walk follows `parent_node` links upward starting at this node. Only
    nodes whose type is ELEMENT_NODE contribute a tag name.
    """
    tags_leaf_to_root: list[str] = []
    node: 'EnhancedDOMTreeNode | None' = self

    while node is not None:
        if node.node_type == NodeType.ELEMENT_NODE:
            tags_leaf_to_root.append(node.tag_name)
        node = node.parent_node

    # Tags were collected leaf-first; callers expect root-first order.
    return tags_leaf_to_root[::-1]
|
|
804
|
+
|
|
805
|
+
|
|
806
|
+
# Type alias: a mapping from an integer key to the EnhancedDOMTreeNode it refers to.
# NOTE(review): the key is presumably the identifier under which an element is
# exposed to the agent - confirm against the code that builds the map.
DOMSelectorMap = dict[int, EnhancedDOMTreeNode]
|
|
807
|
+
|
|
808
|
+
|
|
809
|
+
@dataclass
class SerializedDOMState:
    """Holds a simplified DOM tree and its selector map, and renders the tree to text on demand."""

    # Not meant to be used directly, use `llm_representation` instead
    _root: SimplifiedNode | None

    selector_map: DOMSelectorMap

    @observe_debug(ignore_input=True, ignore_output=True, name='llm_representation')
    def llm_representation(self, include_attributes: list[str] | None = None) -> str:
        """
        Render the tree with DOMTreeSerializer.

        Kept as a separate step because include_attributes is a parameter on
        the agent. A falsy `include_attributes` falls back to
        DEFAULT_INCLUDE_ATTRIBUTES. A placeholder message is returned when
        there is no root.
        """
        from browser_use.dom.serializer.serializer import DOMTreeSerializer

        if self._root:
            attributes = include_attributes or DEFAULT_INCLUDE_ATTRIBUTES
            return DOMTreeSerializer.serialize_tree(self._root, attributes)

        return 'Empty DOM tree (you might have to wait for the page to load)'

    @observe_debug(ignore_input=True, ignore_output=True, name='eval_representation')
    def eval_representation(self, include_attributes: list[str] | None = None) -> str:
        """
        Render the tree with DOMEvalSerializer, without interactive indexes.

        Intended for evaluation/judge contexts where:
        - no interactive indexes are needed (nothing is clicked),
        - the full HTML structure should be preserved for context,
        - more attribute information is helpful,
        - text content matters for understanding the page structure.

        A falsy `include_attributes` falls back to DEFAULT_INCLUDE_ATTRIBUTES.
        A placeholder message is returned when there is no root.
        """
        from browser_use.dom.serializer.eval_serializer import DOMEvalSerializer

        if self._root:
            attributes = include_attributes or DEFAULT_INCLUDE_ATTRIBUTES
            return DOMEvalSerializer.serialize_tree(self._root, attributes)

        return 'Empty DOM tree (you might have to wait for the page to load)'
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
@dataclass
|
|
856
|
+
class DOMInteractedElement:
|
|
857
|
+
"""
|
|
858
|
+
DOMInteractedElement is a class that represents a DOM element that has been interacted with.
|
|
859
|
+
It stores a snapshot of that element's identifying data: ids, node info, attributes, bounds, XPath and element hash.
|
|
860
|
+
|
|
861
|
+
TODO: this is a bit of a hack, we should probably have a better way to do this
|
|
862
|
+
"""
|
|
863
|
+
|
|
864
|
+
node_id: int
|
|
865
|
+
backend_node_id: int
|
|
866
|
+
frame_id: str | None
|
|
867
|
+
|
|
868
|
+
node_type: NodeType
|
|
869
|
+
node_value: str
|
|
870
|
+
node_name: str
|
|
871
|
+
attributes: dict[str, str] | None
|
|
872
|
+
|
|
873
|
+
bounds: DOMRect | None
|
|
874
|
+
|
|
875
|
+
x_path: str
|
|
876
|
+
|
|
877
|
+
element_hash: int
|
|
878
|
+
|
|
879
|
+
def to_dict(self) -> dict[str, Any]:
    """
    Convert this element into a plain dict.

    `node_type` is stored as its `.value`, and `bounds` as the result of its
    own `to_dict()` (or None when there are no bounds). Every other field is
    copied as-is. Keys keep a fixed order, with 'bounds' last.
    """
    serialized: dict[str, Any] = {name: getattr(self, name) for name in ('node_id', 'backend_node_id', 'frame_id')}
    serialized['node_type'] = self.node_type.value
    serialized.update(
        (name, getattr(self, name))
        for name in ('node_value', 'node_name', 'attributes', 'x_path', 'element_hash')
    )
    serialized['bounds'] = self.bounds.to_dict() if self.bounds else None
    return serialized
|
|
892
|
+
|
|
893
|
+
@classmethod
def load_from_enhanced_dom_tree(cls, enhanced_dom_tree: EnhancedDOMTreeNode) -> 'DOMInteractedElement':
    """
    Build an instance by copying the identifying fields of an enhanced DOM node.

    `bounds` is taken from the node's snapshot when one exists (None
    otherwise), `x_path` from the node's `xpath`, and `element_hash` is the
    built-in hash() of the node.
    """
    node = enhanced_dom_tree
    snapshot = node.snapshot_node

    return cls(
        node_id=node.node_id,
        backend_node_id=node.backend_node_id,
        frame_id=node.frame_id,
        node_type=node.node_type,
        node_value=node.node_value,
        node_name=node.node_name,
        attributes=node.attributes,
        bounds=snapshot.bounds if snapshot else None,
        x_path=node.xpath,
        element_hash=hash(node),
    )
|