optexity-browser-use 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. browser_use/__init__.py +157 -0
  2. browser_use/actor/__init__.py +11 -0
  3. browser_use/actor/element.py +1175 -0
  4. browser_use/actor/mouse.py +134 -0
  5. browser_use/actor/page.py +561 -0
  6. browser_use/actor/playground/flights.py +41 -0
  7. browser_use/actor/playground/mixed_automation.py +54 -0
  8. browser_use/actor/playground/playground.py +236 -0
  9. browser_use/actor/utils.py +176 -0
  10. browser_use/agent/cloud_events.py +282 -0
  11. browser_use/agent/gif.py +424 -0
  12. browser_use/agent/judge.py +170 -0
  13. browser_use/agent/message_manager/service.py +473 -0
  14. browser_use/agent/message_manager/utils.py +52 -0
  15. browser_use/agent/message_manager/views.py +98 -0
  16. browser_use/agent/prompts.py +413 -0
  17. browser_use/agent/service.py +2316 -0
  18. browser_use/agent/system_prompt.md +185 -0
  19. browser_use/agent/system_prompt_flash.md +10 -0
  20. browser_use/agent/system_prompt_no_thinking.md +183 -0
  21. browser_use/agent/views.py +743 -0
  22. browser_use/browser/__init__.py +41 -0
  23. browser_use/browser/cloud/cloud.py +203 -0
  24. browser_use/browser/cloud/views.py +89 -0
  25. browser_use/browser/events.py +578 -0
  26. browser_use/browser/profile.py +1158 -0
  27. browser_use/browser/python_highlights.py +548 -0
  28. browser_use/browser/session.py +3225 -0
  29. browser_use/browser/session_manager.py +399 -0
  30. browser_use/browser/video_recorder.py +162 -0
  31. browser_use/browser/views.py +200 -0
  32. browser_use/browser/watchdog_base.py +260 -0
  33. browser_use/browser/watchdogs/__init__.py +0 -0
  34. browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
  35. browser_use/browser/watchdogs/crash_watchdog.py +335 -0
  36. browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
  37. browser_use/browser/watchdogs/dom_watchdog.py +817 -0
  38. browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
  39. browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
  40. browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
  41. browser_use/browser/watchdogs/popups_watchdog.py +143 -0
  42. browser_use/browser/watchdogs/recording_watchdog.py +126 -0
  43. browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
  44. browser_use/browser/watchdogs/security_watchdog.py +280 -0
  45. browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
  46. browser_use/cli.py +2359 -0
  47. browser_use/code_use/__init__.py +16 -0
  48. browser_use/code_use/formatting.py +192 -0
  49. browser_use/code_use/namespace.py +665 -0
  50. browser_use/code_use/notebook_export.py +276 -0
  51. browser_use/code_use/service.py +1340 -0
  52. browser_use/code_use/system_prompt.md +574 -0
  53. browser_use/code_use/utils.py +150 -0
  54. browser_use/code_use/views.py +171 -0
  55. browser_use/config.py +505 -0
  56. browser_use/controller/__init__.py +3 -0
  57. browser_use/dom/enhanced_snapshot.py +161 -0
  58. browser_use/dom/markdown_extractor.py +169 -0
  59. browser_use/dom/playground/extraction.py +312 -0
  60. browser_use/dom/playground/multi_act.py +32 -0
  61. browser_use/dom/serializer/clickable_elements.py +200 -0
  62. browser_use/dom/serializer/code_use_serializer.py +287 -0
  63. browser_use/dom/serializer/eval_serializer.py +478 -0
  64. browser_use/dom/serializer/html_serializer.py +212 -0
  65. browser_use/dom/serializer/paint_order.py +197 -0
  66. browser_use/dom/serializer/serializer.py +1170 -0
  67. browser_use/dom/service.py +825 -0
  68. browser_use/dom/utils.py +129 -0
  69. browser_use/dom/views.py +906 -0
  70. browser_use/exceptions.py +5 -0
  71. browser_use/filesystem/__init__.py +0 -0
  72. browser_use/filesystem/file_system.py +619 -0
  73. browser_use/init_cmd.py +376 -0
  74. browser_use/integrations/gmail/__init__.py +24 -0
  75. browser_use/integrations/gmail/actions.py +115 -0
  76. browser_use/integrations/gmail/service.py +225 -0
  77. browser_use/llm/__init__.py +155 -0
  78. browser_use/llm/anthropic/chat.py +242 -0
  79. browser_use/llm/anthropic/serializer.py +312 -0
  80. browser_use/llm/aws/__init__.py +36 -0
  81. browser_use/llm/aws/chat_anthropic.py +242 -0
  82. browser_use/llm/aws/chat_bedrock.py +289 -0
  83. browser_use/llm/aws/serializer.py +257 -0
  84. browser_use/llm/azure/chat.py +91 -0
  85. browser_use/llm/base.py +57 -0
  86. browser_use/llm/browser_use/__init__.py +3 -0
  87. browser_use/llm/browser_use/chat.py +201 -0
  88. browser_use/llm/cerebras/chat.py +193 -0
  89. browser_use/llm/cerebras/serializer.py +109 -0
  90. browser_use/llm/deepseek/chat.py +212 -0
  91. browser_use/llm/deepseek/serializer.py +109 -0
  92. browser_use/llm/exceptions.py +29 -0
  93. browser_use/llm/google/__init__.py +3 -0
  94. browser_use/llm/google/chat.py +542 -0
  95. browser_use/llm/google/serializer.py +120 -0
  96. browser_use/llm/groq/chat.py +229 -0
  97. browser_use/llm/groq/parser.py +158 -0
  98. browser_use/llm/groq/serializer.py +159 -0
  99. browser_use/llm/messages.py +238 -0
  100. browser_use/llm/models.py +271 -0
  101. browser_use/llm/oci_raw/__init__.py +10 -0
  102. browser_use/llm/oci_raw/chat.py +443 -0
  103. browser_use/llm/oci_raw/serializer.py +229 -0
  104. browser_use/llm/ollama/chat.py +97 -0
  105. browser_use/llm/ollama/serializer.py +143 -0
  106. browser_use/llm/openai/chat.py +264 -0
  107. browser_use/llm/openai/like.py +15 -0
  108. browser_use/llm/openai/serializer.py +165 -0
  109. browser_use/llm/openrouter/chat.py +211 -0
  110. browser_use/llm/openrouter/serializer.py +26 -0
  111. browser_use/llm/schema.py +176 -0
  112. browser_use/llm/views.py +48 -0
  113. browser_use/logging_config.py +330 -0
  114. browser_use/mcp/__init__.py +18 -0
  115. browser_use/mcp/__main__.py +12 -0
  116. browser_use/mcp/client.py +544 -0
  117. browser_use/mcp/controller.py +264 -0
  118. browser_use/mcp/server.py +1114 -0
  119. browser_use/observability.py +204 -0
  120. browser_use/py.typed +0 -0
  121. browser_use/sandbox/__init__.py +41 -0
  122. browser_use/sandbox/sandbox.py +637 -0
  123. browser_use/sandbox/views.py +132 -0
  124. browser_use/screenshots/__init__.py +1 -0
  125. browser_use/screenshots/service.py +52 -0
  126. browser_use/sync/__init__.py +6 -0
  127. browser_use/sync/auth.py +357 -0
  128. browser_use/sync/service.py +161 -0
  129. browser_use/telemetry/__init__.py +51 -0
  130. browser_use/telemetry/service.py +112 -0
  131. browser_use/telemetry/views.py +101 -0
  132. browser_use/tokens/__init__.py +0 -0
  133. browser_use/tokens/custom_pricing.py +24 -0
  134. browser_use/tokens/mappings.py +4 -0
  135. browser_use/tokens/service.py +580 -0
  136. browser_use/tokens/views.py +108 -0
  137. browser_use/tools/registry/service.py +572 -0
  138. browser_use/tools/registry/views.py +174 -0
  139. browser_use/tools/service.py +1675 -0
  140. browser_use/tools/utils.py +82 -0
  141. browser_use/tools/views.py +100 -0
  142. browser_use/utils.py +670 -0
  143. optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
  144. optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
  145. optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
  146. optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
  147. optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,906 @@
1
+ import hashlib
2
+ from dataclasses import asdict, dataclass, field
3
+ from enum import Enum
4
+ from typing import Any
5
+
6
+ from cdp_use.cdp.accessibility.commands import GetFullAXTreeReturns
7
+ from cdp_use.cdp.accessibility.types import AXPropertyName
8
+ from cdp_use.cdp.dom.commands import GetDocumentReturns
9
+ from cdp_use.cdp.dom.types import ShadowRootType
10
+ from cdp_use.cdp.domsnapshot.commands import CaptureSnapshotReturns
11
+ from cdp_use.cdp.target.types import SessionID, TargetID, TargetInfo
12
+ from uuid_extensions import uuid7str
13
+
14
+ from browser_use.dom.utils import cap_text_length
15
+ from browser_use.observability import observe_debug
16
+
17
+ # Serializer types
18
# Attribute names included by default. Entries are a mix of real HTML
# attributes, synthetic attributes, and accessibility (AX) property names.
# NOTE(review): presumably consumed by the DOM serializer when rendering
# elements for the LLM - confirm against the serializer.
# Each name appears exactly once; order is preserved from first occurrence.
DEFAULT_INCLUDE_ATTRIBUTES = [
    'title',
    'type',
    'checked',
    # 'class',
    'id',
    'name',
    'role',
    'value',
    'placeholder',
    'data-date-format',
    'alt',
    'aria-label',
    'aria-expanded',
    'data-state',
    'aria-checked',
    # ARIA value attributes for datetime/range inputs
    'aria-valuemin',
    'aria-valuemax',
    'aria-valuenow',
    'aria-placeholder',
    # Validation attributes - help agents avoid brute force attempts
    'pattern',
    'min',
    'max',
    'minlength',
    'maxlength',
    'step',
    'accept',  # File input types (e.g., accept="image/*" or accept=".pdf")
    'multiple',  # Whether multiple files/selections are allowed
    'inputmode',  # Virtual keyboard hint (numeric, tel, email, url, etc.)
    'autocomplete',  # Autocomplete behavior hint
    'data-mask',  # Input mask format (e.g., phone numbers, credit cards)
    'data-inputmask',  # Alternative input mask attribute
    'data-datepicker',  # jQuery datepicker indicator
    'format',  # Synthetic attribute for date/time input format (e.g., MM/dd/yyyy)
    'expected_format',  # Synthetic attribute for explicit expected format (e.g., AngularJS datepickers)
    'contenteditable',  # Rich text editor detection
    # Webkit shadow DOM identifiers
    'pseudo',
    # Accessibility properties from ax_node (ordered by importance for automation)
    # ('checked' belongs to this group too, but is already listed above)
    'selected',
    'expanded',
    'pressed',
    'disabled',
    'invalid',  # Current validation state from AX node
    'valuemin',  # Min value from AX node (for datetime/range)
    'valuemax',  # Max value from AX node (for datetime/range)
    'valuenow',
    'keyshortcuts',
    'haspopup',
    'multiselectable',
    # Less commonly needed (uncomment if required):
    # 'readonly',
    'required',
    'valuetext',
    'level',
    'busy',
    'live',
    # Accessibility name (contains text content for StaticText elements)
    'ax_name',
    'data-qtip',
]
82
+
83
# Set of attribute names treated as "static".
# NOTE(review): presumably the attributes considered stable enough to identify
# an element (e.g. for hashing) - confirm against the code that reads this set.
STATIC_ATTRIBUTES = {
    # Identity and labelling
    'class',
    'id',
    'name',
    'type',
    'placeholder',
    'aria-label',
    'title',
    'role',
    # 'aria-expanded',
    # Test-automation hooks
    'data-testid',
    'data-test',
    'data-cy',
    'data-selenium',
    # Form-related attributes
    'for',
    'required',
    'disabled',
    'readonly',
    'checked',
    'selected',
    'multiple',
    'accept',
    # Links and media
    'href',
    'target',
    'rel',
    'alt',
    'src',
    # ARIA relationships
    'aria-describedby',
    'aria-labelledby',
    'aria-controls',
    'aria-owns',
    # ARIA live-region and state attributes
    'aria-live',
    'aria-atomic',
    'aria-busy',
    'aria-disabled',
    'aria-hidden',
    'aria-pressed',
    'aria-checked',
    'aria-selected',
    # ARIA value attributes
    'aria-valuemin',
    'aria-valuemax',
    'aria-valuenow',
    'aria-placeholder',
    # Miscellaneous
    'tabindex',
    'lang',
    'itemscope',
    'itemtype',
    'itemprop',
    # Webkit shadow DOM attributes
    'pseudo',
}
134
+
135
+
136
@dataclass
class CurrentPageTargets:
    """Target info for a page together with the known iframe targets."""

    page_session: TargetInfo
    # Iframe sessions are ALL the iframe sessions of all the pages
    # (not just the current page).
    iframe_sessions: list[TargetInfo]
143
+
144
+
145
@dataclass
class TargetAllTrees:
    """Bundle of the CDP results collected for one target.

    Holds the DOMSnapshot capture, the DOM document, and the full
    accessibility tree, plus the device pixel ratio and a name -> float
    mapping of timing values.
    """

    snapshot: CaptureSnapshotReturns
    dom_tree: GetDocumentReturns
    ax_tree: GetFullAXTreeReturns
    device_pixel_ratio: float
    # NOTE(review): units of the timing values are not visible here - confirm.
    cdp_timing: dict[str, float]
152
+
153
+
154
+ @dataclass(slots=True)
155
+ class PropagatingBounds:
156
+ """Track bounds that propagate from parent elements to filter children."""
157
+
158
+ tag: str # The tag that started propagation ('a' or 'button')
159
+ bounds: 'DOMRect' # The bounding box
160
+ node_id: int # Node ID for debugging
161
+ depth: int # How deep in tree this started (for debugging)
162
+
163
+
164
+ @dataclass(slots=True)
165
+ class SimplifiedNode:
166
+ """Simplified tree node for optimization."""
167
+
168
+ original_node: 'EnhancedDOMTreeNode'
169
+ children: list['SimplifiedNode']
170
+ should_display: bool = True
171
+ is_interactive: bool = False # True if element is in selector_map
172
+
173
+ is_new: bool = False
174
+
175
+ ignored_by_paint_order: bool = False # More info in dom/serializer/paint_order.py
176
+ excluded_by_parent: bool = False # New field for bbox filtering
177
+ is_shadow_host: bool = False # New field for shadow DOM hosts
178
+ is_compound_component: bool = False # True for virtual components of compound controls
179
+
180
+ def _clean_original_node_json(self, node_json: dict) -> dict:
181
+ """Recursively remove children_nodes and shadow_roots from original_node JSON."""
182
+ # Remove the fields we don't want in SimplifiedNode serialization
183
+ if 'children_nodes' in node_json:
184
+ del node_json['children_nodes']
185
+ if 'shadow_roots' in node_json:
186
+ del node_json['shadow_roots']
187
+
188
+ # Clean nested content_document if it exists
189
+ if node_json.get('content_document'):
190
+ node_json['content_document'] = self._clean_original_node_json(node_json['content_document'])
191
+
192
+ return node_json
193
+
194
+ def __json__(self) -> dict:
195
+ original_node_json = self.original_node.__json__()
196
+ # Remove children_nodes and shadow_roots to avoid duplication with SimplifiedNode.children
197
+ cleaned_original_node_json = self._clean_original_node_json(original_node_json)
198
+ return {
199
+ 'should_display': self.should_display,
200
+ 'is_interactive': self.is_interactive,
201
+ 'ignored_by_paint_order': self.ignored_by_paint_order,
202
+ 'excluded_by_parent': self.excluded_by_parent,
203
+ 'original_node': cleaned_original_node_json,
204
+ 'children': [c.__json__() for c in self.children],
205
+ }
206
+
207
+
208
class NodeType(int, Enum):
    """Numeric DOM node type codes, as defined by the DOM specification."""

    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12
223
+
224
+
225
+ @dataclass(slots=True)
226
+ class DOMRect:
227
+ x: float
228
+ y: float
229
+ width: float
230
+ height: float
231
+
232
+ def to_dict(self) -> dict[str, Any]:
233
+ return {
234
+ 'x': self.x,
235
+ 'y': self.y,
236
+ 'width': self.width,
237
+ 'height': self.height,
238
+ }
239
+
240
+ def __json__(self) -> dict:
241
+ return self.to_dict()
242
+
243
+
244
@dataclass(slots=True)
class EnhancedAXProperty:
    """A single accessibility property as a name/value pair.

    `sources` and `related_nodes` are deliberately left out for now (it is
    unclear how they would be used).

    TODO: there is probably a way to tell whether a property carries a value
    or related nodes; not handled yet.
    """

    name: AXPropertyName
    value: str | bool | None
    # related_nodes: list[EnhancedAXRelatedNode] | None
254
+
255
+
256
@dataclass(slots=True)
class EnhancedAXNode:
    """Subset of an accessibility-tree node kept for DOM enhancement."""

    # Not to be confused with the DOM node_id. Only useful within the AX node tree.
    ax_node_id: str
    ignored: bool
    # ignored_reasons is not stored: an ignored node is skipped anyway
    role: str | None
    name: str | None
    description: str | None

    properties: list[EnhancedAXProperty] | None
    child_ids: list[str] | None
268
+
269
+
270
@dataclass(slots=True)
class EnhancedSnapshotNode:
    """Per-node data pulled out of a DOMSnapshot capture."""

    is_clickable: bool | None
    cursor_style: str | None

    # Document coordinates (origin = top-left of the page, ignores current scroll).
    # Equivalent JS API: layoutNode.boundingBox in the older API.
    # Typical use: quick hit-test that doesn't care about scroll position.
    bounds: DOMRect | None

    # Viewport coordinates (origin = top-left of the visible scrollport).
    # Equivalent JS API: element.getClientRects() / getBoundingClientRect().
    # Typical use: pixel-perfect hit-testing on screen, taking current scroll into account.
    clientRects: DOMRect | None

    # Scrollable area of the element.
    scrollRects: DOMRect | None

    # Computed styles from the layout tree
    computed_styles: dict[str, str] | None
    # Paint order from the layout tree
    paint_order: int | None
    # Stacking contexts from the layout tree
    stacking_contexts: int | None
301
+
302
+
303
+ # @dataclass(slots=True)
304
+ # class SuperSelector:
305
+ # node_id: int
306
+ # backend_node_id: int
307
+ # frame_id: str | None
308
+ # target_id: TargetID
309
+
310
+ # node_type: NodeType
311
+ # node_name: str
312
+
313
+ # # is_visible: bool | None
314
+ # # is_scrollable: bool | None
315
+
316
+ # element_index: int | None
317
+
318
+
319
+ @dataclass(slots=True)
320
+ class EnhancedDOMTreeNode:
321
+ """
322
+ Enhanced DOM tree node that contains information from AX, DOM, and Snapshot trees. It's mostly based on the types on DOM node type with enhanced data from AX and Snapshot trees.
323
+
324
+ @dev when serializing check if the value is a valid value first!
325
+
326
+ Learn more about the fields:
327
+ - (DOM node) https://chromedevtools.github.io/devtools-protocol/tot/DOM/#type-BackendNode
328
+ - (AX node) https://chromedevtools.github.io/devtools-protocol/tot/Accessibility/#type-AXNode
329
+ - (Snapshot node) https://chromedevtools.github.io/devtools-protocol/tot/DOMSnapshot/#type-DOMNode
330
+ """
331
+
332
+ # region - DOM Node data
333
+
334
+ node_id: int
335
+ backend_node_id: int
336
+
337
+ node_type: NodeType
338
+ """Node types, defined in `NodeType` enum."""
339
+ node_name: str
340
+ """Only applicable for `NodeType.ELEMENT_NODE`"""
341
+ node_value: str
342
+ """this is where the value from `NodeType.TEXT_NODE` is stored usually"""
343
+ attributes: dict[str, str]
344
+ """slightly changed from the original attributes to be more readable"""
345
+ is_scrollable: bool | None
346
+ """
347
+ Whether the node is scrollable.
348
+ """
349
+ is_visible: bool | None
350
+ """
351
+ Whether the node is visible according to the upper most frame node.
352
+ """
353
+
354
+ absolute_position: DOMRect | None
355
+ """
356
+ Absolute position of the node in the document according to the top-left of the page.
357
+ """
358
+
359
+ # frames
360
+ target_id: TargetID
361
+ frame_id: str | None
362
+ session_id: SessionID | None
363
+ content_document: 'EnhancedDOMTreeNode | None'
364
+ """
365
+ Content document is the document inside a new iframe.
366
+ """
367
+ # Shadow DOM
368
+ shadow_root_type: ShadowRootType | None
369
+ shadow_roots: list['EnhancedDOMTreeNode'] | None
370
+ """
371
+ Shadow roots are the shadow DOMs of the element.
372
+ """
373
+
374
+ # Navigation
375
+ parent_node: 'EnhancedDOMTreeNode | None'
376
+ children_nodes: list['EnhancedDOMTreeNode'] | None
377
+
378
+ # endregion - DOM Node data
379
+
380
+ # region - AX Node data
381
+ ax_node: EnhancedAXNode | None
382
+
383
+ # endregion - AX Node data
384
+
385
+ # region - Snapshot Node data
386
+ snapshot_node: EnhancedSnapshotNode | None
387
+
388
+ # endregion - Snapshot Node data
389
+
390
+ # Compound control child components information
391
+ _compound_children: list[dict[str, Any]] = field(default_factory=list)
392
+
393
+ uuid: str = field(default_factory=uuid7str)
394
+
395
@property
def parent(self) -> 'EnhancedDOMTreeNode | None':
    """Alias for `parent_node`."""
    owner = self.parent_node
    return owner
398
+
399
@property
def children(self) -> list['EnhancedDOMTreeNode']:
    """Return `children_nodes`, or a new empty list when it is None/empty."""
    nodes = self.children_nodes
    return nodes if nodes else []
402
+
403
@property
def children_and_shadow_roots(self) -> list['EnhancedDOMTreeNode']:
    """
    Return the child nodes followed by the shadow roots, as a new list.
    """
    # IMPORTANT: always build a fresh list so `children_nodes` is never mutated.
    return [*(self.children_nodes or []), *(self.shadow_roots or [])]
413
+
414
@property
def tag_name(self) -> str:
    """Lower-cased `node_name`."""
    raw_name = self.node_name
    return raw_name.lower()
417
+
418
@property
def xpath(self) -> str:
    """Build a slash-separated path of lower-cased tag names for this node.

    The walk goes from this node up through `parent_node`. Shadow roots
    (DOCUMENT_FRAGMENT_NODE) are passed through without emitting a segment.
    The walk ends at a node whose parent is an <iframe>, or at the first
    ancestor that is neither an element nor a document fragment. A positional
    index `[n]` is appended only when `_get_element_position` returns > 0.
    """
    walkable_types = (NodeType.ELEMENT_NODE, NodeType.DOCUMENT_FRAGMENT_NODE)
    # Segments are collected leaf-first and reversed once at the end.
    leaf_first = []
    node = self

    while node and node.node_type in walkable_types:
        if node.node_type != NodeType.DOCUMENT_FRAGMENT_NODE:
            owner = node.parent_node
            # stop ONLY if we hit an iframe
            if owner and owner.node_name.lower() == 'iframe':
                break

            index = self._get_element_position(node)
            name = node.node_name.lower()
            suffix = f'[{index}]' if index > 0 else ''
            leaf_first.append(f'{name}{suffix}')

        node = node.parent_node

    leaf_first.reverse()
    return '/'.join(leaf_first)
444
+
445
def _get_element_position(self, element: 'EnhancedDOMTreeNode') -> int:
    """Return the 1-based index of `element` among its same-tag element siblings.

    Returns 0 when no index is needed: the element has no parent, the parent
    has no children, the element is the only sibling with that tag, or it is
    not found among the parent's children.
    """
    owner = element.parent_node
    if not owner or not owner.children_nodes:
        return 0

    wanted_tag = element.node_name.lower()
    peers = [
        sibling
        for sibling in owner.children_nodes
        if sibling.node_type == NodeType.ELEMENT_NODE and sibling.node_name.lower() == wanted_tag
    ]

    # No index needed if it's the only one
    if len(peers) <= 1:
        return 0

    # XPath is 1-indexed
    for one_based, sibling in enumerate(peers, start=1):
        if sibling is element or sibling == element:
            return one_based
    return 0
466
+
467
+ def __json__(self) -> dict:
468
+ """Serializes the node and its descendants to a dictionary, omitting parent references."""
469
+ return {
470
+ 'node_id': self.node_id,
471
+ 'backend_node_id': self.backend_node_id,
472
+ 'node_type': self.node_type.name,
473
+ 'node_name': self.node_name,
474
+ 'node_value': self.node_value,
475
+ 'is_visible': self.is_visible,
476
+ 'attributes': self.attributes,
477
+ 'is_scrollable': self.is_scrollable,
478
+ 'session_id': self.session_id,
479
+ 'target_id': self.target_id,
480
+ 'frame_id': self.frame_id,
481
+ 'content_document': self.content_document.__json__() if self.content_document else None,
482
+ 'shadow_root_type': self.shadow_root_type,
483
+ 'ax_node': asdict(self.ax_node) if self.ax_node else None,
484
+ 'snapshot_node': asdict(self.snapshot_node) if self.snapshot_node else None,
485
+ # these two in the end, so it's easier to read json
486
+ 'shadow_roots': [r.__json__() for r in self.shadow_roots] if self.shadow_roots else [],
487
+ 'children_nodes': [c.__json__() for c in self.children_nodes] if self.children_nodes else [],
488
+ }
489
+
490
def get_all_children_text(self, max_depth: int = -1) -> str:
    """Collect the text of all text nodes at or below this node.

    This node is depth 0. Nodes deeper than `max_depth` are skipped; -1 means
    no depth limit. Only text nodes contribute their `node_value`, and only
    element nodes are descended into (via `.children`). The pieces are joined
    with newlines in document order and the result is stripped.
    """
    # TODO: decide whether collection should stop at the next clickable
    # (highlighted) element instead of taking everything from the children.
    depth_limited = max_depth != -1
    pieces = []

    # Explicit stack; children are pushed reversed so they pop in document order.
    pending = [(self, 0)]
    while pending:
        node, depth = pending.pop()
        if depth_limited and depth > max_depth:
            continue

        if node.node_type == NodeType.TEXT_NODE:
            pieces.append(node.node_value)
        elif node.node_type == NodeType.ELEMENT_NODE:
            pending.extend((child, depth + 1) for child in reversed(node.children))

    return '\n'.join(pieces).strip()
511
+
512
+ def __repr__(self) -> str:
513
+ """
514
+ @DEV ! don't display this to the LLM, it's SUPER long
515
+ """
516
+ attributes = ', '.join([f'{k}={v}' for k, v in self.attributes.items()])
517
+ is_scrollable = getattr(self, 'is_scrollable', False)
518
+ num_children = len(self.children_nodes or [])
519
+ return (
520
+ f'<{self.tag_name} {attributes} is_scrollable={is_scrollable} '
521
+ f'num_children={num_children} >{self.node_value}</{self.tag_name}>'
522
+ )
523
+
524
def llm_representation(self, max_text_length: int = 100) -> str:
    """
    Token-friendly representation for the LLM: `<tag>` followed by the node's
    descendant text passed through `cap_text_length` with `max_text_length`.
    """
    tag = self.tag_name
    capped_text = cap_text_length(self.get_all_children_text(), max_text_length)
    return f'<{tag}>{capped_text or ""}'
530
+
531
def get_meaningful_text_for_llm(self) -> str:
    """
    Return the text that best describes this element, stripped of surrounding whitespace.

    The first non-empty attribute among value, aria-label, title, placeholder
    and alt wins; when none is set, the descendant text content is used.
    This is intended to match what goes into the DOMTreeSerializer output.
    """
    preferred_attributes = ('value', 'aria-label', 'title', 'placeholder', 'alt')
    attrs = getattr(self, 'attributes', None) or {}

    chosen = next((attrs[key] for key in preferred_attributes if attrs.get(key)), '')
    if not chosen:
        # No meaningful attribute: fall back to text content
        chosen = self.get_all_children_text()

    return chosen.strip()
549
+
550
@property
def is_actually_scrollable(self) -> bool:
    """
    Scroll detection combining the `is_scrollable` flag with snapshot geometry and CSS.

    Returns True when `is_scrollable` is already set. Otherwise the scroll
    rect must exceed the client rect by more than 1 unit in width or height,
    and either the computed overflow styles allow scrolling (auto, scroll or
    overlay) or, when no styles are available, the tag is one of a small set
    of common container elements.
    """
    # The flag from CDP wins outright
    if self.is_scrollable:
        return True

    snapshot = self.snapshot_node
    if not snapshot:
        return False

    content_rect = snapshot.scrollRects
    viewport_rect = snapshot.clientRects
    if not (content_rect and viewport_rect):
        return False

    # Content larger than the visible area = candidate (+1 absorbs rounding)
    overflows_vertically = content_rect.height > viewport_rect.height + 1
    overflows_horizontally = content_rect.width > viewport_rect.width + 1
    if not (overflows_vertically or overflows_horizontally):
        return False

    styles = snapshot.computed_styles
    if not styles:
        # No CSS info but content overflows: be conservative and accept only
        # common scrollable container elements
        return self.tag_name.lower() in {'div', 'main', 'section', 'article', 'aside', 'body', 'html'}

    # 'visible' overflow is deliberately NOT treated as scrollable
    shorthand = styles.get('overflow', 'visible').lower()
    candidates = (
        shorthand,
        styles.get('overflow-x', shorthand).lower(),
        styles.get('overflow-y', shorthand).lower(),
    )
    return any(value in ('auto', 'scroll', 'overlay') for value in candidates)
600
+
601
@property
def should_show_scroll_info(self) -> bool:
    """
    Decide whether scroll info should be shown for this element.

    - iframe elements: always True (their scrollability may not be detected).
    - non-scrollable elements: False.
    - scrollable body/html: True.
    - other scrollable elements: True only when the parent is not itself
      scrollable, to avoid nested scroll spam.
    """
    tag = self.tag_name.lower()

    # iframes may have scrollable content even when not detected as scrollable
    if tag == 'iframe':
        return True

    # Everything else must be scrollable first
    if not (self.is_scrollable or self.is_actually_scrollable):
        return False

    # Document roots (body/html) are always reported
    if tag in {'body', 'html'}:
        return True

    # Suppress when the parent is already scrollable
    owner = self.parent_node
    parent_scrolls = owner and (owner.is_scrollable or owner.is_actually_scrollable)
    return not parent_scrolls
628
+
629
+ def _find_html_in_content_document(self) -> 'EnhancedDOMTreeNode | None':
630
+ """Find HTML element in iframe content document."""
631
+ if not self.content_document:
632
+ return None
633
+
634
+ # Check if content document itself is HTML
635
+ if self.content_document.tag_name.lower() == 'html':
636
+ return self.content_document
637
+
638
+ # Look through children for HTML element
639
+ if self.content_document.children_nodes:
640
+ for child in self.content_document.children_nodes:
641
+ if child.tag_name.lower() == 'html':
642
+ return child
643
+
644
+ return None
645
+
646
+ @property
647
+ def scroll_info(self) -> dict[str, Any] | None:
648
+ """Calculate scroll information for this element if it's scrollable."""
649
+ if not self.is_actually_scrollable or not self.snapshot_node:
650
+ return None
651
+
652
+ # Get scroll and client rects from snapshot data
653
+ scroll_rects = self.snapshot_node.scrollRects
654
+ client_rects = self.snapshot_node.clientRects
655
+ bounds = self.snapshot_node.bounds
656
+
657
+ if not scroll_rects or not client_rects:
658
+ return None
659
+
660
+ # Calculate scroll position and percentages
661
+ scroll_top = scroll_rects.y
662
+ scroll_left = scroll_rects.x
663
+
664
+ # Total scrollable height and width
665
+ scrollable_height = scroll_rects.height
666
+ scrollable_width = scroll_rects.width
667
+
668
+ # Visible (client) dimensions
669
+ visible_height = client_rects.height
670
+ visible_width = client_rects.width
671
+
672
+ # Calculate how much content is above/below/left/right of current view
673
+ content_above = max(0, scroll_top)
674
+ content_below = max(0, scrollable_height - visible_height - scroll_top)
675
+ content_left = max(0, scroll_left)
676
+ content_right = max(0, scrollable_width - visible_width - scroll_left)
677
+
678
+ # Calculate scroll percentages
679
+ vertical_scroll_percentage = 0
680
+ horizontal_scroll_percentage = 0
681
+
682
+ if scrollable_height > visible_height:
683
+ max_scroll_top = scrollable_height - visible_height
684
+ vertical_scroll_percentage = (scroll_top / max_scroll_top) * 100 if max_scroll_top > 0 else 0
685
+
686
+ if scrollable_width > visible_width:
687
+ max_scroll_left = scrollable_width - visible_width
688
+ horizontal_scroll_percentage = (scroll_left / max_scroll_left) * 100 if max_scroll_left > 0 else 0
689
+
690
+ # Calculate pages equivalent (using visible height as page unit)
691
+ pages_above = content_above / visible_height if visible_height > 0 else 0
692
+ pages_below = content_below / visible_height if visible_height > 0 else 0
693
+ total_pages = scrollable_height / visible_height if visible_height > 0 else 1
694
+
695
+ return {
696
+ 'scroll_top': scroll_top,
697
+ 'scroll_left': scroll_left,
698
+ 'scrollable_height': scrollable_height,
699
+ 'scrollable_width': scrollable_width,
700
+ 'visible_height': visible_height,
701
+ 'visible_width': visible_width,
702
+ 'content_above': content_above,
703
+ 'content_below': content_below,
704
+ 'content_left': content_left,
705
+ 'content_right': content_right,
706
+ 'vertical_scroll_percentage': round(vertical_scroll_percentage, 1),
707
+ 'horizontal_scroll_percentage': round(horizontal_scroll_percentage, 1),
708
+ 'pages_above': round(pages_above, 1),
709
+ 'pages_below': round(pages_below, 1),
710
+ 'total_pages': round(total_pages, 1),
711
+ 'can_scroll_up': content_above > 0,
712
+ 'can_scroll_down': content_below > 0,
713
+ 'can_scroll_left': content_left > 0,
714
+ 'can_scroll_right': content_right > 0,
715
+ }
716
+
717
def get_scroll_info_text(self) -> str:
    """Return a short, human-readable description of this element's scroll state.

    For an ``iframe`` the text is derived from the HTML element found inside
    its content document: ``'scroll: <above>↑ <below>↓ <pct>%'`` when there is
    content above or below the viewport, otherwise the bare word ``'scroll'``.

    For any other element the text is built from ``self.scroll_info``: an
    empty string when no scroll info is available, otherwise a vertical part
    ("pages above / pages below") and/or a horizontal percentage part,
    separated by a single space.
    """
    if self.tag_name.lower() == 'iframe':
        # Iframes report the scroll state of the document they embed.
        inner_html = self._find_html_in_content_document() if self.content_document else None
        if inner_html and inner_html.scroll_info:
            frame_info = inner_html.scroll_info
            above = frame_info.get('pages_above', 0)
            below = frame_info.get('pages_below', 0)
            percent = int(frame_info.get('vertical_scroll_percentage', 0))

            if above > 0 or below > 0:
                return f'scroll: {above:.1f}↑ {below:.1f}↓ {percent}%'

        # Nothing measurable was found: still flag the iframe as scrollable.
        return 'scroll'

    details = self.scroll_info
    if not details:
        return ''

    segments = []

    # Vertical part, only when the content is taller than the visible area.
    if details['visible_height'] < details['scrollable_height']:
        segments.append(f'{details["pages_above"]:.1f} pages above, {details["pages_below"]:.1f} pages below')

    # Horizontal part, only when the content is wider than the visible area.
    if details['visible_width'] < details['scrollable_width']:
        segments.append(f'horizontal {details["horizontal_scroll_percentage"]:.0f}%')

    return ' '.join(segments)
752
+
753
@property
def element_hash(self) -> int:
    """Expose this node's hash as a plain attribute.

    The value comes from the built-in ``hash()``, so it is whatever the
    interpreter derives from ``__hash__`` rather than a direct call to it.
    """
    hashed: int = hash(self)
    return hashed
756
+
757
def __str__(self) -> str:
    """Return a compact debug label: ``[<tag>#<last 4 chars of frame id>:<backend node id>]``.

    A ``?`` stands in for the frame part when ``frame_id`` is empty or None.
    """
    frame_suffix = self.frame_id[-4:] if self.frame_id else '?'
    return f'[<{self.tag_name}>#{frame_suffix}:{self.backend_node_id}]'
759
+
760
def __hash__(self) -> int:
    """
    Hash the element from its parent branch path and its static attributes.

    The hashed text is ``<tag path joined by '/'>|<k=v pairs>``, where the
    pairs are the entries of ``self.attributes`` whose key is in
    ``STATIC_ATTRIBUTES``, sorted and concatenated without a separator.

    TODO: migrate this to use only backendNodeId + current SessionId
    """
    branch_text = '/'.join(self._get_parent_branch_path())

    # Keep only the attributes listed in STATIC_ATTRIBUTES, in sorted order.
    static_pairs = sorted(pair for pair in self.attributes.items() if pair[0] in STATIC_ATTRIBUTES)
    attributes_text = ''.join(f'{name}={value}' for name, value in static_pairs)

    digest = hashlib.sha256(f'{branch_text}|{attributes_text}'.encode()).hexdigest()

    # __hash__ must return an int: use the first 16 hex chars (64 bits) of the digest.
    return int(digest[:16], 16)
781
+
782
def parent_branch_hash(self) -> int:
    """
    Hash the element based on its parent branch path only.

    The tag names returned by ``_get_parent_branch_path`` are joined with
    ``/``, hashed with SHA-256, and the first 16 hex characters of the digest
    are returned as an integer. Attributes do not take part in this hash.
    """
    joined_path = '/'.join(self._get_parent_branch_path())
    hex_digest = hashlib.sha256(joined_path.encode()).hexdigest()
    return int(hex_digest[:16], 16)
791
+
792
def _get_parent_branch_path(self) -> list[str]:
    """Return the tag names on the path from the root down to this element.

    Walks ``parent_node`` links upward starting at ``self`` and keeps only
    nodes whose ``node_type`` is ``NodeType.ELEMENT_NODE``; the result is
    ordered root first, current element last.
    """
    tags_leaf_to_root: list[str] = []
    node: 'EnhancedDOMTreeNode | None' = self

    while node is not None:
        if node.node_type == NodeType.ELEMENT_NODE:
            tags_leaf_to_root.append(node.tag_name)
        node = node.parent_node

    # The walk went upward, so flip it to get root-to-element order.
    return tags_leaf_to_root[::-1]
804
+
805
+
806
# Type alias for a dict keyed by int whose values are EnhancedDOMTreeNode instances.
# NOTE(review): what the int key represents is not visible in this part of the file; confirm against the code that builds the map.
+ DOMSelectorMap = dict[int, EnhancedDOMTreeNode]
807
+
808
+
809
@dataclass
class SerializedDOMState:
    """A simplified DOM tree together with its selector map, renderable as text."""

    # Root of the simplified tree, or None when there is nothing to serialize.
    # Not meant to be used directly, use `llm_representation` instead.
    _root: SimplifiedNode | None

    # Lookup table of type DOMSelectorMap that accompanies the tree.
    selector_map: DOMSelectorMap

    @observe_debug(ignore_input=True, ignore_output=True, name='llm_representation')
    def llm_representation(self, include_attributes: list[str] | None = None) -> str:
        """Serialize the tree with ``DOMTreeSerializer``.

        Kept as a separate step because ``include_attributes`` is a parameter
        on the agent, so serialization cannot happen when the state is built.

        Args:
            include_attributes: Attribute names to pass to the serializer. None
                or an empty list falls back to ``DEFAULT_INCLUDE_ATTRIBUTES``.

        Returns:
            The serialized tree, or a fixed placeholder message when there is no root.
        """
        # Imported here rather than at module level, as in the original code.
        from browser_use.dom.serializer.serializer import DOMTreeSerializer

        if self._root:
            chosen_attributes = include_attributes if include_attributes else DEFAULT_INCLUDE_ATTRIBUTES
            return DOMTreeSerializer.serialize_tree(self._root, chosen_attributes)

        return 'Empty DOM tree (you might have to wait for the page to load)'

    @observe_debug(ignore_input=True, ignore_output=True, name='eval_representation')
    def eval_representation(self, include_attributes: list[str] | None = None) -> str:
        """
        Evaluation-focused DOM representation without interactive indexes.

        Intended for evaluation/judge contexts where:
        - No interactive indexes are needed (nothing is being clicked)
        - Full HTML structure should be preserved for context
        - More attribute information is helpful
        - Text content is important for understanding page structure

        Args:
            include_attributes: Attribute names to pass to the serializer. None
                or an empty list falls back to ``DEFAULT_INCLUDE_ATTRIBUTES``.

        Returns:
            The serialized tree, or a fixed placeholder message when there is no root.
        """
        # Imported here rather than at module level, as in the original code.
        from browser_use.dom.serializer.eval_serializer import DOMEvalSerializer

        if self._root:
            chosen_attributes = include_attributes if include_attributes else DEFAULT_INCLUDE_ATTRIBUTES
            return DOMEvalSerializer.serialize_tree(self._root, chosen_attributes)

        return 'Empty DOM tree (you might have to wait for the page to load)'
853
+
854
+
855
@dataclass
class DOMInteractedElement:
    """
    Record of a DOM element that has been interacted with.

    It stores a copy of the node's identifying fields (ids, type, name, value,
    attributes, bounds, xpath, hash) taken from an ``EnhancedDOMTreeNode``.

    TODO: this is a bit of a hack, we should probably have a better way to do this
    """

    node_id: int
    backend_node_id: int
    frame_id: str | None

    node_type: NodeType
    node_value: str
    node_name: str
    attributes: dict[str, str] | None

    bounds: DOMRect | None

    x_path: str

    element_hash: int

    def to_dict(self) -> dict[str, Any]:
        """Return the element as a plain dict.

        ``node_type`` is stored by its ``.value`` and ``bounds`` via its own
        ``to_dict()`` (or None when there are no bounds).
        """
        bounds_payload = self.bounds.to_dict() if self.bounds else None
        return dict(
            node_id=self.node_id,
            backend_node_id=self.backend_node_id,
            frame_id=self.frame_id,
            node_type=self.node_type.value,
            node_value=self.node_value,
            node_name=self.node_name,
            attributes=self.attributes,
            x_path=self.x_path,
            element_hash=self.element_hash,
            bounds=bounds_payload,
        )

    @classmethod
    def load_from_enhanced_dom_tree(cls, enhanced_dom_tree: EnhancedDOMTreeNode) -> 'DOMInteractedElement':
        """Build an instance by copying the relevant fields from ``enhanced_dom_tree``.

        ``bounds`` is taken from the node's snapshot when one exists, otherwise
        None; ``element_hash`` is ``hash(enhanced_dom_tree)``.
        """
        source = enhanced_dom_tree
        snapshot = source.snapshot_node
        snapshot_bounds = snapshot.bounds if snapshot else None

        return cls(
            node_id=source.node_id,
            backend_node_id=source.backend_node_id,
            frame_id=source.frame_id,
            node_type=source.node_type,
            node_value=source.node_value,
            node_name=source.node_name,
            attributes=source.attributes,
            bounds=snapshot_bounds,
            x_path=source.xpath,
            element_hash=hash(source),
        )