optexity-browser-use 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use/__init__.py +157 -0
- browser_use/actor/__init__.py +11 -0
- browser_use/actor/element.py +1175 -0
- browser_use/actor/mouse.py +134 -0
- browser_use/actor/page.py +561 -0
- browser_use/actor/playground/flights.py +41 -0
- browser_use/actor/playground/mixed_automation.py +54 -0
- browser_use/actor/playground/playground.py +236 -0
- browser_use/actor/utils.py +176 -0
- browser_use/agent/cloud_events.py +282 -0
- browser_use/agent/gif.py +424 -0
- browser_use/agent/judge.py +170 -0
- browser_use/agent/message_manager/service.py +473 -0
- browser_use/agent/message_manager/utils.py +52 -0
- browser_use/agent/message_manager/views.py +98 -0
- browser_use/agent/prompts.py +413 -0
- browser_use/agent/service.py +2316 -0
- browser_use/agent/system_prompt.md +185 -0
- browser_use/agent/system_prompt_flash.md +10 -0
- browser_use/agent/system_prompt_no_thinking.md +183 -0
- browser_use/agent/views.py +743 -0
- browser_use/browser/__init__.py +41 -0
- browser_use/browser/cloud/cloud.py +203 -0
- browser_use/browser/cloud/views.py +89 -0
- browser_use/browser/events.py +578 -0
- browser_use/browser/profile.py +1158 -0
- browser_use/browser/python_highlights.py +548 -0
- browser_use/browser/session.py +3225 -0
- browser_use/browser/session_manager.py +399 -0
- browser_use/browser/video_recorder.py +162 -0
- browser_use/browser/views.py +200 -0
- browser_use/browser/watchdog_base.py +260 -0
- browser_use/browser/watchdogs/__init__.py +0 -0
- browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
- browser_use/browser/watchdogs/crash_watchdog.py +335 -0
- browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
- browser_use/browser/watchdogs/dom_watchdog.py +817 -0
- browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
- browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
- browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
- browser_use/browser/watchdogs/popups_watchdog.py +143 -0
- browser_use/browser/watchdogs/recording_watchdog.py +126 -0
- browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
- browser_use/browser/watchdogs/security_watchdog.py +280 -0
- browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
- browser_use/cli.py +2359 -0
- browser_use/code_use/__init__.py +16 -0
- browser_use/code_use/formatting.py +192 -0
- browser_use/code_use/namespace.py +665 -0
- browser_use/code_use/notebook_export.py +276 -0
- browser_use/code_use/service.py +1340 -0
- browser_use/code_use/system_prompt.md +574 -0
- browser_use/code_use/utils.py +150 -0
- browser_use/code_use/views.py +171 -0
- browser_use/config.py +505 -0
- browser_use/controller/__init__.py +3 -0
- browser_use/dom/enhanced_snapshot.py +161 -0
- browser_use/dom/markdown_extractor.py +169 -0
- browser_use/dom/playground/extraction.py +312 -0
- browser_use/dom/playground/multi_act.py +32 -0
- browser_use/dom/serializer/clickable_elements.py +200 -0
- browser_use/dom/serializer/code_use_serializer.py +287 -0
- browser_use/dom/serializer/eval_serializer.py +478 -0
- browser_use/dom/serializer/html_serializer.py +212 -0
- browser_use/dom/serializer/paint_order.py +197 -0
- browser_use/dom/serializer/serializer.py +1170 -0
- browser_use/dom/service.py +825 -0
- browser_use/dom/utils.py +129 -0
- browser_use/dom/views.py +906 -0
- browser_use/exceptions.py +5 -0
- browser_use/filesystem/__init__.py +0 -0
- browser_use/filesystem/file_system.py +619 -0
- browser_use/init_cmd.py +376 -0
- browser_use/integrations/gmail/__init__.py +24 -0
- browser_use/integrations/gmail/actions.py +115 -0
- browser_use/integrations/gmail/service.py +225 -0
- browser_use/llm/__init__.py +155 -0
- browser_use/llm/anthropic/chat.py +242 -0
- browser_use/llm/anthropic/serializer.py +312 -0
- browser_use/llm/aws/__init__.py +36 -0
- browser_use/llm/aws/chat_anthropic.py +242 -0
- browser_use/llm/aws/chat_bedrock.py +289 -0
- browser_use/llm/aws/serializer.py +257 -0
- browser_use/llm/azure/chat.py +91 -0
- browser_use/llm/base.py +57 -0
- browser_use/llm/browser_use/__init__.py +3 -0
- browser_use/llm/browser_use/chat.py +201 -0
- browser_use/llm/cerebras/chat.py +193 -0
- browser_use/llm/cerebras/serializer.py +109 -0
- browser_use/llm/deepseek/chat.py +212 -0
- browser_use/llm/deepseek/serializer.py +109 -0
- browser_use/llm/exceptions.py +29 -0
- browser_use/llm/google/__init__.py +3 -0
- browser_use/llm/google/chat.py +542 -0
- browser_use/llm/google/serializer.py +120 -0
- browser_use/llm/groq/chat.py +229 -0
- browser_use/llm/groq/parser.py +158 -0
- browser_use/llm/groq/serializer.py +159 -0
- browser_use/llm/messages.py +238 -0
- browser_use/llm/models.py +271 -0
- browser_use/llm/oci_raw/__init__.py +10 -0
- browser_use/llm/oci_raw/chat.py +443 -0
- browser_use/llm/oci_raw/serializer.py +229 -0
- browser_use/llm/ollama/chat.py +97 -0
- browser_use/llm/ollama/serializer.py +143 -0
- browser_use/llm/openai/chat.py +264 -0
- browser_use/llm/openai/like.py +15 -0
- browser_use/llm/openai/serializer.py +165 -0
- browser_use/llm/openrouter/chat.py +211 -0
- browser_use/llm/openrouter/serializer.py +26 -0
- browser_use/llm/schema.py +176 -0
- browser_use/llm/views.py +48 -0
- browser_use/logging_config.py +330 -0
- browser_use/mcp/__init__.py +18 -0
- browser_use/mcp/__main__.py +12 -0
- browser_use/mcp/client.py +544 -0
- browser_use/mcp/controller.py +264 -0
- browser_use/mcp/server.py +1114 -0
- browser_use/observability.py +204 -0
- browser_use/py.typed +0 -0
- browser_use/sandbox/__init__.py +41 -0
- browser_use/sandbox/sandbox.py +637 -0
- browser_use/sandbox/views.py +132 -0
- browser_use/screenshots/__init__.py +1 -0
- browser_use/screenshots/service.py +52 -0
- browser_use/sync/__init__.py +6 -0
- browser_use/sync/auth.py +357 -0
- browser_use/sync/service.py +161 -0
- browser_use/telemetry/__init__.py +51 -0
- browser_use/telemetry/service.py +112 -0
- browser_use/telemetry/views.py +101 -0
- browser_use/tokens/__init__.py +0 -0
- browser_use/tokens/custom_pricing.py +24 -0
- browser_use/tokens/mappings.py +4 -0
- browser_use/tokens/service.py +580 -0
- browser_use/tokens/views.py +108 -0
- browser_use/tools/registry/service.py +572 -0
- browser_use/tools/registry/views.py +174 -0
- browser_use/tools/service.py +1675 -0
- browser_use/tools/utils.py +82 -0
- browser_use/tools/views.py +100 -0
- browser_use/utils.py +670 -0
- optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
- optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
- optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
- optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
- optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,1170 @@
|
|
|
1
|
+
# @file purpose: Serializes enhanced DOM trees to string format for LLM consumption
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from browser_use.dom.serializer.clickable_elements import ClickableElementDetector
|
|
6
|
+
from browser_use.dom.serializer.paint_order import PaintOrderRemover
|
|
7
|
+
from browser_use.dom.utils import cap_text_length
|
|
8
|
+
from browser_use.dom.views import (
|
|
9
|
+
DOMRect,
|
|
10
|
+
DOMSelectorMap,
|
|
11
|
+
EnhancedDOMTreeNode,
|
|
12
|
+
NodeType,
|
|
13
|
+
PropagatingBounds,
|
|
14
|
+
SerializedDOMState,
|
|
15
|
+
SimplifiedNode,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Element names that never contribute content to the serialized tree; the
# simplified-tree builder drops any element whose lower-cased node name is here.
DISABLED_ELEMENTS = {'style', 'script', 'head', 'meta', 'link', 'title'}

# SVG child elements to skip (decorative only, no interaction value).
# Membership is tested against lower-cased node names, so every entry needs a
# lower-case spelling. The camelCase 'clipPath' could never match on its own;
# it is kept next to 'clippath' for any caller comparing against the raw name.
SVG_ELEMENTS = {
    'path',
    'rect',
    'g',
    'circle',
    'ellipse',
    'line',
    'polyline',
    'polygon',
    'use',
    'defs',
    'clipPath',
    'clippath',
    'mask',
    'pattern',
    'image',
    'text',
    'tspan',
}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class DOMTreeSerializer:
|
|
42
|
+
"""Serializes enhanced DOM trees to string format."""
|
|
43
|
+
|
|
44
|
+
# Configuration - elements that propagate bounds to their children.
# Each entry is a tag/role pattern; per the inline notes, a role of None means
# "any element with this tag". The list previously carried the input/combobox
# pattern twice (the second copy was annotated as a text-input pattern but was
# byte-for-byte the same dict), so the dead duplicate has been removed.
PROPAGATING_ELEMENTS = [
    {'tag': 'a', 'role': None},  # Any <a> tag
    {'tag': 'button', 'role': None},  # Any <button> tag
    {'tag': 'div', 'role': 'button'},  # <div role="button">
    {'tag': 'div', 'role': 'combobox'},  # <div role="combobox"> - dropdowns/selects
    {'tag': 'span', 'role': 'button'},  # <span role="button">
    {'tag': 'span', 'role': 'combobox'},  # <span role="combobox">
    {'tag': 'input', 'role': 'combobox'},  # <input role="combobox"> - autocomplete inputs
    # Disabled patterns, kept for reference:
    # {'tag': 'div', 'role': 'link'},  # <div role="link">
    # {'tag': 'span', 'role': 'link'},  # <span role="link">
]
DEFAULT_CONTAINMENT_THRESHOLD = 0.99  # 99% containment by default
|
|
58
|
+
|
|
59
|
+
def __init__(
    self,
    root_node: EnhancedDOMTreeNode,
    previous_cached_state: SerializedDOMState | None = None,
    enable_bbox_filtering: bool = True,
    containment_threshold: float | None = None,
    paint_order_filtering: bool = True,
):
    """Prepare a serializer for one enhanced DOM tree.

    Args:
        root_node: Root of the enhanced DOM tree that will be serialized.
        previous_cached_state: Result of an earlier serialization; only its
            ``selector_map`` is retained.
        enable_bbox_filtering: When true, ``serialize_accessible_elements``
            runs the bounding-box filtering step.
        containment_threshold: Stored as ``self.containment_threshold``
            (presumably the containment ratio used by bounding-box
            filtering - confirm against that step). ``None`` selects
            ``DEFAULT_CONTAINMENT_THRESHOLD``; any explicit number,
            including ``0.0``, is honoured.
        paint_order_filtering: When true, ``serialize_accessible_elements``
            runs the paint-order step.
    """
    self.root_node = root_node

    # Per-run state; serialize_accessible_elements() resets all of these.
    self._interactive_counter = 1
    self._selector_map: DOMSelectorMap = {}
    self._semantic_groups = []
    # Cache for clickable element detection to avoid redundant calls
    self._clickable_cache: dict[int, bool] = {}

    self._previous_cached_selector_map = previous_cached_state.selector_map if previous_cached_state else None

    # Stage name -> elapsed seconds, filled in while serializing.
    self.timing_info: dict[str, float] = {}

    # Bounding box filtering configuration.
    self.enable_bbox_filtering = enable_bbox_filtering
    # Compare against None rather than relying on truthiness, so an explicit
    # threshold of 0.0 is not silently replaced by the default.
    if containment_threshold is None:
        self.containment_threshold = self.DEFAULT_CONTAINMENT_THRESHOLD
    else:
        self.containment_threshold = containment_threshold

    # Paint order filtering configuration.
    self.paint_order_filtering = paint_order_filtering
|
|
80
|
+
|
|
81
|
+
def _safe_parse_number(self, value_str: str, default: float) -> float:
|
|
82
|
+
"""Parse string to float, handling negatives and decimals."""
|
|
83
|
+
try:
|
|
84
|
+
return float(value_str)
|
|
85
|
+
except (ValueError, TypeError):
|
|
86
|
+
return default
|
|
87
|
+
|
|
88
|
+
def _safe_parse_optional_number(self, value_str: str | None) -> float | None:
|
|
89
|
+
"""Parse string to float, returning None for invalid values."""
|
|
90
|
+
if not value_str:
|
|
91
|
+
return None
|
|
92
|
+
try:
|
|
93
|
+
return float(value_str)
|
|
94
|
+
except (ValueError, TypeError):
|
|
95
|
+
return None
|
|
96
|
+
|
|
97
|
+
def serialize_accessible_elements(self) -> tuple[SerializedDOMState, dict[str, float]]:
    """Run the serialization pipeline over ``self.root_node``.

    The stages, in order, are: build the simplified tree, optionally compute
    paint order, optimize the tree, optionally apply bounding-box filtering,
    and assign interactive indices. The elapsed time of each stage is stored
    in ``self.timing_info`` under a stage-specific key.

    Returns:
        A ``(state, timing_info)`` tuple: ``state`` wraps the final tree and
        the selector map, and ``timing_info`` is ``self.timing_info`` itself
        (the same dict object, not a copy).
    """
    import time

    # perf_counter() is the clock intended for measuring intervals: it is
    # monotonic and high-resolution, whereas time.time() follows the wall
    # clock and can jump when the system time is adjusted.
    clock = time.perf_counter
    total_started = clock()

    # Reset per-run state.
    self._interactive_counter = 1
    self._selector_map = {}
    self._semantic_groups = []
    self._clickable_cache = {}  # Clear cache for new serialization

    # Step 1: Create simplified tree (includes clickable element detection).
    stage_started = clock()
    simplified_tree = self._create_simplified_tree(self.root_node)
    self.timing_info['create_simplified_tree'] = clock() - stage_started

    # Step 2: Remove elements based on paint order. The stage is timed even
    # when it is skipped, so the key is always present.
    stage_started = clock()
    if self.paint_order_filtering and simplified_tree:
        PaintOrderRemover(simplified_tree).calculate_paint_order()
    self.timing_info['calculate_paint_order'] = clock() - stage_started

    # Step 3: Optimize tree (remove unnecessary parents).
    stage_started = clock()
    optimized_tree = self._optimize_tree(simplified_tree)
    self.timing_info['optimize_tree'] = clock() - stage_started

    # Step 4: Apply bounding box filtering. Its timing key is only written
    # when the stage actually runs.
    if self.enable_bbox_filtering and optimized_tree:
        stage_started = clock()
        filtered_tree = self._apply_bounding_box_filtering(optimized_tree)
        self.timing_info['bbox_filtering'] = clock() - stage_started
    else:
        filtered_tree = optimized_tree

    # Step 5: Assign interactive indices to clickable elements.
    stage_started = clock()
    self._assign_interactive_indices_and_mark_new_nodes(filtered_tree)
    self.timing_info['assign_interactive_indices'] = clock() - stage_started

    self.timing_info['serialize_accessible_elements_total'] = clock() - total_started

    return SerializedDOMState(_root=filtered_tree, selector_map=self._selector_map), self.timing_info
|
|
146
|
+
|
|
147
|
+
def _add_compound_components(self, simplified: SimplifiedNode, node: EnhancedDOMTreeNode) -> None:
    """Describe the virtual sub-controls of a compound element.

    Handles ``<input>`` (range, number, color and file types), ``<select>``,
    ``<details>``, ``<audio>`` and ``<video>``. For a handled element, a list
    of descriptor dicts is appended to ``node._compound_children`` and
    ``simplified.is_compound_component`` is set to True. Any other element is
    left untouched, and so are date/time-style inputs.

    Args:
        simplified: Simplified node that receives the compound flag.
        node: Original DOM node that is inspected and receives the descriptors.
    """

    def part(role, name, valuemin=None, valuemax=None, valuenow=None):
        # Every descriptor carries the same five base keys, in this order.
        return {'role': role, 'name': name, 'valuemin': valuemin, 'valuemax': valuemax, 'valuenow': valuenow}

    tag = node.tag_name
    if tag not in ('input', 'select', 'details', 'audio', 'video'):
        return

    attrs = node.attributes
    if tag == 'input':
        # Only these input types are candidates for compound treatment.
        if not attrs or attrs.get('type') not in (
            'date',
            'time',
            'datetime-local',
            'month',
            'week',
            'range',
            'number',
            'color',
            'file',
        ):
            return
    elif not node.ax_node or not node.ax_node.child_ids:
        # Non-input elements need accessibility children to qualify.
        return

    if tag == 'input':
        input_type = attrs.get('type', '')

        # NOTE: Date/time inputs deliberately get no compound components:
        # 1. They confuse the model (seeing "Day, Month, Year" suggests DD.MM.YYYY format)
        # 2. HTML5 date/time inputs ALWAYS require ISO format (YYYY-MM-DD, HH:MM, etc.)
        # 3. The placeholder attribute clearly shows the required format
        # 4. These inputs use direct value assignment, not sequential typing
        if input_type in ('date', 'time', 'datetime-local', 'month', 'week'):
            return

        if input_type == 'range':
            # Range slider; missing bounds fall back to 0 and 100.
            lower = attrs.get('min', '0')
            upper = attrs.get('max', '100')
            parts = [
                part(
                    'slider',
                    'Value',
                    self._safe_parse_number(lower, 0.0),
                    self._safe_parse_number(upper, 100.0),
                )
            ]
        elif input_type == 'number':
            # Number input with increment/decrement buttons; bounds are optional.
            lower = attrs.get('min')
            upper = attrs.get('max')
            parts = [
                part('button', 'Increment'),
                part('button', 'Decrement'),
                part(
                    'textbox',
                    'Value',
                    self._safe_parse_optional_number(lower),
                    self._safe_parse_optional_number(upper),
                ),
            ]
        elif input_type == 'color':
            # Color picker with a hex field and a picker button.
            parts = [part('textbox', 'Hex Value'), part('button', 'Color Picker')]
        elif input_type == 'file':
            # File input with a browse button and the current selection.
            allows_many = 'multiple' in attrs

            # The explicit string "None" is reported when nothing is selected.
            selection = 'None'
            if node.ax_node and node.ax_node.properties:
                for prop in node.ax_node.properties:
                    if prop.name == 'valuetext' and prop.value:
                        # Human-readable display value, e.g. "file.pdf".
                        shown = str(prop.value).strip()
                        if shown and shown.lower() not in ('', 'no file chosen', 'no file selected'):
                            selection = shown
                            break
                    elif prop.name == 'value' and prop.value:
                        # May be a full path; keep only the final component.
                        raw = str(prop.value).strip()
                        if raw:
                            separator = '\\' if '\\' in raw else '/'
                            selection = raw.split(separator)[-1]
                            break

            selected_label = f'{"Files" if allows_many else "File"} Selected'
            parts = [
                part('button', 'Browse Files'),
                part('textbox', selected_label, valuenow=selection),
            ]
        else:
            return

    elif tag == 'select':
        # Dropdown toggle plus a listbox, enriched with option details when available.
        listbox = part('listbox', 'Options')
        options_info = self._extract_select_options(node)
        if options_info:
            listbox['options_count'] = options_info['count']
            listbox['first_options'] = options_info['first_options']
            if options_info['format_hint']:
                listbox['format_hint'] = options_info['format_hint']
        parts = [part('button', 'Dropdown Toggle'), listbox]

    elif tag == 'details':
        # Details/summary disclosure widget.
        parts = [part('button', 'Toggle Disclosure'), part('region', 'Content Area')]

    else:
        # Audio and video share the same player controls; video adds fullscreen.
        parts = [
            part('button', 'Play/Pause'),
            part('slider', 'Progress', 0, 100),
            part('button', 'Mute'),
            part('slider', 'Volume', 0, 100),
        ]
        if tag == 'video':
            parts.append(part('button', 'Fullscreen'))

    node._compound_children.extend(parts)
    simplified.is_compound_component = True
|
|
331
|
+
|
|
332
|
+
def _extract_select_options(self, select_node: EnhancedDOMTreeNode) -> dict[str, Any] | None:
    """Summarize the ``<option>`` descendants of a select element.

    Returns:
        ``None`` when the select has no children or no usable options.
        Otherwise a dict with:
        ``count`` - number of options found;
        ``first_options`` - display strings for up to four options, each cut
        to 30 characters, plus a "... N more options..." marker when there
        are more;
        ``format_hint`` - a coarse guess derived from the first five values,
        or ``None``;
        ``all_options`` - every option as ``{'text': ..., 'value': ...}``.
    """
    if not select_node.children:
        return None

    def iter_option_nodes(node):
        # Depth-first, document order. Options are yielded and not descended
        # into; every other node (optgroups included) is searched through.
        if node.tag_name.lower() == 'option':
            yield node
            return
        for child in node.children:
            yield from iter_option_nodes(child)

    def direct_text(node):
        # Only direct text children are read, to avoid duplicated text.
        pieces = [
            child.node_value.strip()
            for child in node.children
            if child.node_type == NodeType.TEXT_NODE and child.node_value
        ]
        return ' '.join(pieces).strip()

    options = []
    for top_level_child in select_node.children:
        for option_node in iter_option_nodes(top_level_child):
            value = ''
            if option_node.attributes and 'value' in option_node.attributes:
                value = str(option_node.attributes['value']).strip()

            label = direct_text(option_node)

            # An option without an explicit value falls back to its text.
            if not value and label:
                value = label

            if label or value:
                options.append({'text': label, 'value': value})

    if not options:
        return None

    option_values = [option['value'] for option in options]

    # Show at most four options, text preferred over value.
    first_options = []
    for option in options[:4]:
        shown = option['text'] or option['value']
        if shown:
            # Limit individual option text to avoid overly long attributes.
            first_options.append(shown[:30] + ('...' if len(shown) > 30 else ''))

    hidden_count = len(options) - 4
    if hidden_count > 0:
        first_options.append(f'... {hidden_count} more options...')

    # Infer a format hint from the first five non-empty values.
    format_hint = None
    if len(option_values) >= 2:
        sample = [val for val in option_values[:5] if val]
        if all(val.isdigit() for val in sample):
            format_hint = 'numeric'
        elif all(len(val) == 2 and val.isupper() for val in sample):
            format_hint = 'country/state codes'
        elif all('/' in val or '-' in val for val in sample):
            format_hint = 'date/path format'
        elif any('@' in val for val in sample):
            format_hint = 'email addresses'

    return {'count': len(options), 'first_options': first_options, 'format_hint': format_hint, "all_options": options}
|
|
413
|
+
|
|
414
|
+
def _is_interactive_cached(self, node: EnhancedDOMTreeNode) -> bool:
    """Return whether ``node`` is interactive, memoized per ``node.node_id``.

    On a cache miss the result of ``ClickableElementDetector.is_interactive``
    is stored in ``self._clickable_cache``, and the time spent in the detector
    is added to ``self.timing_info['clickable_detection_time']``.
    """
    node_id = node.node_id

    if node_id not in self._clickable_cache:
        import time

        # Each detector call is very short and the durations are summed over
        # many nodes, so use the high-resolution monotonic interval clock
        # rather than wall-clock time.time().
        started = time.perf_counter()
        result = ClickableElementDetector.is_interactive(node)
        elapsed = time.perf_counter() - started

        self.timing_info['clickable_detection_time'] = self.timing_info.get('clickable_detection_time', 0) + elapsed
        self._clickable_cache[node_id] = result

    return self._clickable_cache[node_id]
|
|
431
|
+
|
|
432
|
+
def _create_simplified_tree(self, node: EnhancedDOMTreeNode, depth: int = 0) -> SimplifiedNode | None:
    """Step 1: build the simplified counterpart of ``node`` and its subtree.

    Per node type:
    - document: returns the first child subtree that survives simplification;
    - document fragment (shadow root): always returned, even with no children;
    - text: kept when visible and longer than one character after stripping;
    - element: disabled and SVG child tags are dropped; frames with a content
      document wrap that document's children; any other element is kept when
      it is visible, scrollable, or has surviving children.

    Args:
        node: Node to simplify.
        depth: Recursion depth; only passed on to child calls.

    Returns:
        The simplified node, or ``None`` when the node is dropped.
    """
    kind = node.node_type
    child_depth = depth + 1

    if kind == NodeType.DOCUMENT_NODE:
        # Children and shadow roots are tried in order; the first hit wins.
        for child in node.children_and_shadow_roots:
            subtree = self._create_simplified_tree(child, child_depth)
            if subtree:
                return subtree
        return None

    if kind == NodeType.DOCUMENT_FRAGMENT_NODE:
        # Shadow DOM fragments are always returned, even if they end up empty:
        # shadow DOM often contains the actual interactive content in SPAs.
        fragment = SimplifiedNode(original_node=node, children=[])
        for child in node.children_and_shadow_roots:
            subtree = self._create_simplified_tree(child, child_depth)
            if subtree:
                fragment.children.append(subtree)
        return fragment

    if kind == NodeType.TEXT_NODE:
        # Keep only visible text with more than one meaningful character.
        if not (node.snapshot_node and node.is_visible):
            return None
        stripped = node.node_value.strip() if node.node_value else ''
        if len(stripped) > 1:
            return SimplifiedNode(original_node=node, children=[])
        return None

    if kind != NodeType.ELEMENT_NODE:
        return None

    raw_name = node.node_name
    lowered_name = raw_name.lower()

    # Skip non-content elements and SVG child elements (path, rect, g, ...).
    if lowered_name in DISABLED_ELEMENTS or lowered_name in SVG_ELEMENTS:
        return None

    if raw_name in ('IFRAME', 'FRAME') and node.content_document:
        # A frame with a loaded document wraps that document's children.
        frame = SimplifiedNode(original_node=node, children=[])
        for child in node.content_document.children_nodes or []:
            subtree = self._create_simplified_tree(child, child_depth)
            if subtree is not None:
                frame.children.append(subtree)
        return frame

    visible = node.is_visible
    scrollable = node.is_actually_scrollable
    has_shadow_content = bool(node.children_and_shadow_roots)

    # Shadow hosts are included even when they are not visible themselves.
    is_shadow_host = any(child.node_type == NodeType.DOCUMENT_FRAGMENT_NODE for child in node.children_and_shadow_roots)

    if not visible and node.attributes:
        # Elements carrying aria-*/pseudo* attributes are treated as visible
        # (validation elements).
        if any(attr.startswith(('aria-', 'pseudo')) for attr in node.attributes.keys()):
            visible = True

    # File inputs are often hidden with opacity:0 but are still functional
    # (Bootstrap and other frameworks use custom-styled file pickers).
    if not visible and (
        node.tag_name and node.tag_name.lower() == 'input' and node.attributes and node.attributes.get('type') == 'file'
    ):
        visible = True

    # Candidate only if visible, scrollable, has children, or is a shadow host.
    if not (visible or scrollable or has_shadow_content or is_shadow_host):
        return None

    element = SimplifiedNode(original_node=node, children=[], is_shadow_host=is_shadow_host)

    # Process all children, shadow roots included.
    for child in node.children_and_shadow_roots:
        subtree = self._create_simplified_tree(child, child_depth)
        if subtree:
            element.children.append(subtree)

    # Add virtual components for compound controls.
    self._add_compound_components(element, node)

    # Keep the element when it is meaningful itself or has surviving children
    # (which also covers shadow hosts that have content).
    if visible or scrollable or element.children:
        return element

    return None
|
|
523
|
+
|
|
524
|
+
def _optimize_tree(self, node: SimplifiedNode | None) -> SimplifiedNode | None:
|
|
525
|
+
"""Step 2: Optimize tree structure."""
|
|
526
|
+
if not node:
|
|
527
|
+
return None
|
|
528
|
+
|
|
529
|
+
# Process children
|
|
530
|
+
optimized_children = []
|
|
531
|
+
for child in node.children:
|
|
532
|
+
optimized_child = self._optimize_tree(child)
|
|
533
|
+
if optimized_child:
|
|
534
|
+
optimized_children.append(optimized_child)
|
|
535
|
+
|
|
536
|
+
node.children = optimized_children
|
|
537
|
+
|
|
538
|
+
# Keep meaningful nodes
|
|
539
|
+
is_visible = node.original_node.snapshot_node and node.original_node.is_visible
|
|
540
|
+
|
|
541
|
+
# EXCEPTION: File inputs are often hidden with opacity:0 but are still functional
|
|
542
|
+
is_file_input = (
|
|
543
|
+
node.original_node.tag_name
|
|
544
|
+
and node.original_node.tag_name.lower() == 'input'
|
|
545
|
+
and node.original_node.attributes
|
|
546
|
+
and node.original_node.attributes.get('type') == 'file'
|
|
547
|
+
)
|
|
548
|
+
|
|
549
|
+
if (
|
|
550
|
+
is_visible # Keep all visible nodes
|
|
551
|
+
or node.original_node.is_actually_scrollable
|
|
552
|
+
or node.original_node.node_type == NodeType.TEXT_NODE
|
|
553
|
+
or node.children
|
|
554
|
+
or is_file_input # Keep file inputs even if not visible
|
|
555
|
+
):
|
|
556
|
+
return node
|
|
557
|
+
|
|
558
|
+
return None
|
|
559
|
+
|
|
560
|
+
def _collect_interactive_elements(self, node: SimplifiedNode, elements: list[SimplifiedNode]) -> None:
|
|
561
|
+
"""Recursively collect interactive elements that are also visible."""
|
|
562
|
+
is_interactive = self._is_interactive_cached(node.original_node)
|
|
563
|
+
is_visible = node.original_node.snapshot_node and node.original_node.is_visible
|
|
564
|
+
|
|
565
|
+
# Only collect elements that are both interactive AND visible
|
|
566
|
+
if is_interactive and is_visible:
|
|
567
|
+
elements.append(node)
|
|
568
|
+
|
|
569
|
+
for child in node.children:
|
|
570
|
+
self._collect_interactive_elements(child, elements)
|
|
571
|
+
|
|
572
|
+
def _has_interactive_descendants(self, node: SimplifiedNode) -> bool:
|
|
573
|
+
"""Check if a node has any interactive descendants (not including the node itself)."""
|
|
574
|
+
# Check children for interactivity
|
|
575
|
+
for child in node.children:
|
|
576
|
+
# Check if child itself is interactive
|
|
577
|
+
if self._is_interactive_cached(child.original_node):
|
|
578
|
+
return True
|
|
579
|
+
# Recursively check child's descendants
|
|
580
|
+
if self._has_interactive_descendants(child):
|
|
581
|
+
return True
|
|
582
|
+
|
|
583
|
+
return False
|
|
584
|
+
|
|
585
|
+
def _assign_interactive_indices_and_mark_new_nodes(self, node: SimplifiedNode | None) -> None:
|
|
586
|
+
"""Assign interactive indices to clickable elements that are also visible."""
|
|
587
|
+
if not node:
|
|
588
|
+
return
|
|
589
|
+
|
|
590
|
+
# Skip assigning index to excluded nodes, or ignored by paint order
|
|
591
|
+
if not node.excluded_by_parent and not node.ignored_by_paint_order:
|
|
592
|
+
# Regular interactive element assignment (including enhanced compound controls)
|
|
593
|
+
is_interactive_assign = self._is_interactive_cached(node.original_node)
|
|
594
|
+
is_visible = node.original_node.snapshot_node and node.original_node.is_visible
|
|
595
|
+
is_scrollable = node.original_node.is_actually_scrollable
|
|
596
|
+
|
|
597
|
+
# EXCEPTION: File inputs are often hidden with opacity:0 but are still functional
|
|
598
|
+
# Bootstrap and other frameworks use this pattern with custom-styled file pickers
|
|
599
|
+
is_file_input = (
|
|
600
|
+
node.original_node.tag_name
|
|
601
|
+
and node.original_node.tag_name.lower() == 'input'
|
|
602
|
+
and node.original_node.attributes
|
|
603
|
+
and node.original_node.attributes.get('type') == 'file'
|
|
604
|
+
)
|
|
605
|
+
|
|
606
|
+
# Check if scrollable container should be made interactive
|
|
607
|
+
# For scrollable elements, ONLY make them interactive if they have no interactive descendants
|
|
608
|
+
should_make_interactive = False
|
|
609
|
+
if is_scrollable:
|
|
610
|
+
# For scrollable elements, check if they have interactive children
|
|
611
|
+
has_interactive_desc = self._has_interactive_descendants(node)
|
|
612
|
+
|
|
613
|
+
# Only make scrollable container interactive if it has NO interactive descendants
|
|
614
|
+
if not has_interactive_desc:
|
|
615
|
+
should_make_interactive = True
|
|
616
|
+
elif is_interactive_assign and (is_visible or is_file_input):
|
|
617
|
+
# Non-scrollable interactive elements: make interactive if visible (or file input)
|
|
618
|
+
should_make_interactive = True
|
|
619
|
+
|
|
620
|
+
# Add to selector map if element should be interactive
|
|
621
|
+
if should_make_interactive:
|
|
622
|
+
# Mark node as interactive
|
|
623
|
+
node.is_interactive = True
|
|
624
|
+
# Store backend_node_id in selector map (model outputs backend_node_id)
|
|
625
|
+
self._selector_map[node.original_node.backend_node_id] = node.original_node
|
|
626
|
+
self._interactive_counter += 1
|
|
627
|
+
|
|
628
|
+
# Mark compound components as new for visibility
|
|
629
|
+
if node.is_compound_component:
|
|
630
|
+
node.is_new = True
|
|
631
|
+
elif self._previous_cached_selector_map:
|
|
632
|
+
# Check if node is new for regular elements
|
|
633
|
+
previous_backend_node_ids = {node.backend_node_id for node in self._previous_cached_selector_map.values()}
|
|
634
|
+
if node.original_node.backend_node_id not in previous_backend_node_ids:
|
|
635
|
+
node.is_new = True
|
|
636
|
+
|
|
637
|
+
# Process children
|
|
638
|
+
for child in node.children:
|
|
639
|
+
self._assign_interactive_indices_and_mark_new_nodes(child)
|
|
640
|
+
|
|
641
|
+
def _apply_bounding_box_filtering(self, node: SimplifiedNode | None) -> SimplifiedNode | None:
|
|
642
|
+
"""Filter children contained within propagating parent bounds."""
|
|
643
|
+
if not node:
|
|
644
|
+
return None
|
|
645
|
+
|
|
646
|
+
# Start with no active bounds
|
|
647
|
+
self._filter_tree_recursive(node, active_bounds=None, depth=0)
|
|
648
|
+
|
|
649
|
+
# Log statistics
|
|
650
|
+
excluded_count = self._count_excluded_nodes(node)
|
|
651
|
+
if excluded_count > 0:
|
|
652
|
+
import logging
|
|
653
|
+
|
|
654
|
+
logging.debug(f'BBox filtering excluded {excluded_count} nodes')
|
|
655
|
+
|
|
656
|
+
return node
|
|
657
|
+
|
|
658
|
+
def _filter_tree_recursive(self, node: SimplifiedNode, active_bounds: PropagatingBounds | None = None, depth: int = 0):
|
|
659
|
+
"""
|
|
660
|
+
Recursively filter tree with bounding box propagation.
|
|
661
|
+
Bounds propagate to ALL descendants until overridden.
|
|
662
|
+
"""
|
|
663
|
+
|
|
664
|
+
# Check if this node should be excluded by active bounds
|
|
665
|
+
if active_bounds and self._should_exclude_child(node, active_bounds):
|
|
666
|
+
node.excluded_by_parent = True
|
|
667
|
+
# Important: Still check if this node starts NEW propagation
|
|
668
|
+
|
|
669
|
+
# Check if this node starts new propagation (even if excluded!)
|
|
670
|
+
new_bounds = None
|
|
671
|
+
tag = node.original_node.tag_name.lower()
|
|
672
|
+
role = node.original_node.attributes.get('role') if node.original_node.attributes else None
|
|
673
|
+
attributes = {
|
|
674
|
+
'tag': tag,
|
|
675
|
+
'role': role,
|
|
676
|
+
}
|
|
677
|
+
# Check if this element matches any propagating element pattern
|
|
678
|
+
if self._is_propagating_element(attributes):
|
|
679
|
+
# This node propagates bounds to ALL its descendants
|
|
680
|
+
if node.original_node.snapshot_node and node.original_node.snapshot_node.bounds:
|
|
681
|
+
new_bounds = PropagatingBounds(
|
|
682
|
+
tag=tag,
|
|
683
|
+
bounds=node.original_node.snapshot_node.bounds,
|
|
684
|
+
node_id=node.original_node.node_id,
|
|
685
|
+
depth=depth,
|
|
686
|
+
)
|
|
687
|
+
|
|
688
|
+
# Propagate to ALL children
|
|
689
|
+
# Use new_bounds if this node starts propagation, otherwise continue with active_bounds
|
|
690
|
+
propagate_bounds = new_bounds if new_bounds else active_bounds
|
|
691
|
+
|
|
692
|
+
for child in node.children:
|
|
693
|
+
self._filter_tree_recursive(child, propagate_bounds, depth + 1)
|
|
694
|
+
|
|
695
|
+
def _should_exclude_child(self, node: SimplifiedNode, active_bounds: PropagatingBounds) -> bool:
    """
    Decide whether ``node`` is swallowed by the propagating ancestor bounds.

    Returns True only when the node has bounds, is contained in
    ``active_bounds.bounds`` to at least ``self.containment_threshold``, and
    none of the keep-rules below applies.
    """
    source = node.original_node

    # Text content is always preserved.
    if source.node_type == NodeType.TEXT_NODE:
        return False

    # Without bounds, containment cannot be determined.
    if not source.snapshot_node or not source.snapshot_node.bounds:
        return False

    own_bounds = source.snapshot_node.bounds
    if not self._is_contained(own_bounds, active_bounds.bounds, self.containment_threshold):
        return False  # Not sufficiently contained

    # --- Keep-rules: contained, but still kept as a separate element ---
    tag = source.tag_name.lower()
    attrs = source.attributes
    role = attrs.get('role') if attrs else None

    # 1. Form elements need individual interaction.
    if tag in ('input', 'select', 'textarea', 'label'):
        return False

    # 2. The child is itself a propagating element (e.g., button in button).
    if self._is_propagating_element({'tag': tag, 'role': role}):
        return False

    if attrs:
        # 3. Explicit onclick handler.
        if 'onclick' in attrs:
            return False

        # 4. A non-blank aria-label suggests it is independently interactive.
        aria_label = attrs.get('aria-label')
        if aria_label and aria_label.strip():
            return False

        # 5. A role suggesting interactivity.
        if role in ('button', 'link', 'checkbox', 'radio', 'tab', 'menuitem', 'option'):
            return False

    # Default: exclude this child
    return True
|
|
751
|
+
|
|
752
|
+
def _is_contained(self, child: DOMRect, parent: DOMRect, threshold: float) -> bool:
|
|
753
|
+
"""
|
|
754
|
+
Check if child is contained within parent bounds.
|
|
755
|
+
|
|
756
|
+
Args:
|
|
757
|
+
threshold: Percentage (0.0-1.0) of child that must be within parent
|
|
758
|
+
"""
|
|
759
|
+
# Calculate intersection
|
|
760
|
+
x_overlap = max(0, min(child.x + child.width, parent.x + parent.width) - max(child.x, parent.x))
|
|
761
|
+
y_overlap = max(0, min(child.y + child.height, parent.y + parent.height) - max(child.y, parent.y))
|
|
762
|
+
|
|
763
|
+
intersection_area = x_overlap * y_overlap
|
|
764
|
+
child_area = child.width * child.height
|
|
765
|
+
|
|
766
|
+
if child_area == 0:
|
|
767
|
+
return False # Zero-area element
|
|
768
|
+
|
|
769
|
+
containment_ratio = intersection_area / child_area
|
|
770
|
+
return containment_ratio >= threshold
|
|
771
|
+
|
|
772
|
+
def _count_excluded_nodes(self, node: SimplifiedNode, count: int = 0) -> int:
|
|
773
|
+
"""Count how many nodes were excluded (for debugging)."""
|
|
774
|
+
if hasattr(node, 'excluded_by_parent') and node.excluded_by_parent:
|
|
775
|
+
count += 1
|
|
776
|
+
for child in node.children:
|
|
777
|
+
count = self._count_excluded_nodes(child, count)
|
|
778
|
+
return count
|
|
779
|
+
|
|
780
|
+
def _is_propagating_element(self, attributes: dict[str, str | None]) -> bool:
|
|
781
|
+
"""
|
|
782
|
+
Check if an element should propagate bounds based on attributes.
|
|
783
|
+
If the element satisfies one of the patterns, it propagates bounds to all its children.
|
|
784
|
+
"""
|
|
785
|
+
keys_to_check = ['tag', 'role']
|
|
786
|
+
for pattern in self.PROPAGATING_ELEMENTS:
|
|
787
|
+
# Check if the element satisfies the pattern
|
|
788
|
+
check = [pattern.get(key) is None or pattern.get(key) == attributes.get(key) for key in keys_to_check]
|
|
789
|
+
if all(check):
|
|
790
|
+
return True
|
|
791
|
+
|
|
792
|
+
return False
|
|
793
|
+
|
|
794
|
+
@staticmethod
def serialize_tree(node: SimplifiedNode | None, include_attributes: list[str], depth: int = 0) -> str:
    """Serialize the optimized tree to string format.

    Args:
        node: Root of the (sub)tree to render; a falsy value yields ``''``.
        include_attributes: Attribute names forwarded to
            ``_build_attributes_string`` to select what is printed per element.
        depth: Indentation level of this node; one tab is emitted per level.

    Returns:
        The rendered lines joined with newlines. Nodes that render nothing
        themselves still contribute the output of their children.
    """
    if not node:
        return ''

    # Skip rendering excluded nodes, but process their children
    # (children are rendered at the SAME depth, as if the excluded node were absent)
    if hasattr(node, 'excluded_by_parent') and node.excluded_by_parent:
        formatted_text = []
        for child in node.children:
            child_text = DOMTreeSerializer.serialize_tree(child, include_attributes, depth)
            if child_text:
                formatted_text.append(child_text)
        return '\n'.join(formatted_text)

    formatted_text = []
    depth_str = depth * '\t'
    # Children are only indented one level deeper when this node prints a line of its own.
    next_depth = depth

    if node.original_node.node_type == NodeType.ELEMENT_NODE:
        # Skip displaying nodes marked as should_display=False
        if not node.should_display:
            for child in node.children:
                child_text = DOMTreeSerializer.serialize_tree(child, include_attributes, depth)
                if child_text:
                    formatted_text.append(child_text)
            return '\n'.join(formatted_text)

        # Special handling for SVG elements - show the tag but collapse children
        if node.original_node.tag_name.lower() == 'svg':
            shadow_prefix = ''
            if node.is_shadow_host:
                # A host is reported as closed if ANY of its shadow-root children is closed.
                has_closed_shadow = any(
                    child.original_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE
                    and child.original_node.shadow_root_type
                    and child.original_node.shadow_root_type.lower() == 'closed'
                    for child in node.children
                )
                shadow_prefix = '|SHADOW(closed)|' if has_closed_shadow else '|SHADOW(open)|'

            line = f'{depth_str}{shadow_prefix}'
            # Add interactive marker if clickable
            if node.is_interactive:
                new_prefix = '*' if node.is_new else ''
                line += f'{new_prefix}[{node.original_node.backend_node_id}]'
            line += '<svg'
            attributes_html_str = DOMTreeSerializer._build_attributes_string(node.original_node, include_attributes, '')
            if attributes_html_str:
                line += f' {attributes_html_str}'
            line += ' /> <!-- SVG content collapsed -->'
            formatted_text.append(line)
            # Don't process children for SVG
            return '\n'.join(formatted_text)

        # Add element if clickable, scrollable, or iframe
        is_any_scrollable = node.original_node.is_actually_scrollable or node.original_node.is_scrollable
        should_show_scroll = node.original_node.should_show_scroll_info
        if (
            node.is_interactive
            or is_any_scrollable
            or node.original_node.tag_name.upper() == 'IFRAME'
            or node.original_node.tag_name.upper() == 'FRAME'
        ):
            next_depth += 1

            # Build attributes string with compound component info
            # (text_content is always empty here, so the text-match pruning in
            # _build_attributes_string compares against an empty string)
            text_content = ''
            attributes_html_str = DOMTreeSerializer._build_attributes_string(
                node.original_node, include_attributes, text_content
            )

            # Add compound component information to attributes if present
            if node.original_node._compound_children:
                compound_info = []
                for child_info in node.original_node._compound_children:
                    # Each compound child becomes one "(k=v,k=v,...)" group.
                    parts = []
                    if child_info['name']:
                        parts.append(f'name={child_info["name"]}')
                    if child_info['role']:
                        parts.append(f'role={child_info["role"]}')
                    if child_info['valuemin'] is not None:
                        parts.append(f'min={child_info["valuemin"]}')
                    if child_info['valuemax'] is not None:
                        parts.append(f'max={child_info["valuemax"]}')
                    if child_info['valuenow'] is not None:
                        parts.append(f'current={child_info["valuenow"]}')

                    # Add select-specific information
                    if 'options_count' in child_info and child_info['options_count'] is not None:
                        parts.append(f'count={child_info["options_count"]}')
                    if 'first_options' in child_info and child_info['first_options']:
                        options_str = '|'.join(child_info['first_options'][:4])  # Limit to 4 options
                        parts.append(f'options={options_str}')
                    if 'format_hint' in child_info and child_info['format_hint']:
                        parts.append(f'format={child_info["format_hint"]}')

                    if parts:
                        compound_info.append(f'({",".join(parts)})')

                if compound_info:
                    compound_attr = f'compound_components={",".join(compound_info)}'
                    if attributes_html_str:
                        attributes_html_str += f' {compound_attr}'
                    else:
                        attributes_html_str = compound_attr

            # Build the line with shadow host indicator
            shadow_prefix = ''
            if node.is_shadow_host:
                # Check if any shadow children are closed
                has_closed_shadow = any(
                    child.original_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE
                    and child.original_node.shadow_root_type
                    and child.original_node.shadow_root_type.lower() == 'closed'
                    for child in node.children
                )
                shadow_prefix = '|SHADOW(closed)|' if has_closed_shadow else '|SHADOW(open)|'

            # Pick the line prefix; the first matching case wins.
            if should_show_scroll and not node.is_interactive:
                # Scrollable container but not clickable
                line = f'{depth_str}{shadow_prefix}|SCROLL|<{node.original_node.tag_name}'
            elif node.is_interactive:
                # Clickable (and possibly scrollable) - show backend_node_id
                new_prefix = '*' if node.is_new else ''
                scroll_prefix = '|SCROLL[' if should_show_scroll else '['
                line = f'{depth_str}{shadow_prefix}{new_prefix}{scroll_prefix}{node.original_node.backend_node_id}]<{node.original_node.tag_name}'
            elif node.original_node.tag_name.upper() == 'IFRAME':
                # Iframe element (not interactive)
                line = f'{depth_str}{shadow_prefix}|IFRAME|<{node.original_node.tag_name}'
            elif node.original_node.tag_name.upper() == 'FRAME':
                # Frame element (not interactive)
                line = f'{depth_str}{shadow_prefix}|FRAME|<{node.original_node.tag_name}'
            else:
                # Reached for scrollable elements whose scroll info should not be shown
                line = f'{depth_str}{shadow_prefix}<{node.original_node.tag_name}'

            if attributes_html_str:
                line += f' {attributes_html_str}'

            line += ' />'

            # Add scroll information only when we should show it
            if should_show_scroll:
                scroll_info_text = node.original_node.get_scroll_info_text()
                if scroll_info_text:
                    line += f' ({scroll_info_text})'

            formatted_text.append(line)

    elif node.original_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE:
        # Shadow DOM representation - show clearly to LLM
        if node.original_node.shadow_root_type and node.original_node.shadow_root_type.lower() == 'closed':
            formatted_text.append(f'{depth_str}Closed Shadow')
        else:
            formatted_text.append(f'{depth_str}Open Shadow')

        next_depth += 1

        # Process shadow DOM children
        for child in node.children:
            child_text = DOMTreeSerializer.serialize_tree(child, include_attributes, next_depth)
            if child_text:
                formatted_text.append(child_text)

        # Close shadow DOM indicator
        if node.children:  # Only show close if we had content
            formatted_text.append(f'{depth_str}Shadow End')

    elif node.original_node.node_type == NodeType.TEXT_NODE:
        # Include visible text (stripped, and only when longer than one character)
        is_visible = node.original_node.snapshot_node and node.original_node.is_visible
        if (
            is_visible
            and node.original_node.node_value
            and node.original_node.node_value.strip()
            and len(node.original_node.node_value.strip()) > 1
        ):
            clean_text = node.original_node.node_value.strip()
            formatted_text.append(f'{depth_str}{clean_text}')

    # Process children (for non-shadow elements)
    # Shadow-root children were already rendered in the DOCUMENT_FRAGMENT_NODE branch above.
    if node.original_node.node_type != NodeType.DOCUMENT_FRAGMENT_NODE:
        for child in node.children:
            child_text = DOMTreeSerializer.serialize_tree(child, include_attributes, next_depth)
            if child_text:
                formatted_text.append(child_text)

    return '\n'.join(formatted_text)
|
|
981
|
+
|
|
982
|
+
@staticmethod
def _build_attributes_string(node: EnhancedDOMTreeNode, include_attributes: list[str], text: str) -> str:
    """Build the attributes string for an element.

    Collects, in this order: whitelisted non-empty HTML attributes, synthetic
    ``format``/``placeholder``/``expected_format`` hints for date-like inputs,
    whitelisted accessibility (AX) properties, and the current AX value of
    form controls. Redundant entries are then pruned and the rest is rendered
    as space-separated ``key=value`` pairs.

    Args:
        node: DOM node whose ``attributes`` and ``ax_node`` are read.
        include_attributes: Names of HTML attributes / AX properties that may
            be emitted; its order also drives the duplicate-value pruning.
        text: Text to compare ``aria-label``/``placeholder``/``title``
            against; an attribute equal to it (case-insensitive, stripped)
            is dropped.

    Returns:
        The rendered attribute string, or ``''`` when nothing is left.
        Output order follows dict insertion order, so the order of the
        sections below is significant.
    """
    attributes_to_include = {}

    # Include HTML attributes
    if node.attributes:
        attributes_to_include.update(
            {
                key: str(value).strip()
                for key, value in node.attributes.items()
                if key in include_attributes and str(value).strip() != ''
            }
        )

    # Add format hints for date/time inputs to help LLMs use the correct format
    # NOTE: These formats are standardized by HTML5 specification (ISO 8601), NOT locale-dependent
    # The browser may DISPLAY dates in locale format (MM/DD/YYYY in US, DD/MM/YYYY in EU),
    # but the .value attribute and programmatic setting ALWAYS uses these ISO formats:
    # - date: YYYY-MM-DD (e.g., "2024-03-15")
    # - time: HH:MM or HH:MM:SS (24-hour, e.g., "14:30")
    # - datetime-local: YYYY-MM-DDTHH:MM (e.g., "2024-03-15T14:30")
    # Reference: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/input/date
    if node.tag_name and node.tag_name.lower() == 'input' and node.attributes:
        input_type = node.attributes.get('type', '').lower()

        # For HTML5 date/time inputs, add a highly visible "format" attribute
        # so the required format is hard for the model to miss
        if input_type in ['date', 'time', 'datetime-local', 'month', 'week']:
            format_map = {
                'date': 'YYYY-MM-DD',
                'time': 'HH:MM',
                'datetime-local': 'YYYY-MM-DDTHH:MM',
                'month': 'YYYY-MM',
                'week': 'YYYY-W##',
            }
            # Add format as a special attribute that appears prominently.
            # It is inserted before any placeholder synthesized below; a placeholder
            # that came from the HTML attributes was already inserted above.
            attributes_to_include['format'] = format_map[input_type]

        # Only add placeholder if it doesn't already exist
        # (and only when the caller asked for 'placeholder' at all)
        if 'placeholder' in include_attributes and 'placeholder' not in attributes_to_include:
            # Native HTML5 date/time inputs - ISO format required
            if input_type == 'date':
                attributes_to_include['placeholder'] = 'YYYY-MM-DD'
            elif input_type == 'time':
                attributes_to_include['placeholder'] = 'HH:MM'
            elif input_type == 'datetime-local':
                attributes_to_include['placeholder'] = 'YYYY-MM-DDTHH:MM'
            elif input_type == 'month':
                attributes_to_include['placeholder'] = 'YYYY-MM'
            elif input_type == 'week':
                attributes_to_include['placeholder'] = 'YYYY-W##'
            # Tel - suggest format if no pattern attribute
            elif input_type == 'tel' and 'pattern' not in attributes_to_include:
                attributes_to_include['placeholder'] = '123-456-7890'
            # jQuery/Bootstrap/AngularJS datepickers (text inputs with datepicker classes/attributes)
            elif input_type in {'text', ''}:
                class_attr = node.attributes.get('class', '').lower()

                # Check for AngularJS UI Bootstrap datepicker (uib-datepicker-popup attribute)
                # This takes precedence as it's the most specific indicator
                if 'uib-datepicker-popup' in node.attributes:
                    # Extract format from uib-datepicker-popup="MM/dd/yyyy"
                    date_format = node.attributes.get('uib-datepicker-popup', '')
                    if date_format:
                        # Use 'expected_format' for clarity - this is the required input format
                        attributes_to_include['expected_format'] = date_format
                        # Also keep format for consistency with HTML5 date inputs
                        attributes_to_include['format'] = date_format
                # Detect jQuery/Bootstrap datepickers by class names
                elif any(indicator in class_attr for indicator in ['datepicker', 'datetimepicker', 'daterangepicker']):
                    # Try to get format from data-date-format attribute
                    date_format = node.attributes.get('data-date-format', '')
                    if date_format:
                        attributes_to_include['placeholder'] = date_format
                        attributes_to_include['format'] = date_format  # Also add format for jQuery datepickers
                    else:
                        # Default to common US format for jQuery datepickers
                        attributes_to_include['placeholder'] = 'mm/dd/yyyy'
                        attributes_to_include['format'] = 'mm/dd/yyyy'
                # Also detect by data-* attributes
                elif any(attr in node.attributes for attr in ['data-datepicker']):
                    date_format = node.attributes.get('data-date-format', '')
                    if date_format:
                        attributes_to_include['placeholder'] = date_format
                        attributes_to_include['format'] = date_format
                    else:
                        attributes_to_include['placeholder'] = 'mm/dd/yyyy'
                        attributes_to_include['format'] = 'mm/dd/yyyy'

    # Include accessibility properties
    # (an AX property overwrites an HTML attribute of the same name)
    if node.ax_node and node.ax_node.properties:
        for prop in node.ax_node.properties:
            try:
                if prop.name in include_attributes and prop.value is not None:
                    # Convert boolean to lowercase string, keep others as-is
                    if isinstance(prop.value, bool):
                        attributes_to_include[prop.name] = str(prop.value).lower()
                    else:
                        prop_value_str = str(prop.value).strip()
                        if prop_value_str:
                            attributes_to_include[prop.name] = prop_value_str
            except (AttributeError, ValueError):
                # Best effort: a malformed property is skipped, not fatal.
                continue

    # Special handling for form elements - ensure current value is shown
    # For text inputs, textareas, and selects, prioritize showing the current value from AX tree
    if node.tag_name and node.tag_name.lower() in ['input', 'textarea', 'select']:
        # ALWAYS check AX tree - it reflects actual typed value, DOM attribute may not update
        if node.ax_node and node.ax_node.properties:
            # The first 'valuetext' or 'value' property with a non-blank value wins.
            for prop in node.ax_node.properties:
                # Try valuetext first (human-readable display value)
                if prop.name == 'valuetext' and prop.value:
                    value_str = str(prop.value).strip()
                    if value_str:
                        attributes_to_include['value'] = value_str
                        break
                # Also try 'value' property directly
                elif prop.name == 'value' and prop.value:
                    value_str = str(prop.value).strip()
                    if value_str:
                        attributes_to_include['value'] = value_str
                        break

    if not attributes_to_include:
        return ''

    # Remove duplicate values
    # Only keys listed in include_attributes take part, in include_attributes order;
    # synthetic keys outside that list are never considered here.
    ordered_keys = [key for key in include_attributes if key in attributes_to_include]

    if len(ordered_keys) > 1:
        keys_to_remove = set()
        seen_values = {}

        # Attributes that should never be removed as duplicates (they serve distinct purposes)
        protected_attrs = {'format', 'expected_format', 'placeholder', 'value', 'aria-label', 'title'}

        for key in ordered_keys:
            value = attributes_to_include[key]
            # Short values (5 characters or fewer) are never treated as duplicates.
            if len(value) > 5:
                if value in seen_values and key not in protected_attrs:
                    keys_to_remove.add(key)
                else:
                    seen_values[value] = key

        for key in keys_to_remove:
            del attributes_to_include[key]

    # Remove attributes that duplicate accessibility data
    # NOTE(review): this comparison is case-sensitive, while the 'type' check below
    # lower-cases node_name; if node_name is upper-case for HTML elements this
    # branch may never fire - confirm intended behavior.
    role = node.ax_node.role if node.ax_node else None
    if role and node.node_name == role:
        attributes_to_include.pop('role', None)

    # Remove type attribute if it matches the tag name (e.g. <button type="button">)
    if 'type' in attributes_to_include and attributes_to_include['type'].lower() == node.node_name.lower():
        del attributes_to_include['type']

    # Remove invalid attribute if it's false (only show when true)
    if 'invalid' in attributes_to_include and attributes_to_include['invalid'].lower() == 'false':
        del attributes_to_include['invalid']

    # Boolean-like attributes are only shown when they are not explicitly falsy.
    boolean_attrs = {'required'}
    for attr in boolean_attrs:
        if attr in attributes_to_include and attributes_to_include[attr].lower() in {'false', '0', 'no'}:
            del attributes_to_include[attr]

    # Remove aria-expanded if we have expanded (prefer AX tree over HTML attribute)
    if 'expanded' in attributes_to_include and 'aria-expanded' in attributes_to_include:
        del attributes_to_include['aria-expanded']

    # Drop labelling attributes that merely repeat the element's text.
    attrs_to_remove_if_text_matches = ['aria-label', 'placeholder', 'title']
    for attr in attrs_to_remove_if_text_matches:
        if attributes_to_include.get(attr) and attributes_to_include.get(attr, '').strip().lower() == text.strip().lower():
            del attributes_to_include[attr]

    if attributes_to_include:
        # Format attributes, wrapping empty values in quotes for clarity
        formatted_attrs = []
        for key, value in attributes_to_include.items():
            # Each value is passed through cap_text_length with a limit of 100.
            capped_value = cap_text_length(value, 100)
            # Show empty values as key='' instead of key=
            if not capped_value:
                formatted_attrs.append(f"{key}=''")
            else:
                formatted_attrs.append(f'{key}={capped_value}')
        return ' '.join(formatted_attrs)

    return ''
|