optexity-browser-use 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use/__init__.py +157 -0
- browser_use/actor/__init__.py +11 -0
- browser_use/actor/element.py +1175 -0
- browser_use/actor/mouse.py +134 -0
- browser_use/actor/page.py +561 -0
- browser_use/actor/playground/flights.py +41 -0
- browser_use/actor/playground/mixed_automation.py +54 -0
- browser_use/actor/playground/playground.py +236 -0
- browser_use/actor/utils.py +176 -0
- browser_use/agent/cloud_events.py +282 -0
- browser_use/agent/gif.py +424 -0
- browser_use/agent/judge.py +170 -0
- browser_use/agent/message_manager/service.py +473 -0
- browser_use/agent/message_manager/utils.py +52 -0
- browser_use/agent/message_manager/views.py +98 -0
- browser_use/agent/prompts.py +413 -0
- browser_use/agent/service.py +2316 -0
- browser_use/agent/system_prompt.md +185 -0
- browser_use/agent/system_prompt_flash.md +10 -0
- browser_use/agent/system_prompt_no_thinking.md +183 -0
- browser_use/agent/views.py +743 -0
- browser_use/browser/__init__.py +41 -0
- browser_use/browser/cloud/cloud.py +203 -0
- browser_use/browser/cloud/views.py +89 -0
- browser_use/browser/events.py +578 -0
- browser_use/browser/profile.py +1158 -0
- browser_use/browser/python_highlights.py +548 -0
- browser_use/browser/session.py +3225 -0
- browser_use/browser/session_manager.py +399 -0
- browser_use/browser/video_recorder.py +162 -0
- browser_use/browser/views.py +200 -0
- browser_use/browser/watchdog_base.py +260 -0
- browser_use/browser/watchdogs/__init__.py +0 -0
- browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
- browser_use/browser/watchdogs/crash_watchdog.py +335 -0
- browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
- browser_use/browser/watchdogs/dom_watchdog.py +817 -0
- browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
- browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
- browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
- browser_use/browser/watchdogs/popups_watchdog.py +143 -0
- browser_use/browser/watchdogs/recording_watchdog.py +126 -0
- browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
- browser_use/browser/watchdogs/security_watchdog.py +280 -0
- browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
- browser_use/cli.py +2359 -0
- browser_use/code_use/__init__.py +16 -0
- browser_use/code_use/formatting.py +192 -0
- browser_use/code_use/namespace.py +665 -0
- browser_use/code_use/notebook_export.py +276 -0
- browser_use/code_use/service.py +1340 -0
- browser_use/code_use/system_prompt.md +574 -0
- browser_use/code_use/utils.py +150 -0
- browser_use/code_use/views.py +171 -0
- browser_use/config.py +505 -0
- browser_use/controller/__init__.py +3 -0
- browser_use/dom/enhanced_snapshot.py +161 -0
- browser_use/dom/markdown_extractor.py +169 -0
- browser_use/dom/playground/extraction.py +312 -0
- browser_use/dom/playground/multi_act.py +32 -0
- browser_use/dom/serializer/clickable_elements.py +200 -0
- browser_use/dom/serializer/code_use_serializer.py +287 -0
- browser_use/dom/serializer/eval_serializer.py +478 -0
- browser_use/dom/serializer/html_serializer.py +212 -0
- browser_use/dom/serializer/paint_order.py +197 -0
- browser_use/dom/serializer/serializer.py +1170 -0
- browser_use/dom/service.py +825 -0
- browser_use/dom/utils.py +129 -0
- browser_use/dom/views.py +906 -0
- browser_use/exceptions.py +5 -0
- browser_use/filesystem/__init__.py +0 -0
- browser_use/filesystem/file_system.py +619 -0
- browser_use/init_cmd.py +376 -0
- browser_use/integrations/gmail/__init__.py +24 -0
- browser_use/integrations/gmail/actions.py +115 -0
- browser_use/integrations/gmail/service.py +225 -0
- browser_use/llm/__init__.py +155 -0
- browser_use/llm/anthropic/chat.py +242 -0
- browser_use/llm/anthropic/serializer.py +312 -0
- browser_use/llm/aws/__init__.py +36 -0
- browser_use/llm/aws/chat_anthropic.py +242 -0
- browser_use/llm/aws/chat_bedrock.py +289 -0
- browser_use/llm/aws/serializer.py +257 -0
- browser_use/llm/azure/chat.py +91 -0
- browser_use/llm/base.py +57 -0
- browser_use/llm/browser_use/__init__.py +3 -0
- browser_use/llm/browser_use/chat.py +201 -0
- browser_use/llm/cerebras/chat.py +193 -0
- browser_use/llm/cerebras/serializer.py +109 -0
- browser_use/llm/deepseek/chat.py +212 -0
- browser_use/llm/deepseek/serializer.py +109 -0
- browser_use/llm/exceptions.py +29 -0
- browser_use/llm/google/__init__.py +3 -0
- browser_use/llm/google/chat.py +542 -0
- browser_use/llm/google/serializer.py +120 -0
- browser_use/llm/groq/chat.py +229 -0
- browser_use/llm/groq/parser.py +158 -0
- browser_use/llm/groq/serializer.py +159 -0
- browser_use/llm/messages.py +238 -0
- browser_use/llm/models.py +271 -0
- browser_use/llm/oci_raw/__init__.py +10 -0
- browser_use/llm/oci_raw/chat.py +443 -0
- browser_use/llm/oci_raw/serializer.py +229 -0
- browser_use/llm/ollama/chat.py +97 -0
- browser_use/llm/ollama/serializer.py +143 -0
- browser_use/llm/openai/chat.py +264 -0
- browser_use/llm/openai/like.py +15 -0
- browser_use/llm/openai/serializer.py +165 -0
- browser_use/llm/openrouter/chat.py +211 -0
- browser_use/llm/openrouter/serializer.py +26 -0
- browser_use/llm/schema.py +176 -0
- browser_use/llm/views.py +48 -0
- browser_use/logging_config.py +330 -0
- browser_use/mcp/__init__.py +18 -0
- browser_use/mcp/__main__.py +12 -0
- browser_use/mcp/client.py +544 -0
- browser_use/mcp/controller.py +264 -0
- browser_use/mcp/server.py +1114 -0
- browser_use/observability.py +204 -0
- browser_use/py.typed +0 -0
- browser_use/sandbox/__init__.py +41 -0
- browser_use/sandbox/sandbox.py +637 -0
- browser_use/sandbox/views.py +132 -0
- browser_use/screenshots/__init__.py +1 -0
- browser_use/screenshots/service.py +52 -0
- browser_use/sync/__init__.py +6 -0
- browser_use/sync/auth.py +357 -0
- browser_use/sync/service.py +161 -0
- browser_use/telemetry/__init__.py +51 -0
- browser_use/telemetry/service.py +112 -0
- browser_use/telemetry/views.py +101 -0
- browser_use/tokens/__init__.py +0 -0
- browser_use/tokens/custom_pricing.py +24 -0
- browser_use/tokens/mappings.py +4 -0
- browser_use/tokens/service.py +580 -0
- browser_use/tokens/views.py +108 -0
- browser_use/tools/registry/service.py +572 -0
- browser_use/tools/registry/views.py +174 -0
- browser_use/tools/service.py +1675 -0
- browser_use/tools/utils.py +82 -0
- browser_use/tools/views.py +100 -0
- browser_use/utils.py +670 -0
- optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
- optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
- optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
- optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
- optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,2729 @@
|
|
|
1
|
+
"""Default browser action handlers using CDP."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
|
|
6
|
+
from cdp_use.cdp.input.commands import DispatchKeyEventParameters
|
|
7
|
+
|
|
8
|
+
from browser_use.actor.utils import get_key_info
|
|
9
|
+
from browser_use.browser.events import (
|
|
10
|
+
ClickElementEvent,
|
|
11
|
+
GetDropdownOptionsEvent,
|
|
12
|
+
GoBackEvent,
|
|
13
|
+
GoForwardEvent,
|
|
14
|
+
RefreshEvent,
|
|
15
|
+
ScrollEvent,
|
|
16
|
+
ScrollToTextEvent,
|
|
17
|
+
SelectDropdownOptionEvent,
|
|
18
|
+
SendKeysEvent,
|
|
19
|
+
TypeTextEvent,
|
|
20
|
+
UploadFileEvent,
|
|
21
|
+
WaitEvent,
|
|
22
|
+
)
|
|
23
|
+
from browser_use.browser.views import BrowserError, URLNotAllowedError
|
|
24
|
+
from browser_use.browser.watchdog_base import BaseWatchdog
|
|
25
|
+
from browser_use.dom.service import EnhancedDOMTreeNode
|
|
26
|
+
from browser_use.observability import observe_debug
|
|
27
|
+
|
|
28
|
+
# The event models below hold forward references to EnhancedDOMTreeNode, so each
# one is rebuilt here. This has to run after all of the imports above are complete.
for _event_model in (
    ClickElementEvent,
    GetDropdownOptionsEvent,
    SelectDropdownOptionEvent,
    TypeTextEvent,
    ScrollEvent,
    UploadFileEvent,
):
    _event_model.model_rebuild()
del _event_model  # do not leave the loop variable behind in the module namespace
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class DefaultActionWatchdog(BaseWatchdog):
|
|
39
|
+
"""Handles default browser actions like click, type, and scroll using CDP."""
|
|
40
|
+
|
|
41
|
+
def _is_print_related_element(self, element_node: EnhancedDOMTreeNode) -> bool:
    """Report whether ``element_node`` looks like a print trigger.

    Detection relies only on the element's ``onclick`` attribute: the element
    is print-related when that attribute, lower-cased, contains the substring
    ``print`` (for example ``window.print()``, ``PrintElem()`` or ``print()``).
    An element without attributes or without an ``onclick`` handler never matches.
    """
    attrs = element_node.attributes
    if not attrs:
        return False

    # Case-insensitive substring match on the inline click handler
    handler = attrs.get('onclick', '').lower()
    return 'print' in handler
|
|
54
|
+
|
|
55
|
+
async def _handle_print_button_click(self, element_node: EnhancedDOMTreeNode) -> dict | None:
    """Render the focused page to a PDF via CDP instead of opening the print dialog.

    The PDF is produced with ``Page.printToPDF``, written to the configured
    downloads directory under a name derived from the page title, and announced
    by dispatching a ``FileDownloadedEvent``.

    Args:
        element_node: The print-related element that was clicked. It is not
            inspected here; the page held by the focused CDP session is printed.

    Returns:
        ``{'pdf_generated': True, 'path': <saved file path>}`` on success, or
        ``None`` when no downloads path is configured, the browser returns no
        PDF data, generation times out, or any other error occurs. Failures are
        logged as warnings and never raised to the caller.
    """
    try:
        # Everything needed is imported up front (still inside the try) so that an
        # import failure is reported before any file has been written to disk.
        import base64
        import re
        from pathlib import Path

        import anyio

        from browser_use.browser.events import FileDownloadedEvent

        # Check the destination first: rendering can take up to 15 seconds and is
        # pointless when there is nowhere to save the result.
        downloads_path = self.browser_session.browser_profile.downloads_path
        if not downloads_path:
            self.logger.warning('⚠️ No downloads path configured, cannot save PDF')
            return None

        # Get CDP session
        cdp_session = await self.browser_session.get_or_create_cdp_session(focus=True)

        # Generate PDF using CDP Page.printToPDF
        result = await asyncio.wait_for(
            cdp_session.cdp_client.send.Page.printToPDF(
                params={
                    'printBackground': True,
                    'preferCSSPageSize': True,
                },
                session_id=cdp_session.session_id,
            ),
            timeout=15.0,  # 15 second timeout for PDF generation
        )

        pdf_data = result.get('data')
        if not pdf_data:
            self.logger.warning('⚠️ PDF generation returned no data')
            return None

        # CDP returns the document base64-encoded
        pdf_bytes = base64.b64decode(pdf_data)

        # Derive the file name from the page title; any failure falls back to 'print.pdf'
        try:
            page_title = await asyncio.wait_for(self.browser_session.get_current_page_title(), timeout=2.0)
            # Remove everything except word characters, whitespace and '-', then
            # collapse whitespace runs (newlines and tabs included) to single spaces.
            # Trimming happens both before and after the 50-character cut so a blank
            # title yields 'print.pdf' and the name never ends in a space.
            safe_title = re.sub(r'[^\w\s-]', '', page_title)
            safe_title = ' '.join(safe_title.split())[:50].strip()
            filename = f'{safe_title}.pdf' if safe_title else 'print.pdf'
        except Exception:
            filename = 'print.pdf'

        # Ensure downloads directory exists
        downloads_dir = Path(downloads_path).expanduser().resolve()
        downloads_dir.mkdir(parents=True, exist_ok=True)

        # Pick 'name (1).pdf', 'name (2).pdf', ... when the plain name is taken
        final_path = downloads_dir / filename
        if final_path.exists():
            stem, suffix = final_path.stem, final_path.suffix
            counter = 1
            while (candidate := downloads_dir / f'{stem} ({counter}){suffix}').exists():
                counter += 1
            final_path = candidate

        # Write PDF to file
        async with await anyio.open_file(final_path, 'wb') as f:
            await f.write(pdf_bytes)

        # The file holds exactly the decoded bytes, so a blocking stat() call is not needed
        file_size = len(pdf_bytes)
        self.logger.info(f'✅ Generated PDF via CDP: {final_path} ({file_size:,} bytes)')

        # Dispatch FileDownloadedEvent
        page_url = await self.browser_session.get_current_page_url()
        self.browser_session.event_bus.dispatch(
            FileDownloadedEvent(
                url=page_url,
                path=str(final_path),
                file_name=final_path.name,
                file_size=file_size,
                file_type='pdf',
                mime_type='application/pdf',
                auto_download=False,  # This was intentional (user clicked print)
            )
        )

        return {'pdf_generated': True, 'path': str(final_path)}

    except TimeoutError:
        self.logger.warning('⏱️ PDF generation timed out')
        return None
    except Exception as e:
        self.logger.warning(f'⚠️ Failed to generate PDF via CDP: {type(e).__name__}: {e}')
        return None
|
|
152
|
+
|
|
153
|
+
@observe_debug(ignore_input=True, ignore_output=True, name='click_element_event')
async def on_ClickElementEvent(self, event: ClickElementEvent) -> dict | None:
    """Handle a click request for ``event.node`` with CDP.

    File inputs are rejected with a validation error instead of being clicked.
    Print-related elements are rendered straight to a PDF, with a regular click
    as the fallback when that fails. Everything else goes through
    ``_click_element_node_impl``.

    Returns:
        The metadata dict produced by the click or by PDF generation, a
        ``{'validation_error': <message>}`` dict when the element must not be
        clicked, or ``None`` when the click implementation returned no dict.

    Raises:
        BrowserError: If the session has no focused target (or it has no target ID).
    """
    # Check if session is alive before attempting any operations
    if not self.browser_session.agent_focus or not self.browser_session.agent_focus.target_id:
        error_msg = 'Cannot execute click: browser session is corrupted (target_id=None). Session may have crashed.'
        self.logger.error(error_msg)
        raise BrowserError(error_msg)

    # Use the provided node
    element_node = event.node
    index_for_logging = element_node.backend_node_id or 'unknown'

    # File inputs open a native upload dialog, so they are reported as a validation
    # error (returned, not raised, to avoid ERROR logs) rather than clicked.
    if self.browser_session.is_file_input(element_node):
        msg = f'Index {index_for_logging} - has an element which opens file upload dialog. To upload files please use a specific function to upload files'
        self.logger.info(msg)
        return {'validation_error': msg}

    # Print buttons: generate the PDF directly instead of opening the print dialog
    if self._is_print_related_element(element_node):
        self.logger.info(
            f'🖨️ Detected print button (index {index_for_logging}), generating PDF directly instead of opening dialog...'
        )
        pdf_metadata = await self._handle_print_button_click(element_node)
        if pdf_metadata and pdf_metadata.get('pdf_generated'):
            self.logger.info(f'💾 Generated PDF: {pdf_metadata.get("path")}')
            return pdf_metadata
        # Fall through to a regular click when PDF generation did not succeed
        self.logger.warning('⚠️ PDF generation failed, falling back to regular click')

    # Perform the actual click using internal implementation
    # (download detection lives in downloads_watchdog.py, not here)
    click_metadata = await self._click_element_node_impl(element_node)

    # Validation errors are returned without raising to avoid ERROR logs
    if isinstance(click_metadata, dict) and 'validation_error' in click_metadata:
        self.logger.info(click_metadata['validation_error'])
        return click_metadata

    self.logger.debug(f'🖱️ Clicked button {element_node.node_name}: {element_node.get_all_children_text(max_depth=2)}')
    self.logger.debug(f'Element xpath: {element_node.xpath}')

    return click_metadata if isinstance(click_metadata, dict) else None
|
|
214
|
+
|
|
215
|
+
async def on_TypeTextEvent(self, event: TypeTextEvent) -> dict | None:
    """Handle a text input request with CDP.

    When ``event.node`` has a falsy ``backend_node_id`` (including index 0) the
    text is typed to the page, i.e. to whatever currently has focus. Otherwise
    the text is typed into the element. If that fails, the element is clicked on
    a best-effort basis and the text is typed to the page as a fallback.

    Sensitive text never appears in log output: it is replaced by
    ``<sensitive_key_name>`` or ``<sensitive>``.

    Returns:
        The metadata returned by ``_input_text_element_node_impl`` when the text
        was typed into the element, or ``None`` when it was typed to the page
        (directly or as a fallback).
    """
    # Use the provided node
    element_node = event.node
    index_for_logging = element_node.backend_node_id or 'unknown'

    # Placeholder shown in logs instead of the text when the input is sensitive
    if not event.is_sensitive:
        typed = f'"{event.text}"'
    elif event.sensitive_key_name:
        typed = f'<{event.sensitive_key_name}>'
    else:
        typed = '<sensitive>'

    # Index 0 or a falsy index - type to the page without focusing any specific element
    if not element_node.backend_node_id:
        await self._type_to_page(event.text)
        self.logger.info(f'⌨️ Typed {typed} to the page (current focus)')
        return None  # No coordinates available for page typing

    try:
        # Only the typing itself is guarded, so a failure in the logging below
        # can never trigger the fallback and enter the text a second time.
        input_metadata = await self._input_text_element_node_impl(
            element_node,
            event.text,
            clear=event.clear or (not event.text),
            is_sensitive=event.is_sensitive,
        )
    except Exception as e:
        # Element not found or error - fall back to typing to the page
        self.logger.warning(f'Failed to type to element {index_for_logging}: {e}. Falling back to page typing.')
        try:
            # Best-effort click on the element first (presumably to give it focus)
            await asyncio.wait_for(self._click_element_node_impl(element_node), timeout=10.0)
        except Exception as click_error:
            # Deliberately non-fatal, but no longer silent
            self.logger.debug(
                f'Click before fallback typing failed: {type(click_error).__name__}: {click_error}'
            )
        await self._type_to_page(event.text)
        self.logger.info(f'⌨️ Typed {typed} to the page as fallback')
        return None  # No coordinates available for fallback typing

    self.logger.info(f'⌨️ Typed {typed} into element with index {index_for_logging}')
    self.logger.debug(f'Element xpath: {element_node.xpath}')

    # Note: We don't clear cached state here - let multi_act handle DOM change detection
    # by explicitly rebuilding and comparing when needed
    return input_metadata  # Return coordinates if available
|
|
276
|
+
|
|
277
|
+
async def on_ScrollEvent(self, event: ScrollEvent) -> None:
    """Handle a scroll request with CDP.

    When ``event.node`` is given, the element's container is scrolled first. If
    that does not succeed, or no node is given, the scroll is performed at
    target level with a CDP gesture.

    Raises:
        BrowserError: If the session has no focused target to scroll.
    """
    # Check if we have a current target for scrolling
    if not self.browser_session.agent_focus:
        raise BrowserError('No active target for scrolling')

    # Convert direction and amount to pixels.
    # Positive pixels = scroll down; any direction other than 'down' is negated.
    pixels = event.amount if event.direction == 'down' else -event.amount

    # Element-specific scrolling if node is provided
    element_node = event.node
    if element_node is not None:
        index_for_logging = element_node.backend_node_id or 'unknown'
        is_iframe = element_node.tag_name and element_node.tag_name.upper() == 'IFRAME'

        # Try to scroll the element's container
        if await self._scroll_element_container(element_node, pixels):
            self.logger.debug(
                f'📜 Scrolled element {index_for_logging} container {event.direction} by {event.amount} pixels'
            )

            if is_iframe:
                self.logger.debug('🔄 Forcing DOM refresh after iframe scroll')
                # Note: We don't clear cached state here - let multi_act handle DOM change detection
                # by explicitly rebuilding and comparing when needed

                # Wait a bit for the scroll to settle and DOM to update
                await asyncio.sleep(0.2)

            return None

    # Perform target-level scroll (also reached when the element scroll did not succeed)
    await self._scroll_with_cdp_gesture(pixels)

    # Note: We don't clear cached state here - let multi_act handle DOM change detection
    # by explicitly rebuilding and comparing when needed
    self.logger.debug(f'📜 Scrolled {event.direction} by {event.amount} pixels')
    return None
|
|
327
|
+
|
|
328
|
+
# ========== Implementation Methods ==========
|
|
329
|
+
|
|
330
|
+
async def _check_element_occlusion(self, backend_node_id: int, x: float, y: float, cdp_session) -> bool:
    """Determine whether another element covers the target at the given coordinates.

    The target node is resolved to a remote object, and a script run on that
    object compares it with ``document.elementFromPoint(x, y)``. The target is
    treated as clickable when the element at that point is the target itself,
    is contained in the target, or contains the target.

    Args:
        backend_node_id: The backend node ID of the target element
        x: X coordinate to check
        y: Y coordinate to check
        cdp_session: CDP session to use

    Returns:
        True if the element is occluded, or if the node or the script result
        cannot be obtained; False if it is clickable. If the check itself raises,
        the element is assumed not to be occluded and False is returned.
    """
    # Runs with `this` bound to the target element; arguments are (x, y)
    probe_script = """
        function() {
            const getElementInfo = (el) => {
                return {
                    tagName: el.tagName,
                    id: el.id || '',
                    className: el.className || '',
                    textContent: (el.textContent || '').substring(0, 100)
                };
            };


            const elementAtPoint = document.elementFromPoint(arguments[0], arguments[1]);
            if (!elementAtPoint) {
                return { targetInfo: getElementInfo(this), isClickable: false };
            }


            // Simple containment-based clickability logic
            const isClickable = this === elementAtPoint ||
                this.contains(elementAtPoint) ||
                elementAtPoint.contains(this);

            return {
                targetInfo: getElementInfo(this),
                elementAtPointInfo: getElementInfo(elementAtPoint),
                isClickable: isClickable
            };
        }
        """

    try:
        sid = cdp_session.session_id
        send = cdp_session.cdp_client.send

        # Resolve the backend node to a remote object the script can run on
        resolved = await send.DOM.resolveNode(params={'backendNodeId': backend_node_id}, session_id=sid)
        if 'object' not in resolved:
            self.logger.debug('Could not resolve target element, assuming occluded')
            return True

        probe = await send.Runtime.callFunctionOn(
            params={
                'objectId': resolved['object']['objectId'],
                'functionDeclaration': probe_script,
                'arguments': [{'value': x}, {'value': y}],
                'returnByValue': True,
            },
            session_id=sid,
        )
        if not ('result' in probe and 'value' in probe['result']):
            self.logger.debug('Could not get target element info, assuming occluded')
            return True

        outcome = probe['result']['value']
        if outcome.get('isClickable', False):
            self.logger.debug('Element is clickable (target, contained, or semantically related)')
            return False

        # Occluded: log both elements to help diagnose what is in the way
        target_info = outcome.get('targetInfo', {})
        at_point_info = outcome.get('elementAtPointInfo', {})
        self.logger.debug(
            f'Element is occluded. Target: {target_info.get("tagName", "unknown")} '
            f'(id={target_info.get("id", "none")}), '
            f'ElementAtPoint: {at_point_info.get("tagName", "unknown")} '
            f'(id={at_point_info.get("id", "none")})'
        )
        return True

    except Exception as e:
        self.logger.debug(f'Occlusion check failed: {e}, assuming not occluded')
        return False
|
|
420
|
+
|
|
421
|
+
async def _click_element_node_impl(self, element_node) -> dict | None:
|
|
422
|
+
"""
|
|
423
|
+
Click an element using pure CDP with multiple fallback methods for getting element geometry.
|
|
424
|
+
|
|
425
|
+
Args:
|
|
426
|
+
element_node: The DOM element to click
|
|
427
|
+
"""
|
|
428
|
+
|
|
429
|
+
try:
|
|
430
|
+
# Check if element is a file input or select dropdown - these should not be clicked
|
|
431
|
+
tag_name = element_node.tag_name.lower() if element_node.tag_name else ''
|
|
432
|
+
element_type = element_node.attributes.get('type', '').lower() if element_node.attributes else ''
|
|
433
|
+
|
|
434
|
+
if tag_name == 'select':
|
|
435
|
+
msg = f'Cannot click on <select> elements. Use dropdown_options(index={element_node.backend_node_id}) action instead.'
|
|
436
|
+
# Return error dict instead of raising to avoid ERROR logs
|
|
437
|
+
return {'validation_error': msg}
|
|
438
|
+
|
|
439
|
+
if tag_name == 'input' and element_type == 'file':
|
|
440
|
+
msg = f'Cannot click on file input element (index={element_node.backend_node_id}). File uploads must be handled using upload_file_to_element action.'
|
|
441
|
+
# Return error dict instead of raising to avoid ERROR logs
|
|
442
|
+
return {'validation_error': msg}
|
|
443
|
+
|
|
444
|
+
# Get CDP client
|
|
445
|
+
cdp_session = await self.browser_session.cdp_client_for_node(element_node)
|
|
446
|
+
|
|
447
|
+
# Get the correct session ID for the element's frame
|
|
448
|
+
session_id = cdp_session.session_id
|
|
449
|
+
|
|
450
|
+
# Get element bounds
|
|
451
|
+
backend_node_id = element_node.backend_node_id
|
|
452
|
+
|
|
453
|
+
# Get viewport dimensions for visibility checks
|
|
454
|
+
layout_metrics = await cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=session_id)
|
|
455
|
+
viewport_width = layout_metrics['layoutViewport']['clientWidth']
|
|
456
|
+
viewport_height = layout_metrics['layoutViewport']['clientHeight']
|
|
457
|
+
|
|
458
|
+
# Scroll element into view FIRST before getting coordinates
|
|
459
|
+
try:
|
|
460
|
+
await cdp_session.cdp_client.send.DOM.scrollIntoViewIfNeeded(
|
|
461
|
+
params={'backendNodeId': backend_node_id}, session_id=session_id
|
|
462
|
+
)
|
|
463
|
+
await asyncio.sleep(0.05) # Wait for scroll to complete
|
|
464
|
+
self.logger.debug('Scrolled element into view before getting coordinates')
|
|
465
|
+
except Exception as e:
|
|
466
|
+
self.logger.debug(f'Failed to scroll element into view: {e}')
|
|
467
|
+
|
|
468
|
+
# Get element coordinates using the unified method AFTER scrolling
|
|
469
|
+
element_rect = await self.browser_session.get_element_coordinates(backend_node_id, cdp_session)
|
|
470
|
+
|
|
471
|
+
# Convert rect to quads format if we got coordinates
|
|
472
|
+
quads = []
|
|
473
|
+
if element_rect:
|
|
474
|
+
# Convert DOMRect to quad format
|
|
475
|
+
x, y, w, h = element_rect.x, element_rect.y, element_rect.width, element_rect.height
|
|
476
|
+
quads = [
|
|
477
|
+
[
|
|
478
|
+
x,
|
|
479
|
+
y, # top-left
|
|
480
|
+
x + w,
|
|
481
|
+
y, # top-right
|
|
482
|
+
x + w,
|
|
483
|
+
y + h, # bottom-right
|
|
484
|
+
x,
|
|
485
|
+
y + h, # bottom-left
|
|
486
|
+
]
|
|
487
|
+
]
|
|
488
|
+
self.logger.debug(
|
|
489
|
+
f'Got coordinates from unified method: {element_rect.x}, {element_rect.y}, {element_rect.width}x{element_rect.height}'
|
|
490
|
+
)
|
|
491
|
+
|
|
492
|
+
# If we still don't have quads, fall back to JS click
|
|
493
|
+
if not quads:
|
|
494
|
+
self.logger.warning('Could not get element geometry from any method, falling back to JavaScript click')
|
|
495
|
+
try:
|
|
496
|
+
result = await cdp_session.cdp_client.send.DOM.resolveNode(
|
|
497
|
+
params={'backendNodeId': backend_node_id},
|
|
498
|
+
session_id=session_id,
|
|
499
|
+
)
|
|
500
|
+
assert 'object' in result and 'objectId' in result['object'], (
|
|
501
|
+
'Failed to find DOM element based on backendNodeId, maybe page content changed?'
|
|
502
|
+
)
|
|
503
|
+
object_id = result['object']['objectId']
|
|
504
|
+
|
|
505
|
+
await cdp_session.cdp_client.send.Runtime.callFunctionOn(
|
|
506
|
+
params={
|
|
507
|
+
'functionDeclaration': 'function() { this.click(); }',
|
|
508
|
+
'objectId': object_id,
|
|
509
|
+
},
|
|
510
|
+
session_id=session_id,
|
|
511
|
+
)
|
|
512
|
+
await asyncio.sleep(0.05)
|
|
513
|
+
# Navigation is handled by BrowserSession via events
|
|
514
|
+
return None
|
|
515
|
+
except Exception as js_e:
|
|
516
|
+
self.logger.error(f'CDP JavaScript click also failed: {js_e}')
|
|
517
|
+
if 'No node with given id found' in str(js_e):
|
|
518
|
+
raise Exception('Element with given id not found')
|
|
519
|
+
else:
|
|
520
|
+
raise Exception(f'Failed to click element: {js_e}')
|
|
521
|
+
|
|
522
|
+
# Find the largest visible quad within the viewport
|
|
523
|
+
best_quad = None
|
|
524
|
+
best_area = 0
|
|
525
|
+
|
|
526
|
+
for quad in quads:
|
|
527
|
+
if len(quad) < 8:
|
|
528
|
+
continue
|
|
529
|
+
|
|
530
|
+
# Calculate quad bounds
|
|
531
|
+
xs = [quad[i] for i in range(0, 8, 2)]
|
|
532
|
+
ys = [quad[i] for i in range(1, 8, 2)]
|
|
533
|
+
min_x, max_x = min(xs), max(xs)
|
|
534
|
+
min_y, max_y = min(ys), max(ys)
|
|
535
|
+
|
|
536
|
+
# Check if quad intersects with viewport
|
|
537
|
+
if max_x < 0 or max_y < 0 or min_x > viewport_width or min_y > viewport_height:
|
|
538
|
+
continue # Quad is completely outside viewport
|
|
539
|
+
|
|
540
|
+
# Calculate visible area (intersection with viewport)
|
|
541
|
+
visible_min_x = max(0, min_x)
|
|
542
|
+
visible_max_x = min(viewport_width, max_x)
|
|
543
|
+
visible_min_y = max(0, min_y)
|
|
544
|
+
visible_max_y = min(viewport_height, max_y)
|
|
545
|
+
|
|
546
|
+
visible_width = visible_max_x - visible_min_x
|
|
547
|
+
visible_height = visible_max_y - visible_min_y
|
|
548
|
+
visible_area = visible_width * visible_height
|
|
549
|
+
|
|
550
|
+
if visible_area > best_area:
|
|
551
|
+
best_area = visible_area
|
|
552
|
+
best_quad = quad
|
|
553
|
+
|
|
554
|
+
if not best_quad:
|
|
555
|
+
# No visible quad found, use the first quad anyway
|
|
556
|
+
best_quad = quads[0]
|
|
557
|
+
self.logger.warning('No visible quad found, using first quad')
|
|
558
|
+
|
|
559
|
+
# Calculate center point of the best quad
|
|
560
|
+
center_x = sum(best_quad[i] for i in range(0, 8, 2)) / 4
|
|
561
|
+
center_y = sum(best_quad[i] for i in range(1, 8, 2)) / 4
|
|
562
|
+
|
|
563
|
+
# Ensure click point is within viewport bounds
|
|
564
|
+
center_x = max(0, min(viewport_width - 1, center_x))
|
|
565
|
+
center_y = max(0, min(viewport_height - 1, center_y))
|
|
566
|
+
|
|
567
|
+
# Check for occlusion before attempting CDP click
|
|
568
|
+
is_occluded = await self._check_element_occlusion(backend_node_id, center_x, center_y, cdp_session)
|
|
569
|
+
|
|
570
|
+
if is_occluded:
|
|
571
|
+
self.logger.debug('🚫 Element is occluded, falling back to JavaScript click')
|
|
572
|
+
try:
|
|
573
|
+
result = await cdp_session.cdp_client.send.DOM.resolveNode(
|
|
574
|
+
params={'backendNodeId': backend_node_id},
|
|
575
|
+
session_id=session_id,
|
|
576
|
+
)
|
|
577
|
+
assert 'object' in result and 'objectId' in result['object'], (
|
|
578
|
+
'Failed to find DOM element based on backendNodeId'
|
|
579
|
+
)
|
|
580
|
+
object_id = result['object']['objectId']
|
|
581
|
+
|
|
582
|
+
await cdp_session.cdp_client.send.Runtime.callFunctionOn(
|
|
583
|
+
params={
|
|
584
|
+
'functionDeclaration': 'function() { this.click(); }',
|
|
585
|
+
'objectId': object_id,
|
|
586
|
+
},
|
|
587
|
+
session_id=session_id,
|
|
588
|
+
)
|
|
589
|
+
await asyncio.sleep(0.05)
|
|
590
|
+
return None
|
|
591
|
+
except Exception as js_e:
|
|
592
|
+
self.logger.error(f'JavaScript click fallback failed: {js_e}')
|
|
593
|
+
raise Exception(f'Failed to click occluded element: {js_e}')
|
|
594
|
+
|
|
595
|
+
# Perform the click using CDP (element is not occluded)
|
|
596
|
+
try:
|
|
597
|
+
self.logger.debug(f'👆 Dragging mouse over element before clicking x: {center_x}px y: {center_y}px ...')
|
|
598
|
+
# Move mouse to element
|
|
599
|
+
await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
|
|
600
|
+
params={
|
|
601
|
+
'type': 'mouseMoved',
|
|
602
|
+
'x': center_x,
|
|
603
|
+
'y': center_y,
|
|
604
|
+
},
|
|
605
|
+
session_id=session_id,
|
|
606
|
+
)
|
|
607
|
+
await asyncio.sleep(0.05)
|
|
608
|
+
|
|
609
|
+
# Mouse down
|
|
610
|
+
self.logger.debug(f'👆🏾 Clicking x: {center_x}px y: {center_y}px ...')
|
|
611
|
+
try:
|
|
612
|
+
await asyncio.wait_for(
|
|
613
|
+
cdp_session.cdp_client.send.Input.dispatchMouseEvent(
|
|
614
|
+
params={
|
|
615
|
+
'type': 'mousePressed',
|
|
616
|
+
'x': center_x,
|
|
617
|
+
'y': center_y,
|
|
618
|
+
'button': 'left',
|
|
619
|
+
'clickCount': 1,
|
|
620
|
+
},
|
|
621
|
+
session_id=session_id,
|
|
622
|
+
),
|
|
623
|
+
timeout=3.0, # 3 second timeout for mousePressed
|
|
624
|
+
)
|
|
625
|
+
await asyncio.sleep(0.08)
|
|
626
|
+
except TimeoutError:
|
|
627
|
+
self.logger.debug('⏱️ Mouse down timed out (likely due to dialog), continuing...')
|
|
628
|
+
# Don't sleep if we timed out
|
|
629
|
+
|
|
630
|
+
# Mouse up
|
|
631
|
+
try:
|
|
632
|
+
await asyncio.wait_for(
|
|
633
|
+
cdp_session.cdp_client.send.Input.dispatchMouseEvent(
|
|
634
|
+
params={
|
|
635
|
+
'type': 'mouseReleased',
|
|
636
|
+
'x': center_x,
|
|
637
|
+
'y': center_y,
|
|
638
|
+
'button': 'left',
|
|
639
|
+
'clickCount': 1,
|
|
640
|
+
},
|
|
641
|
+
session_id=session_id,
|
|
642
|
+
),
|
|
643
|
+
timeout=5.0, # 5 second timeout for mouseReleased
|
|
644
|
+
)
|
|
645
|
+
except TimeoutError:
|
|
646
|
+
self.logger.debug('⏱️ Mouse up timed out (possibly due to lag or dialog popup), continuing...')
|
|
647
|
+
|
|
648
|
+
self.logger.debug('🖱️ Clicked successfully using x,y coordinates')
|
|
649
|
+
|
|
650
|
+
# Return coordinates as dict for metadata
|
|
651
|
+
return {'click_x': center_x, 'click_y': center_y}
|
|
652
|
+
|
|
653
|
+
except Exception as e:
|
|
654
|
+
self.logger.warning(f'CDP click failed: {type(e).__name__}: {e}')
|
|
655
|
+
# Fall back to JavaScript click via CDP
|
|
656
|
+
try:
|
|
657
|
+
result = await cdp_session.cdp_client.send.DOM.resolveNode(
|
|
658
|
+
params={'backendNodeId': backend_node_id},
|
|
659
|
+
session_id=session_id,
|
|
660
|
+
)
|
|
661
|
+
assert 'object' in result and 'objectId' in result['object'], (
|
|
662
|
+
'Failed to find DOM element based on backendNodeId, maybe page content changed?'
|
|
663
|
+
)
|
|
664
|
+
object_id = result['object']['objectId']
|
|
665
|
+
|
|
666
|
+
await cdp_session.cdp_client.send.Runtime.callFunctionOn(
|
|
667
|
+
params={
|
|
668
|
+
'functionDeclaration': 'function() { this.click(); }',
|
|
669
|
+
'objectId': object_id,
|
|
670
|
+
},
|
|
671
|
+
session_id=session_id,
|
|
672
|
+
)
|
|
673
|
+
|
|
674
|
+
# Small delay for dialog dismissal
|
|
675
|
+
await asyncio.sleep(0.1)
|
|
676
|
+
|
|
677
|
+
return None
|
|
678
|
+
except Exception as js_e:
|
|
679
|
+
self.logger.error(f'CDP JavaScript click also failed: {js_e}')
|
|
680
|
+
raise Exception(f'Failed to click element: {e}')
|
|
681
|
+
finally:
|
|
682
|
+
# Always re-focus back to original top-level page session context in case click opened a new tab/popup/window/dialog/etc.
|
|
683
|
+
# Use timeout to prevent hanging if dialog is blocking
|
|
684
|
+
try:
|
|
685
|
+
cdp_session = await asyncio.wait_for(self.browser_session.get_or_create_cdp_session(focus=True), timeout=3.0)
|
|
686
|
+
await asyncio.wait_for(
|
|
687
|
+
cdp_session.cdp_client.send.Runtime.runIfWaitingForDebugger(session_id=cdp_session.session_id),
|
|
688
|
+
timeout=2.0,
|
|
689
|
+
)
|
|
690
|
+
except TimeoutError:
|
|
691
|
+
self.logger.debug('⏱️ Refocus after click timed out (page may be blocked by dialog). Continuing...')
|
|
692
|
+
except Exception as e:
|
|
693
|
+
self.logger.debug(f'⚠️ Refocus error (non-critical): {type(e).__name__}: {e}')
|
|
694
|
+
|
|
695
|
+
except URLNotAllowedError as e:
|
|
696
|
+
raise e
|
|
697
|
+
except BrowserError as e:
|
|
698
|
+
raise e
|
|
699
|
+
except Exception as e:
|
|
700
|
+
# Extract key element info for error message
|
|
701
|
+
element_info = f'<{element_node.tag_name or "unknown"}'
|
|
702
|
+
if element_node.backend_node_id:
|
|
703
|
+
element_info += f' index={element_node.backend_node_id}'
|
|
704
|
+
element_info += '>'
|
|
705
|
+
|
|
706
|
+
# Create helpful error message based on context
|
|
707
|
+
error_detail = f'Failed to click element {element_info}. The element may not be interactable or visible.'
|
|
708
|
+
|
|
709
|
+
# Add hint if element has index (common in code-use mode)
|
|
710
|
+
if element_node.backend_node_id:
|
|
711
|
+
error_detail += f' If the page changed after navigation/interaction, the index [{element_node.backend_node_id}] may be stale. Get fresh browser state before retrying.'
|
|
712
|
+
|
|
713
|
+
raise BrowserError(
|
|
714
|
+
message=f'Failed to click element: {e}',
|
|
715
|
+
long_term_memory=error_detail,
|
|
716
|
+
)
|
|
717
|
+
|
|
718
|
+
async def _type_to_page(self, text: str):
    """
    Send ``text`` as keystrokes to the page (whatever element currently has focus).

    This is used when index is 0 or when an element can't be found.
    A newline is delivered as an Enter key press (keyDown, char '\\r', keyUp);
    any other character is sent as keyDown / char / keyUp carrying the
    character itself. There is an 18 ms pause after each character.

    Raises:
        Exception: wrapping any error raised while acquiring the CDP session
            or dispatching key events.
    """
    try:
        # Get CDP client and session
        cdp_session = await self.browser_session.get_or_create_cdp_session(target_id=None, focus=True)

        # Type the text character by character to the focused element
        for ch in text:
            if ch == '\n':
                # Newline -> proper Enter key sequence, with a carriage-return char event
                payloads = [
                    {'type': 'keyDown', 'key': 'Enter', 'code': 'Enter', 'windowsVirtualKeyCode': 13},
                    {'type': 'char', 'text': '\r'},
                    {'type': 'keyUp', 'key': 'Enter', 'code': 'Enter', 'windowsVirtualKeyCode': 13},
                ]
            else:
                # Regular character: the 'char' event is what inserts the text
                payloads = [
                    {'type': 'keyDown', 'key': ch},
                    {'type': 'char', 'text': ch},
                    {'type': 'keyUp', 'key': ch},
                ]

            for payload in payloads:
                await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
                    params=payload,
                    session_id=cdp_session.session_id,
                )

            # Add 18ms delay between keystrokes
            await asyncio.sleep(0.018)

    except Exception as e:
        raise Exception(f'Failed to type to page: {str(e)}')
|
|
790
|
+
|
|
791
|
+
def _get_char_modifiers_and_vk(self, char: str) -> tuple[int, int, str]:
    """Get modifiers, virtual key code, and base key for a character.

    Args:
        char: A single character.

    Returns:
        (modifiers, windowsVirtualKeyCode, base_key) where ``modifiers`` is 8
        when Shift is needed and 0 otherwise, and ``base_key`` is the
        unshifted key for the character. Characters not covered by the
        tables below fall back to their own code point as the key code.
    """

    def upper_code_point(c: str) -> int:
        # A few letters upper-case to more than one character ('ß' -> 'SS');
        # ord() raises TypeError on those, so fall back to the char's own code point.
        upper = c.upper()
        return ord(upper) if len(upper) == 1 else ord(c)

    # Characters that require Shift modifier: char -> (base key, virtual key code)
    shift_chars = {
        '!': ('1', 49),
        '@': ('2', 50),
        '#': ('3', 51),
        '$': ('4', 52),
        '%': ('5', 53),
        '^': ('6', 54),
        '&': ('7', 55),
        '*': ('8', 56),
        '(': ('9', 57),
        ')': ('0', 48),
        '_': ('-', 189),
        '+': ('=', 187),
        '{': ('[', 219),
        '}': (']', 221),
        '|': ('\\', 220),
        ':': (';', 186),
        '"': ("'", 222),
        '<': (',', 188),
        '>': ('.', 190),
        '?': ('/', 191),
        '~': ('`', 192),
    }

    # Check if character requires Shift
    if char in shift_chars:
        base_key, vk_code = shift_chars[char]
        return (8, vk_code, base_key)  # Shift=8

    # Uppercase letters require Shift
    if char.isupper():
        return (8, ord(char), char.lower())  # Shift=8

    # Lowercase letters
    if char.islower():
        return (0, upper_code_point(char), char)

    # Numbers
    if char.isdigit():
        return (0, ord(char), char)

    # Special characters without Shift
    no_shift_chars = {
        ' ': 32,
        '-': 189,
        '=': 187,
        '[': 219,
        ']': 221,
        '\\': 220,
        ';': 186,
        "'": 222,
        ',': 188,
        '.': 190,
        '/': 191,
        '`': 192,
    }

    if char in no_shift_chars:
        return (0, no_shift_chars[char], char)

    # Fallback
    return (0, upper_code_point(char) if char.isalpha() else ord(char), char)
|
|
860
|
+
|
|
861
|
+
def _get_key_code_for_char(self, char: str) -> str:
    """Get the proper key code for a character (like Playwright does).

    Shifted symbols map to the code of their base key (e.g. ``'!'`` ->
    ``'Digit1'``); the Shift modifier itself is not part of the return value.

    Args:
        char: A single character.

    Returns:
        ``'Digit<n>'`` for digits, ``'Key<LETTER>'`` for letters, the mapped
        code for the punctuation listed below, and ``'Key<CHAR>'`` as a
        fallback for anything else.
    """
    # Key code mapping for common characters (using proper base keys + modifiers)
    key_codes = {
        ' ': 'Space',
        '.': 'Period',
        '>': 'Period',  # > uses Period with Shift
        ',': 'Comma',
        '<': 'Comma',  # < uses Comma with Shift
        '-': 'Minus',
        '_': 'Minus',  # Underscore uses Minus with Shift
        '@': 'Digit2',  # @ uses Digit2 with Shift
        '!': 'Digit1',  # ! uses Digit1 with Shift (not 'Exclamation')
        '?': 'Slash',  # ? uses Slash with Shift
        ':': 'Semicolon',  # : uses Semicolon with Shift
        ';': 'Semicolon',
        '(': 'Digit9',  # ( uses Digit9 with Shift
        ')': 'Digit0',  # ) uses Digit0 with Shift
        '[': 'BracketLeft',
        ']': 'BracketRight',
        '{': 'BracketLeft',  # { uses BracketLeft with Shift
        '}': 'BracketRight',  # } uses BracketRight with Shift
        '/': 'Slash',
        '\\': 'Backslash',
        '|': 'Backslash',  # | uses Backslash with Shift
        '=': 'Equal',
        '+': 'Equal',  # + uses Equal with Shift
        '*': 'Digit8',  # * uses Digit8 with Shift
        '&': 'Digit7',  # & uses Digit7 with Shift
        '%': 'Digit5',  # % uses Digit5 with Shift
        '$': 'Digit4',  # $ uses Digit4 with Shift
        '#': 'Digit3',  # # uses Digit3 with Shift
        '^': 'Digit6',  # ^ uses Digit6 with Shift
        '~': 'Backquote',  # ~ uses Backquote with Shift
        '`': 'Backquote',
        "'": 'Quote',
        '"': 'Quote',  # " uses Quote with Shift
    }

    # Numbers
    if char.isdigit():
        return f'Digit{char}'

    # Letters
    if char.isalpha():
        return f'Key{char.upper()}'

    # Special characters
    if char in key_codes:
        return key_codes[char]

    # Fallback for unknown characters
    return f'Key{char.upper()}'
|
|
911
|
+
|
|
912
|
+
async def _clear_text_field(self, object_id: str, cdp_session) -> bool:
    """Clear text field using multiple strategies, starting with the most reliable.

    Strategies, tried in order until one succeeds:
      1. JavaScript: empty the element (contenteditable or ``.value``) and
         dispatch ``input``/``change`` events; success requires the reported
         final text to be empty or whitespace.
      2. Triple-click at the element centre, then Delete.
      3. Ctrl/Cmd+A, then Backspace.

    Strategies 2 and 3 only dispatch input events; they do not re-read the
    field, so ``True`` from them means "events were sent without error".

    Args:
        object_id: Runtime object id of the element to clear.
        cdp_session: Session whose ``cdp_client`` / ``session_id`` are used for all calls.

    Returns:
        True if a strategy reported success, False if all of them failed.
    """
    # Strategy 1: Direct JavaScript value/content setting (handles both inputs and contenteditable)
    try:
        self.logger.debug('🧹 Clearing text field using JavaScript value setting')

        clear_result = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
            params={
                'functionDeclaration': """
                    function() {
                        // Check if it's a contenteditable element
                        const hasContentEditable = this.getAttribute('contenteditable') === 'true' ||
                                                this.getAttribute('contenteditable') === '' ||
                                                this.isContentEditable === true;

                        if (hasContentEditable) {
                            // For contenteditable elements, clear all content
                            while (this.firstChild) {
                                this.removeChild(this.firstChild);
                            }
                            this.textContent = "";
                            this.innerHTML = "";

                            // Focus and position cursor at the beginning
                            this.focus();
                            const selection = window.getSelection();
                            const range = document.createRange();
                            range.setStart(this, 0);
                            range.setEnd(this, 0);
                            selection.removeAllRanges();
                            selection.addRange(range);

                            // Dispatch events
                            this.dispatchEvent(new Event("input", { bubbles: true }));
                            this.dispatchEvent(new Event("change", { bubbles: true }));

                            return {cleared: true, method: 'contenteditable', finalText: this.textContent};
                        } else if (this.value !== undefined) {
                            // For regular inputs with value property
                            try {
                                this.select();
                            } catch (e) {
                                // ignore
                            }
                            this.value = "";
                            this.dispatchEvent(new Event("input", { bubbles: true }));
                            this.dispatchEvent(new Event("change", { bubbles: true }));
                            return {cleared: true, method: 'value', finalText: this.value};
                        } else {
                            return {cleared: false, method: 'none', error: 'Not a supported input type'};
                        }
                    }
                """,
                'objectId': object_id,
                'returnByValue': True,
            },
            session_id=cdp_session.session_id,
        )

        # Check the clear result
        clear_info = clear_result.get('result', {}).get('value', {})
        self.logger.debug(f'Clear result: {clear_info}')

        if clear_info.get('cleared'):
            final_text = clear_info.get('finalText', '')
            if not final_text or not final_text.strip():
                self.logger.debug(f'✅ Text field cleared successfully using {clear_info.get("method")}')
                return True
            self.logger.debug(f'⚠️ JavaScript clear partially failed, field still contains: "{final_text}"')
        else:
            self.logger.debug(f'❌ JavaScript clear failed: {clear_info.get("error", "Unknown error")}')

    except Exception as e:
        self.logger.debug(f'JavaScript clear failed with exception: {e}')

    # NOTE: no early `return False` above — a failed Strategy 1 must fall
    # through, otherwise the fallbacks below can never run.

    # Strategy 2: Triple-click + Delete (fallback for stubborn fields)
    try:
        self.logger.debug('🧹 Fallback: Clearing using triple-click + Delete')

        # Get element center coordinates for triple-click
        bounds_result = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
            params={
                'functionDeclaration': 'function() { return this.getBoundingClientRect(); }',
                'objectId': object_id,
                'returnByValue': True,
            },
            session_id=cdp_session.session_id,
        )

        if bounds_result.get('result', {}).get('value'):
            bounds = bounds_result['result']['value']
            center_x = bounds['x'] + bounds['width'] / 2
            center_y = bounds['y'] + bounds['height'] / 2

            # Triple-click to select all text
            for mouse_event in ('mousePressed', 'mouseReleased'):
                await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
                    params={
                        'type': mouse_event,
                        'x': center_x,
                        'y': center_y,
                        'button': 'left',
                        'clickCount': 3,
                    },
                    session_id=cdp_session.session_id,
                )

            # Delete selected text
            for key_event in ('keyDown', 'keyUp'):
                await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
                    params={
                        'type': key_event,
                        'key': 'Delete',
                        'code': 'Delete',
                    },
                    session_id=cdp_session.session_id,
                )

            self.logger.debug('✅ Text field cleared using triple-click + Delete')
            return True

    except Exception as e:
        self.logger.debug(f'Triple-click clear failed: {e}')

    # Strategy 3: Keyboard shortcuts (last resort)
    try:
        import platform

        is_macos = platform.system() == 'Darwin'
        select_all_modifier = 4 if is_macos else 2  # Meta=4 (Cmd), Ctrl=2
        modifier_name = 'Cmd' if is_macos else 'Ctrl'

        self.logger.debug(f'🧹 Last resort: Clearing using {modifier_name}+A + Backspace')

        # Select all text (Ctrl/Cmd+A)
        for key_event in ('keyDown', 'keyUp'):
            await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
                params={
                    'type': key_event,
                    'key': 'a',
                    'code': 'KeyA',
                    'modifiers': select_all_modifier,
                },
                session_id=cdp_session.session_id,
            )

        # Delete selected text (Backspace)
        for key_event in ('keyDown', 'keyUp'):
            await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
                params={
                    'type': key_event,
                    'key': 'Backspace',
                    'code': 'Backspace',
                },
                session_id=cdp_session.session_id,
            )

        self.logger.debug('✅ Text field cleared using keyboard shortcuts')
        return True

    except Exception as e:
        self.logger.debug(f'All clearing strategies failed: {e}')
        return False
|
|
1110
|
+
|
|
1111
|
+
async def _focus_element_simple(
|
|
1112
|
+
self, backend_node_id: int, object_id: str, cdp_session, input_coordinates: dict | None = None
|
|
1113
|
+
) -> bool:
|
|
1114
|
+
"""Simple focus strategy: CDP first, then click if failed."""
|
|
1115
|
+
|
|
1116
|
+
# Strategy 1: Try CDP DOM.focus first
|
|
1117
|
+
try:
|
|
1118
|
+
result = await cdp_session.cdp_client.send.DOM.focus(
|
|
1119
|
+
params={'backendNodeId': backend_node_id},
|
|
1120
|
+
session_id=cdp_session.session_id,
|
|
1121
|
+
)
|
|
1122
|
+
self.logger.debug(f'Element focused using CDP DOM.focus (result: {result})')
|
|
1123
|
+
return True
|
|
1124
|
+
|
|
1125
|
+
except Exception as e:
|
|
1126
|
+
self.logger.debug(f'❌ CDP DOM.focus threw exception: {type(e).__name__}: {e}')
|
|
1127
|
+
|
|
1128
|
+
# Strategy 2: Try click to focus if CDP failed
|
|
1129
|
+
if input_coordinates and 'input_x' in input_coordinates and 'input_y' in input_coordinates:
|
|
1130
|
+
try:
|
|
1131
|
+
click_x = input_coordinates['input_x']
|
|
1132
|
+
click_y = input_coordinates['input_y']
|
|
1133
|
+
|
|
1134
|
+
self.logger.debug(f'🎯 Attempting click-to-focus at ({click_x:.1f}, {click_y:.1f})')
|
|
1135
|
+
|
|
1136
|
+
# Click to focus
|
|
1137
|
+
await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
|
|
1138
|
+
params={
|
|
1139
|
+
'type': 'mousePressed',
|
|
1140
|
+
'x': click_x,
|
|
1141
|
+
'y': click_y,
|
|
1142
|
+
'button': 'left',
|
|
1143
|
+
'clickCount': 1,
|
|
1144
|
+
},
|
|
1145
|
+
session_id=cdp_session.session_id,
|
|
1146
|
+
)
|
|
1147
|
+
await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
|
|
1148
|
+
params={
|
|
1149
|
+
'type': 'mouseReleased',
|
|
1150
|
+
'x': click_x,
|
|
1151
|
+
'y': click_y,
|
|
1152
|
+
'button': 'left',
|
|
1153
|
+
'clickCount': 1,
|
|
1154
|
+
},
|
|
1155
|
+
session_id=cdp_session.session_id,
|
|
1156
|
+
)
|
|
1157
|
+
|
|
1158
|
+
self.logger.debug('✅ Element focused using click method')
|
|
1159
|
+
return True
|
|
1160
|
+
|
|
1161
|
+
except Exception as e:
|
|
1162
|
+
self.logger.debug(f'Click focus failed: {e}')
|
|
1163
|
+
|
|
1164
|
+
# Both strategies failed
|
|
1165
|
+
self.logger.debug('Focus strategies failed, will attempt typing anyway')
|
|
1166
|
+
return False
|
|
1167
|
+
|
|
1168
|
+
def _requires_direct_value_assignment(self, element_node: EnhancedDOMTreeNode) -> bool:
    """
    Check if an element requires direct value assignment instead of character-by-character typing.

    Certain input types have compound components, custom plugins, or special requirements
    that make character-by-character typing unreliable. These need direct .value assignment:

    Native HTML5:
    - date, time, datetime-local: Have spinbutton components (ISO format required)
    - month, week: Similar compound structure
    - color: Expects hex format #RRGGBB
    - range: Needs numeric value within min/max

    jQuery/Bootstrap Datepickers (text inputs only):
    - Detected by class names or data attributes
    - Often expect specific date formats (MM/DD/YYYY, DD/MM/YYYY, etc.)

    Note: We use direct assignment because:
    1. Typing triggers intermediate validation that might reject partial values
    2. Compound components (like date spinbuttons) don't work with sequential typing
    3. It's much faster and more reliable
    4. We dispatch proper input/change events afterward to trigger listeners

    Args:
        element_node: Node exposing ``tag_name`` and an ``attributes`` mapping.

    Returns:
        True when the element matches one of the cases above; False otherwise,
        including when the node has no tag name or no attributes.
    """
    if not element_node.tag_name or not element_node.attributes:
        return False

    # Everything below only applies to <input> elements
    if element_node.tag_name.lower() != 'input':
        return False

    attributes = element_node.attributes
    input_type = attributes.get('type', '').lower()

    # Native HTML5 inputs with compound components or strict formats
    if input_type in {'date', 'time', 'datetime-local', 'month', 'week', 'color', 'range'}:
        return True

    # Detect jQuery/Bootstrap datepickers (text inputs with datepicker plugins)
    if input_type in {'text', ''}:
        # Check for common datepicker indicators in the class list
        class_attr = attributes.get('class', '').lower()
        if any(
            indicator in class_attr
            for indicator in ['datepicker', 'daterangepicker', 'datetimepicker', 'bootstrap-datepicker']
        ):
            return True

        # Data attributes that are specific to datepickers
        if any(attr in attributes for attr in ['data-datepicker', 'data-date-format']):
            return True

        # `data-provide` is a generic plugin hook (e.g. data-provide="typeahead",
        # which relies on real keystrokes), so its value must name a date widget.
        if 'date' in attributes.get('data-provide', '').lower():
            return True

    return False
|
|
1219
|
+
|
|
1220
|
+
async def _set_value_directly(self, element_node: EnhancedDOMTreeNode, text: str, object_id: str, cdp_session) -> None:
    """
    Assign `text` to an input's value from JavaScript instead of typing it.

    A function is executed on the remote object `object_id` through
    Runtime.callFunctionOn. It writes the value with the native
    HTMLInputElement `value` setter, dispatches focus, input, change and blur
    events in that order and, when jQuery exists on the page, triggers a jQuery
    'change' and calls datepicker('update') if the element carries
    'datepicker' data. The function returns the element's resulting value,
    which is logged at debug level.

    Args:
        element_node: DOM node being filled (not read by this method).
        text: Value to assign; embedded in the script as a JSON string literal.
        object_id: CDP remote object ID of the target element.
        cdp_session: Session providing `cdp_client` and `session_id` for the call.

    Raises:
        Exception: Any error raised while building or sending the call is
            logged and re-raised unchanged.
    """
    try:
        # callFunctionOn expects a function declaration (not a self-invoking function).
        # json.dumps turns the text into a correctly quoted JavaScript string literal.
        setter_declaration = f"""
        function() {{
            // Store old value for comparison
            const oldValue = this.value;

            // REACT-COMPATIBLE VALUE SETTING:
            // React uses Object.getOwnPropertyDescriptor to track input changes
            // We need to use the native setter to bypass React's tracking and then trigger events
            const nativeInputValueSetter = Object.getOwnPropertyDescriptor(
                window.HTMLInputElement.prototype,
                'value'
            ).set;

            // Set the value using the native setter (bypasses React's control)
            nativeInputValueSetter.call(this, {json.dumps(text)});

            // Dispatch comprehensive events to ensure all frameworks detect the change
            // Order matters: focus -> input -> change -> blur (mimics user interaction)

            // 1. Focus event (in case element isn't focused)
            this.dispatchEvent(new FocusEvent('focus', {{ bubbles: true }}));

            // 2. Input event (CRITICAL for React onChange)
            // React listens to 'input' events on the document and checks for value changes
            const inputEvent = new Event('input', {{ bubbles: true, cancelable: true }});
            this.dispatchEvent(inputEvent);

            // 3. Change event (for form handling, traditional listeners)
            const changeEvent = new Event('change', {{ bubbles: true, cancelable: true }});
            this.dispatchEvent(changeEvent);

            // 4. Blur event (triggers final validation in some libraries)
            this.dispatchEvent(new FocusEvent('blur', {{ bubbles: true }}));

            // 5. jQuery-specific events (if jQuery is present)
            if (typeof jQuery !== 'undefined' && jQuery.fn) {{
                try {{
                    jQuery(this).trigger('change');
                    // Trigger datepicker-specific events if it's a datepicker
                    if (jQuery(this).data('datepicker')) {{
                        jQuery(this).datepicker('update');
                    }}
                }} catch (e) {{
                    // jQuery not available or error, continue anyway
                }}
            }}

            return this.value;
        }}
        """

        call_params = {
            'objectId': object_id,
            'functionDeclaration': setter_declaration,
            'returnByValue': True,
        }
        response = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
            params=call_params,
            session_id=cdp_session.session_id,
        )

        # The script returns the element's value; without it we cannot confirm the write
        if 'result' not in response or 'value' not in response['result']:
            self.logger.warning('⚠️ Could not verify value was set correctly')
        else:
            actual_value = response['result']['value']
            self.logger.debug(f'✅ Value set directly to: "{actual_value}"')

    except Exception as e:
        self.logger.error(f'❌ Failed to set value directly: {e}')
        raise
|
|
1306
|
+
|
|
1307
|
+
async def _input_text_element_node_impl(
    self, element_node: EnhancedDOMTreeNode, text: str, clear: bool = True, is_sensitive: bool = False
) -> dict | None:
    """
    Input text into an element using pure CDP with improved focus fallbacks.

    Steps: scroll the node into view (best effort), resolve it to a remote
    object, compute its center coordinates unless it is occluded, focus it,
    then either assign the value directly (when
    `_requires_direct_value_assignment` returns True) or optionally clear the
    field, type `text` one character at a time and trigger framework events.

    Args:
        element_node: Target DOM node.
        text: Text to enter. Newline characters are sent as Enter key presses.
        clear: Clear existing content before typing. Not applied on the
            direct-assignment path.
        is_sensitive: When True, the text is kept out of this method's debug log.

    Returns:
        A dict with 'input_x' and 'input_y' (the element's center), or None when
        no coordinates were found or the element is occluded.

    Raises:
        BrowserError: If any step other than the initial scroll fails. The
            underlying exception is attached as the cause.
    """

    try:
        # Get CDP client
        cdp_client = self.browser_session.cdp_client

        # Session that matches the frame/target the element lives in
        cdp_session = await self.browser_session.cdp_client_for_node(element_node)

        # Get element info
        backend_node_id = element_node.backend_node_id

        # Scroll element into view (best effort: a failure here is only logged)
        try:
            await cdp_session.cdp_client.send.DOM.scrollIntoViewIfNeeded(
                params={'backendNodeId': backend_node_id}, session_id=cdp_session.session_id
            )
            await asyncio.sleep(0.01)
        except Exception as e:
            # Node detached errors are common with shadow DOM and dynamic content
            # The element can still be interacted with even if scrolling fails
            error_str = str(e)
            if 'Node is detached from document' in error_str or 'detached from document' in error_str:
                self.logger.debug(
                    f'Element node temporarily detached during scroll (common with shadow DOM), continuing: {element_node}'
                )
            else:
                self.logger.debug(f'Failed to scroll element {element_node} into view before typing: {type(e).__name__}: {e}')

        # Get object ID for the element
        result = await cdp_client.send.DOM.resolveNode(
            params={'backendNodeId': backend_node_id},
            session_id=cdp_session.session_id,
        )
        # Explicit check instead of `assert`, which is stripped when Python runs with -O
        if 'object' not in result or 'objectId' not in result['object']:
            raise ValueError('Failed to find DOM element based on backendNodeId, maybe page content changed?')
        object_id = result['object']['objectId']

        # Track coordinates for metadata; stays None when unknown or occluded
        input_coordinates = None

        # Get current coordinates using unified method
        coords = await self.browser_session.get_element_coordinates(backend_node_id, cdp_session)
        if coords:
            center_x = coords.x + coords.width / 2
            center_y = coords.y + coords.height / 2

            # Check for occlusion before using coordinates for focus
            is_occluded = await self._check_element_occlusion(backend_node_id, center_x, center_y, cdp_session)

            if is_occluded:
                # Leaving input_coordinates as None forces the CDP-only focus fallback
                self.logger.debug('🚫 Input element is occluded, skipping coordinate-based focus')
            else:
                input_coordinates = {'input_x': center_x, 'input_y': center_y}
                self.logger.debug(f'Using unified coordinates: x={center_x:.1f}, y={center_y:.1f}')
        else:
            self.logger.debug('No coordinates found for element')

        # Ensure we have a valid object_id before proceeding
        if not object_id:
            raise ValueError('Could not get object_id for element')

        # Step 1: Focus the element using simple strategy (its return value is not used here)
        await self._focus_element_simple(
            backend_node_id=backend_node_id, object_id=object_id, cdp_session=cdp_session, input_coordinates=input_coordinates
        )

        # Step 2: Check if this element requires direct value assignment (date/time inputs)
        if self._requires_direct_value_assignment(element_node):
            # Date/time inputs: use direct value assignment instead of typing
            self.logger.debug(
                f'🎯 Element type={element_node.attributes.get("type")} requires direct value assignment, setting value directly'
            )
            # NOTE(review): _set_value_directly logs the resulting value at debug level
            # regardless of is_sensitive - confirm whether that is acceptable.
            await self._set_value_directly(element_node, text, object_id, cdp_session)

            # Return input coordinates for metadata
            return input_coordinates

        # Step 3: Clear existing text if requested (only for regular inputs that support typing)
        if clear:
            cleared_successfully = await self._clear_text_field(object_id=object_id, cdp_session=cdp_session)
            if not cleared_successfully:
                self.logger.warning('⚠️ Text field clearing failed, typing may append to existing text')

        # Step 4: Type the text character by character using key events
        if is_sensitive:
            # Note: sensitive_key_name is not passed to this low-level method,
            # but we could extend the signature if needed for more granular logging
            self.logger.debug('🎯 Typing <sensitive> character by character')
        else:
            self.logger.debug(f'🎯 Typing text character by character: "{text}"')

        for char in text:
            if char == '\n':
                # Newline characters are sent as an Enter key press
                await self._dispatch_enter_key_sequence(cdp_session)
            else:
                await self._dispatch_char_key_sequence(cdp_session, char)

            # Small delay between characters
            await asyncio.sleep(0.001)

        # Step 5: Trigger framework-aware DOM events after typing completion
        # Modern JavaScript frameworks (React, Vue, Angular) rely on these events
        # to update their internal state and trigger re-renders
        await self._trigger_framework_events(object_id=object_id, cdp_session=cdp_session)

        # Return coordinates metadata if available
        return input_coordinates

    except Exception as e:
        self.logger.error(f'Failed to input text via CDP: {type(e).__name__}: {e}')
        # Chain the original exception so the root cause stays visible in tracebacks
        raise BrowserError(f'Failed to input text into element: {repr(element_node)}') from e

async def _dispatch_enter_key_sequence(self, cdp_session) -> None:
    """Send an Enter key press as keyDown, char (carriage return) and keyUp events."""
    send_key_event = cdp_session.cdp_client.send.Input.dispatchKeyEvent

    await send_key_event(
        params={
            'type': 'keyDown',
            'key': 'Enter',
            'code': 'Enter',
            'windowsVirtualKeyCode': 13,
        },
        session_id=cdp_session.session_id,
    )

    # Small delay between key down and the char event
    await asyncio.sleep(0.001)

    # Send char event with carriage return
    await send_key_event(
        params={
            'type': 'char',
            'text': '\r',
            'key': 'Enter',
        },
        session_id=cdp_session.session_id,
    )

    await send_key_event(
        params={
            'type': 'keyUp',
            'key': 'Enter',
            'code': 'Enter',
            'windowsVirtualKeyCode': 13,
        },
        session_id=cdp_session.session_id,
    )

async def _dispatch_char_key_sequence(self, cdp_session, char: str) -> None:
    """Send one regular character as keyDown, char and keyUp events."""
    send_key_event = cdp_session.cdp_client.send.Input.dispatchKeyEvent

    # Get proper modifiers, VK code, and base key for the character
    modifiers, vk_code, base_key = self._get_char_modifiers_and_vk(char)
    key_code = self._get_key_code_for_char(base_key)

    # Step 1: Send keyDown event (NO text parameter)
    await send_key_event(
        params={
            'type': 'keyDown',
            'key': base_key,
            'code': key_code,
            'modifiers': modifiers,
            'windowsVirtualKeyCode': vk_code,
        },
        session_id=cdp_session.session_id,
    )

    # Small delay between key down and the char event
    await asyncio.sleep(0.005)

    # Step 2: Send char event (WITH text parameter) - this is what inserts the text
    await send_key_event(
        params={
            'type': 'char',
            'text': char,
            'key': char,
        },
        session_id=cdp_session.session_id,
    )

    # Step 3: Send keyUp event (NO text parameter)
    await send_key_event(
        params={
            'type': 'keyUp',
            'key': base_key,
            'code': key_code,
            'modifiers': modifiers,
            'windowsVirtualKeyCode': vk_code,
        },
        session_id=cdp_session.session_id,
    )
|
|
1511
|
+
|
|
1512
|
+
async def _trigger_framework_events(self, object_id: str, cdp_session) -> None:
    """
    Trigger framework-aware DOM events after text input completion.

    Runs a function on the element through Runtime.callFunctionOn that focuses
    it and dispatches input, change and blur events, followed by extra input
    events when React- or Vue-specific properties are present on the element.
    This is best effort: every failure is logged and never raised.

    The script is a plain function declaration. callFunctionOn invokes it with
    the element bound to `this`; a self-invoking `(function(){...})();`
    expression is not a function, so it never runs against the element.

    Args:
        object_id: CDP object ID of the input element
        cdp_session: CDP session for the element's context
    """
    try:
        # Function declaration executed with `this` bound to the target element
        framework_events_script = """
        function() {
            // Find the target element (available as 'this' when using objectId)
            const element = this;
            if (!element) return false;

            // Ensure element is focused
            element.focus();

            // Comprehensive event sequence for maximum framework compatibility
            const events = [
                // Input event - primary event for React controlled components
                { type: 'input', bubbles: true, cancelable: true },
                // Change event - important for form validation and Vue v-model
                { type: 'change', bubbles: true, cancelable: true },
                // Blur event - triggers validation in many frameworks
                { type: 'blur', bubbles: true, cancelable: true }
            ];

            let success = true;

            events.forEach(eventConfig => {
                try {
                    const event = new Event(eventConfig.type, {
                        bubbles: eventConfig.bubbles,
                        cancelable: eventConfig.cancelable
                    });

                    // Special handling for InputEvent (more specific than Event)
                    if (eventConfig.type === 'input') {
                        const inputEvent = new InputEvent('input', {
                            bubbles: true,
                            cancelable: true,
                            data: element.value,
                            inputType: 'insertText'
                        });
                        element.dispatchEvent(inputEvent);
                    } else {
                        element.dispatchEvent(event);
                    }
                } catch (e) {
                    success = false;
                    console.warn('Framework event dispatch failed:', eventConfig.type, e);
                }
            });

            // Special React synthetic event handling
            // React uses internal fiber properties for event system
            if (element._reactInternalFiber || element._reactInternalInstance || element.__reactInternalInstance) {
                try {
                    // Trigger React's synthetic event system
                    const syntheticInputEvent = new InputEvent('input', {
                        bubbles: true,
                        cancelable: true,
                        data: element.value
                    });

                    // Force React to process this as a synthetic event
                    Object.defineProperty(syntheticInputEvent, 'isTrusted', { value: true });
                    element.dispatchEvent(syntheticInputEvent);
                } catch (e) {
                    console.warn('React synthetic event failed:', e);
                }
            }

            // Special Vue reactivity trigger
            // Vue uses __vueParentComponent or __vue__ for component access
            if (element.__vue__ || element._vnode || element.__vueParentComponent) {
                try {
                    // Vue often needs explicit input event with proper timing
                    const vueEvent = new Event('input', { bubbles: true });
                    setTimeout(() => element.dispatchEvent(vueEvent), 0);
                } catch (e) {
                    console.warn('Vue reactivity trigger failed:', e);
                }
            }

            return success;
        }
        """

        # Execute the framework events script
        result = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
            params={
                'objectId': object_id,
                'functionDeclaration': framework_events_script,
                'returnByValue': True,
            },
            session_id=cdp_session.session_id,
        )

        # The script returns false when a dispatch threw; a missing value or
        # exceptionDetails means the function itself did not complete.
        success = result.get('result', {}).get('value', False)
        if 'exceptionDetails' in result or not success:
            self.logger.debug('⚠️ Framework events script did not report success')

    except Exception as e:
        self.logger.warning(f'⚠️ Failed to trigger framework events: {type(e).__name__}: {e}')
        # Don't raise - framework events are a best-effort enhancement
|
|
1621
|
+
|
|
1622
|
+
async def _scroll_with_cdp_gesture(self, pixels: int) -> bool:
    """
    Scroll the page by sending a CDP mouse-wheel event at the viewport center.

    The viewport size is read from Page.getLayoutMetrics (layoutViewport) and
    a single Input.dispatchMouseEvent of type 'mouseWheel' is sent on the
    session held in `browser_session.agent_focus`.

    Args:
        pixels: Number of pixels to scroll (positive = down, negative = up)

    Returns:
        True if the wheel event was dispatched, False if any step failed
        (including a missing agent focus); failures are logged as warnings.
    """
    try:
        focus = self.browser_session.agent_focus
        assert focus is not None, 'CDP session not initialized - browser may not be connected yet'
        client = focus.cdp_client
        sid = focus.session_id

        # Viewport dimensions determine where the wheel event is aimed
        metrics = await client.send.Page.getLayoutMetrics(session_id=sid)
        viewport = metrics['layoutViewport']

        # For mouse wheel, positive deltaY scrolls down, negative scrolls up
        wheel_event = {
            'type': 'mouseWheel',
            'x': viewport['clientWidth'] / 2,
            'y': viewport['clientHeight'] / 2,
            'deltaX': 0,
            'deltaY': pixels,
        }
        await client.send.Input.dispatchMouseEvent(params=wheel_event, session_id=sid)

        self.logger.debug(f'📄 Scrolled via CDP mouse wheel: {pixels}px')
        return True

    except Exception as e:
        self.logger.warning(f'❌ Scrolling via CDP failed: {type(e).__name__}: {e}')
        return False
|
|
1668
|
+
|
|
1669
|
+
async def _scroll_element_container(self, element_node, pixels: int) -> bool:
    """
    Try to scroll an element's container using CDP.

    For an IFRAME element the node is resolved to a remote object and a script
    adds `pixels` to the scrollTop of the iframe document's root element; when
    the script reports success the method returns True immediately. In every
    other case (non-iframe element, unresolved node, or script failure) a
    mouse-wheel event is dispatched at the center of the element's content box.

    Args:
        element_node: Node whose container should be scrolled.
        pixels: Vertical scroll amount, used as scrollTop increment or deltaY.

    Returns:
        True when a scroll was performed or dispatched, False if an exception
        occurred (logged at debug level).
    """
    try:
        session = await self.browser_session.cdp_client_for_node(element_node)
        sid = session.session_id

        # Iframes: scroll the content document, not the iframe element itself
        tag = element_node.tag_name
        if tag and tag.upper() == 'IFRAME':
            # Resolve the node to get an object ID
            resolved = await session.cdp_client.send.DOM.resolveNode(
                params={'backendNodeId': element_node.backend_node_id},
                session_id=sid,
            )

            if 'object' in resolved and 'objectId' in resolved['object']:
                iframe_object_id = resolved['object']['objectId']

                # Script run with `this` bound to the iframe element
                iframe_scroll_js = f"""
                function() {{
                    try {{
                        const doc = this.contentDocument || this.contentWindow.document;
                        if (doc) {{
                            const scrollElement = doc.documentElement || doc.body;
                            if (scrollElement) {{
                                const oldScrollTop = scrollElement.scrollTop;
                                scrollElement.scrollTop += {pixels};
                                const newScrollTop = scrollElement.scrollTop;
                                return {{
                                    success: true,
                                    oldScrollTop: oldScrollTop,
                                    newScrollTop: newScrollTop,
                                    scrolled: newScrollTop - oldScrollTop
                                }};
                            }}
                        }}
                        return {{success: false, error: 'Could not access iframe content'}};
                    }} catch (e) {{
                        return {{success: false, error: e.toString()}};
                    }}
                }}
                """
                outcome = await session.cdp_client.send.Runtime.callFunctionOn(
                    params={
                        'functionDeclaration': iframe_scroll_js,
                        'objectId': iframe_object_id,
                        'returnByValue': True,
                    },
                    session_id=sid,
                )

                if outcome and 'result' in outcome and 'value' in outcome['result']:
                    report = outcome['result']['value']
                    if not report.get('success'):
                        # Fall through to the mouse-wheel approach below
                        self.logger.debug(f'Failed to scroll iframe: {report.get("error", "Unknown error")}')
                    else:
                        self.logger.debug(f'Successfully scrolled iframe content by {report.get("scrolled", 0)}px')
                        return True

        # Standard mouse wheel approach: aim at the center of the content box
        box = await session.cdp_client.send.DOM.getBoxModel(
            params={'backendNodeId': element_node.backend_node_id}, session_id=sid
        )
        quad = box['model']['content']

        # The quad is a flat list of four (x, y) corner pairs; average them
        wheel_x = (quad[0] + quad[2] + quad[4] + quad[6]) / 4
        wheel_y = (quad[1] + quad[3] + quad[5] + quad[7]) / 4

        await session.cdp_client.send.Input.dispatchMouseEvent(
            params={
                'type': 'mouseWheel',
                'x': wheel_x,
                'y': wheel_y,
                'deltaX': 0,
                'deltaY': pixels,
            },
            session_id=sid,
        )

        return True
    except Exception as e:
        self.logger.debug(f'Failed to scroll element container via CDP: {e}')
        return False
|
|
1758
|
+
|
|
1759
|
+
async def _get_session_id_for_element(self, element_node: EnhancedDOMTreeNode) -> str | None:
    """
    Return the CDP session ID to use for an element, based on its frame.

    When the element has a `frame_id`, the browser's targets are listed and the
    first target of type 'iframe' whose targetId contains that frame ID gets a
    session (created without switching focus) whose ID is returned. If no
    target matches, or the lookup raises, the session ID of
    `browser_session.agent_focus` is returned instead.
    """
    frame_id = element_node.frame_id
    if frame_id:
        # Element is in an iframe, need to get session for that frame
        try:
            target_listing = await self.browser_session.cdp_client.send.Target.getTargets()

            for info in target_listing['targetInfos']:
                if info['type'] != 'iframe':
                    continue
                if frame_id not in str(info.get('targetId', '')):
                    continue
                # Create temporary session for iframe target without switching focus
                iframe_session = await self.browser_session.get_or_create_cdp_session(info['targetId'], focus=False)
                return iframe_session.session_id

            # If frame not found in targets, use main target session
            self.logger.debug(f'Frame {element_node.frame_id} not found in targets, using main session')
        except Exception as e:
            self.logger.debug(f'Error getting frame session: {e}, using main session')

    # Use main target session
    main_focus = self.browser_session.agent_focus
    assert main_focus is not None, 'CDP session not initialized - browser may not be connected yet'
    return main_focus.session_id
|
|
1783
|
+
|
|
1784
|
+
async def on_GoBackEvent(self, event: GoBackEvent) -> None:
    """
    Handle navigate back request with CDP.

    Reads the navigation history of the current session and navigates to the
    entry just before the current one. When the current entry is the first
    one, a warning is logged and nothing else happens. Exceptions from the CDP
    calls propagate to the caller.
    """
    cdp_session = await self.browser_session.get_or_create_cdp_session()
    page = cdp_session.cdp_client.send.Page
    sid = cdp_session.session_id

    # Get navigation history
    nav_history = await page.getNavigationHistory(session_id=sid)
    position = nav_history['currentIndex']
    entries = nav_history['entries']

    # Check if we can go back
    if position <= 0:
        self.logger.warning('⚠️ Cannot go back - no previous entry in history')
        return

    # Navigate to the previous entry
    previous_entry = entries[position - 1]
    await page.navigateToHistoryEntry(params={'entryId': previous_entry['id']}, session_id=sid)

    # Wait for navigation
    await asyncio.sleep(0.5)
    # Navigation is handled by BrowserSession via events

    self.logger.info(f'🔙 Navigated back to {previous_entry["url"]}')
|
|
1813
|
+
|
|
1814
|
+
async def on_GoForwardEvent(self, event: GoForwardEvent) -> None:
    """
    Handle navigate forward request with CDP.

    Reads the navigation history of the current session and navigates to the
    entry just after the current one. When the current entry is the last one,
    a warning is logged and nothing else happens. Exceptions from the CDP
    calls propagate to the caller.
    """
    cdp_session = await self.browser_session.get_or_create_cdp_session()
    page = cdp_session.cdp_client.send.Page
    sid = cdp_session.session_id

    # Get navigation history
    nav_history = await page.getNavigationHistory(session_id=sid)
    position = nav_history['currentIndex']
    entries = nav_history['entries']

    # Check if we can go forward
    if position >= len(entries) - 1:
        self.logger.warning('⚠️ Cannot go forward - no next entry in history')
        return

    # Navigate to the next entry
    next_entry = entries[position + 1]
    await page.navigateToHistoryEntry(params={'entryId': next_entry['id']}, session_id=sid)

    # Wait for navigation
    await asyncio.sleep(0.5)
    # Navigation is handled by BrowserSession via events

    self.logger.info(f'🔜 Navigated forward to {next_entry["url"]}')
|
|
1841
|
+
|
|
1842
|
+
async def on_RefreshEvent(self, event: RefreshEvent) -> None:
    """
    Handle target refresh request with CDP.

    Sends Page.reload on the current session, waits one second and logs the
    refresh. Exceptions from the CDP call propagate to the caller.
    """
    session = await self.browser_session.get_or_create_cdp_session()

    # Reload the target
    await session.cdp_client.send.Page.reload(session_id=session.session_id)

    # Wait for reload
    await asyncio.sleep(1.0)

    # Note: We don't clear cached state here - let the next state fetch rebuild as needed
    # Navigation is handled by BrowserSession via events

    self.logger.info('🔄 Target refreshed')
|
|
1859
|
+
|
|
1860
|
+
@observe_debug(ignore_input=True, ignore_output=True, name='wait_event_handler')
async def on_WaitEvent(self, event: WaitEvent) -> None:
    """
    Handle wait request.

    Sleeps for `event.seconds`, clamped to the range [0, event.max_seconds].
    The log line mentions the original value whenever clamping changed it.
    """
    # Clamp: never negative, never above the event's maximum
    actual_seconds = min(max(event.seconds, 0), event.max_seconds)

    if actual_seconds == event.seconds:
        self.logger.info(f'🕒 Waiting for {actual_seconds} seconds')
    else:
        self.logger.info(f'🕒 Waiting for {actual_seconds} seconds (capped from {event.seconds}s)')

    await asyncio.sleep(actual_seconds)
|
|
1874
|
+
|
|
1875
|
+
async def _dispatch_key_event(self, cdp_session, event_type: str, key: str, modifiers: int = 0) -> None:
    """
    Helper to dispatch a keyboard event with proper key codes.

    Looks up the key's code and virtual key code with `get_key_info` and sends
    one Input.dispatchKeyEvent on the given session.

    Args:
        cdp_session: Session providing `cdp_client` and `session_id`.
        event_type: Value sent as the event's 'type'.
        key: Key name sent as the event's 'key'.
        modifiers: Modifier bitmask; only included when non-zero.
    """
    key_code, virtual_key = get_key_info(key)

    params: DispatchKeyEventParameters = {'type': event_type, 'key': key, 'code': key_code}

    # Optional fields are only added when they carry information
    if modifiers:
        params['modifiers'] = modifiers
    if virtual_key is not None:
        params['windowsVirtualKeyCode'] = virtual_key

    await cdp_session.cdp_client.send.Input.dispatchKeyEvent(params=params, session_id=cdp_session.session_id)
|
|
1888
|
+
|
|
1889
|
+
async def on_SendKeysEvent(self, event: SendKeysEvent) -> None:
    """Send a key press, a key combination, or literal text to the focused page via CDP.

    ``event.keys`` is interpreted as follows:

    * A string containing ``+`` together with at least one other non-blank
      character (``"ctrl+a"``, ``"Cmd + Shift + P"``) is a combination: every
      part but the last is pressed as a modifier and the last part is the main
      key. Whitespace around each part is ignored, and a trailing ``++`` means
      the main key is a literal ``+`` (``"ctrl++"``).
    * A string made up only of ``+`` characters and whitespace is typed as text.
    * A single named key or alias (``"Enter"``, ``"esc"``, ``"F5"``) is pressed
      and released.
    * Anything else is typed character by character.

    Exceptions raised by the CDP calls propagate unchanged to the caller.
    """
    cdp_session = await self.browser_session.get_or_create_cdp_session(focus=True)

    # Common aliases (looked up in lower case) mapped to the key names sent to CDP
    key_aliases = {
        'ctrl': 'Control',
        'control': 'Control',
        'alt': 'Alt',
        'option': 'Alt',
        'meta': 'Meta',
        'cmd': 'Meta',
        'command': 'Meta',
        'shift': 'Shift',
        'enter': 'Enter',
        'return': 'Enter',
        'tab': 'Tab',
        'delete': 'Delete',
        'backspace': 'Backspace',
        'escape': 'Escape',
        'esc': 'Escape',
        'space': ' ',
        'up': 'ArrowUp',
        'down': 'ArrowDown',
        'left': 'ArrowLeft',
        'right': 'ArrowRight',
        'pageup': 'PageUp',
        'pagedown': 'PageDown',
        'home': 'Home',
        'end': 'End',
    }

    keys = event.keys

    # A string holding nothing but '+' and whitespace names no keys, so it is literal text
    is_combination = '+' in keys and keys.replace('+', '').strip() != ''

    if is_combination:
        parts = keys.split('+')
        # "ctrl++" splits into [..., '', '']: the trailing empty pair stands for a literal '+' main key
        if len(parts) >= 3 and parts[-1] == '' and parts[-2] == '':
            parts[-2:] = ['+']

        normalized_parts = []
        for part in parts:
            stripped = part.strip()
            # Keep the raw part when stripping leaves nothing, so "ctrl+ " still means Control+Space
            normalized_parts.append(key_aliases.get(stripped.lower(), stripped or part))

        modifiers = normalized_parts[:-1]
        main_key = normalized_parts[-1]

        # Modifier bitmask sent with the main key; parts that are not modifiers contribute 0
        modifier_map = {'Alt': 1, 'Control': 2, 'Meta': 4, 'Shift': 8}
        modifier_value = 0
        for mod in modifiers:
            modifier_value |= modifier_map.get(mod, 0)

        # Hold the modifiers, tap the main key, then release the modifiers in reverse order
        for mod in modifiers:
            await self._dispatch_key_event(cdp_session, 'keyDown', mod)
        await self._dispatch_key_event(cdp_session, 'keyDown', main_key, modifier_value)
        await self._dispatch_key_event(cdp_session, 'keyUp', main_key, modifier_value)
        for mod in reversed(modifiers):
            await self._dispatch_key_event(cdp_session, 'keyUp', mod)
    else:
        # Single key or text: resolve an alias if there is one, otherwise keep the text untouched
        normalized_keys = key_aliases.get(keys.strip().lower(), keys)

        special_keys = {
            'Enter',
            'Tab',
            'Delete',
            'Backspace',
            'Escape',
            'ArrowUp',
            'ArrowDown',
            'ArrowLeft',
            'ArrowRight',
            'PageUp',
            'PageDown',
            'Home',
            'End',
            'Control',
            'Alt',
            'Meta',
            'Shift',
            'F1',
            'F2',
            'F3',
            'F4',
            'F5',
            'F6',
            'F7',
            'F8',
            'F9',
            'F10',
            'F11',
            'F12',
        }

        if normalized_keys in special_keys:
            await self._dispatch_key_event(cdp_session, 'keyDown', normalized_keys)
            # For Enter key, also dispatch a char event to trigger keypress listeners
            if normalized_keys == 'Enter':
                await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
                    params={
                        'type': 'char',
                        'text': '\r',
                        'key': 'Enter',
                    },
                    session_id=cdp_session.session_id,
                )
            await self._dispatch_key_event(cdp_session, 'keyUp', normalized_keys)
        else:
            # Text (single character or string): send each character as text input.
            # This is crucial for text to appear in focused input fields.
            for char in normalized_keys:
                # Newline characters are dispatched as an Enter key press
                if char in ('\n', '\r'):
                    await self._dispatch_key_event(cdp_session, 'keyDown', 'Enter')
                    await self._dispatch_key_event(cdp_session, 'keyUp', 'Enter')
                    continue

                # Modifiers, virtual key code and base key for this character
                char_modifiers, vk_code, base_key = self._get_char_modifiers_and_vk(char)
                key_code = self._get_key_code_for_char(base_key)

                await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
                    params={
                        'type': 'keyDown',
                        'key': base_key,
                        'code': key_code,
                        'modifiers': char_modifiers,
                        'windowsVirtualKeyCode': vk_code,
                    },
                    session_id=cdp_session.session_id,
                )

                # The char event carries the text - this is what makes it appear in input fields
                await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
                    params={
                        'type': 'char',
                        'text': char,
                        'key': char,
                    },
                    session_id=cdp_session.session_id,
                )

                await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
                    params={
                        'type': 'keyUp',
                        'key': base_key,
                        'code': key_code,
                        'modifiers': char_modifiers,
                        'windowsVirtualKeyCode': vk_code,
                    },
                    session_id=cdp_session.session_id,
                )

                # Small delay between characters (18ms like _type_to_page)
                await asyncio.sleep(0.018)

    self.logger.info(f'⌨️ Sent keys: {event.keys}')

    # Note: We don't clear cached state on Enter; multi_act will detect DOM changes
    # and rebuild explicitly. We still wait briefly for potential navigation.
    lowered_keys = event.keys.lower()
    if 'enter' in lowered_keys or 'return' in lowered_keys:
        await asyncio.sleep(0.1)
|
|
2068
|
+
|
|
2069
|
+
async def on_UploadFileEvent(self, event: UploadFileEvent) -> None:
    """Attach ``event.file_path`` to the file input referenced by ``event.node`` via CDP.

    Raises:
        BrowserError: If the browser session does not consider the node a file input.
    """
    target_node = event.node
    index_for_logging = target_node.backend_node_id or 'unknown'

    # Refuse anything that is not a file input before issuing any CDP command
    if not self.browser_session.is_file_input(target_node):
        msg = f'Upload failed - element {index_for_logging} is not a file input.'
        raise BrowserError(message=msg, long_term_memory=msg)

    # The command goes through the shared CDP client, on the session that owns the element
    cdp_client = self.browser_session.cdp_client
    session_id = await self._get_session_id_for_element(target_node)

    set_files = cdp_client.send.DOM.setFileInputFiles
    upload_params = {
        'files': [event.file_path],
        'backendNodeId': target_node.backend_node_id,
    }
    await set_files(params=upload_params, session_id=session_id)

    self.logger.info(f'📎 Uploaded file {event.file_path} to element {index_for_logging}')
|
|
2098
|
+
|
|
2099
|
+
async def on_ScrollToTextEvent(self, event: ScrollToTextEvent) -> None:
    """Scroll the first element containing ``event.text`` into view using CDP.

    Three XPath searches are tried in order (direct text nodes, full string
    value, attribute values). If none scrolls an element into view, a
    JavaScript text-node walk is used as a fallback.

    ``event.text`` is embedded as a properly quoted XPath literal and as a
    JSON-encoded JavaScript string, so quotes, backslashes and newlines in the
    text cannot break or alter the queries.

    Raises:
        BrowserError: If there is no focused CDP session, or the text is not found.
    """

    # TODO: handle looking for text inside cross-origin iframes as well

    cdp_client = self.browser_session.cdp_client
    if self.browser_session.agent_focus is None:
        raise BrowserError('CDP session not initialized - browser may not be connected yet')
    session_id = self.browser_session.agent_focus.session_id

    # Enable the DOM domain and request the whole document before searching
    await cdp_client.send.DOM.enable(session_id=session_id)
    await cdp_client.send.DOM.getDocument(params={'depth': -1}, session_id=session_id)

    # XPath 1.0 string literals have no escape syntax: use whichever quote the
    # text does not contain, or stitch the pieces together with concat().
    text = event.text
    if '"' not in text:
        xpath_literal = f'"{text}"'
    elif "'" not in text:
        xpath_literal = f"'{text}'"
    else:
        pieces = ', \'"\', '.join(f'"{piece}"' for piece in text.split('"'))
        xpath_literal = f'concat({pieces})'

    search_queries = [
        f'//*[contains(text(), {xpath_literal})]',
        f'//*[contains(., {xpath_literal})]',
        f'//*[@*[contains(., {xpath_literal})]]',
    ]

    found = False
    for query in search_queries:
        search_id = None
        try:
            search_result = await cdp_client.send.DOM.performSearch(params={'query': query}, session_id=session_id)
            search_id = search_result['searchId']
            result_count = search_result['resultCount']

            if result_count > 0:
                # Only the first match is needed
                node_ids = await cdp_client.send.DOM.getSearchResults(
                    params={'searchId': search_id, 'fromIndex': 0, 'toIndex': 1},
                    session_id=session_id,
                )

                if node_ids['nodeIds']:
                    node_id = node_ids['nodeIds'][0]
                    await cdp_client.send.DOM.scrollIntoViewIfNeeded(params={'nodeId': node_id}, session_id=session_id)
                    found = True
                    self.logger.debug(f'📜 Scrolled to text: "{event.text}"')
        except Exception as e:
            self.logger.debug(f'Search query failed: {query}, error: {e}')
        finally:
            # Always release the search, including on success and after a failed step
            if search_id is not None:
                try:
                    await cdp_client.send.DOM.discardSearchResults(params={'searchId': search_id}, session_id=session_id)
                except Exception as e:
                    self.logger.debug(f'Failed to discard search results for query: {query}, error: {e}')

        if found:
            break

    if found:
        return None

    # Fallback: walk the text nodes in JavaScript. json.dumps yields a valid,
    # fully escaped JavaScript string literal for the search text.
    js_text_literal = json.dumps(event.text)
    js_result = await cdp_client.send.Runtime.evaluate(
        params={
            'expression': f'''
                (() => {{
                    const walker = document.createTreeWalker(
                        document.body,
                        NodeFilter.SHOW_TEXT,
                        null,
                        false
                    );
                    let node;
                    while (node = walker.nextNode()) {{
                        if (node.textContent.includes({js_text_literal})) {{
                            node.parentElement.scrollIntoView({{behavior: 'smooth', block: 'center'}});
                            return true;
                        }}
                    }}
                    return false;
                }})()
            '''
        },
        session_id=session_id,
    )

    if js_result.get('result', {}).get('value'):
        self.logger.debug(f'📜 Scrolled to text: "{event.text}" (via JS)')
        return None

    self.logger.warning(f'⚠️ Text not found: "{event.text}"')
    raise BrowserError(f'Text not found: "{event.text}"', details={'text': event.text})
|
|
2193
|
+
|
|
2194
|
+
async def on_GetDropdownOptionsEvent(self, event: GetDropdownOptionsEvent) -> dict[str, str]:
    """List the options of the dropdown at ``event.node`` (or nested inside it) via CDP.

    The node is resolved to a remote JavaScript object and an in-page script
    inspects it. The script recognises native ``<select>`` elements, ARIA
    ``menu``/``listbox``/``combobox`` roles, and elements with a ``dropdown``
    or ``ui`` class; if the node itself matches none of these, its descendants
    are searched up to depth 4.

    Returns:
        On success, a dict of strings with the keys ``type``, ``options`` (a
        JSON-encoded list), ``element_info``, ``source``, ``formatted_options``,
        ``message``, ``short_term_memory``, ``long_term_memory`` and
        ``backend_node_id``. When a dropdown is recognised but has no options,
        a dict with ``error``, ``short_term_memory``, ``long_term_memory`` and
        ``backend_node_id`` is returned instead of raising.

    Raises:
        BrowserError: If the script reports that no dropdown was recognised, on
            timeout, or on any other failure (the original error text is
            included in the message).
    """
    try:
        # Use the provided node
        element_node = event.node
        # Falls back to 'unknown' when the node has no (truthy) backend node id
        index_for_logging = element_node.backend_node_id or 'unknown'

        # Get CDP session for this node
        cdp_session = await self.browser_session.cdp_client_for_node(element_node)

        # Convert node to object ID for CDP operations
        try:
            object_result = await cdp_session.cdp_client.send.DOM.resolveNode(
                params={'backendNodeId': element_node.backend_node_id}, session_id=cdp_session.session_id
            )
            remote_object = object_result.get('object', {})
            object_id = remote_object.get('objectId')
            if not object_id:
                raise ValueError('Could not get object ID from resolved node')
        except Exception as e:
            # Every failure in this step, including the ValueError above, is re-wrapped
            # with a common prefix; the outer handler turns it into a BrowserError
            raise ValueError(f'Failed to resolve node to object: {e}') from e

        # Use JavaScript to extract dropdown options.
        # The function is invoked with the resolved element as `this` (see objectId below).
        options_script = """
        function() {
            const startElement = this;

            // Function to check if an element is a dropdown and extract options
            function checkDropdownElement(element) {
                // Check if it's a native select element
                if (element.tagName.toLowerCase() === 'select') {
                    return {
                        type: 'select',
                        options: Array.from(element.options).map((opt, idx) => ({
                            text: opt.text.trim(),
                            value: opt.value,
                            index: idx,
                            selected: opt.selected
                        })),
                        id: element.id || '',
                        name: element.name || '',
                        source: 'target'
                    };
                }

                // Check if it's an ARIA dropdown/menu
                const role = element.getAttribute('role');
                if (role === 'menu' || role === 'listbox' || role === 'combobox') {
                    // Find all menu items/options
                    const menuItems = element.querySelectorAll('[role="menuitem"], [role="option"]');
                    const options = [];

                    menuItems.forEach((item, idx) => {
                        const text = item.textContent ? item.textContent.trim() : '';
                        if (text) {
                            options.push({
                                text: text,
                                value: item.getAttribute('data-value') || text,
                                index: idx,
                                selected: item.getAttribute('aria-selected') === 'true' || item.classList.contains('selected')
                            });
                        }
                    });

                    return {
                        type: 'aria',
                        options: options,
                        id: element.id || '',
                        name: element.getAttribute('aria-label') || '',
                        source: 'target'
                    };
                }

                // Check if it's a Semantic UI dropdown or similar
                if (element.classList.contains('dropdown') || element.classList.contains('ui')) {
                    const menuItems = element.querySelectorAll('.item, .option, [data-value]');
                    const options = [];

                    menuItems.forEach((item, idx) => {
                        const text = item.textContent ? item.textContent.trim() : '';
                        if (text) {
                            options.push({
                                text: text,
                                value: item.getAttribute('data-value') || text,
                                index: idx,
                                selected: item.classList.contains('selected') || item.classList.contains('active')
                            });
                        }
                    });

                    if (options.length > 0) {
                        return {
                            type: 'custom',
                            options: options,
                            id: element.id || '',
                            name: element.getAttribute('aria-label') || '',
                            source: 'target'
                        };
                    }
                }

                return null;
            }

            // Function to recursively search children up to specified depth
            function searchChildrenForDropdowns(element, maxDepth, currentDepth = 0) {
                if (currentDepth >= maxDepth) return null;

                // Check all direct children
                for (let child of element.children) {
                    // Check if this child is a dropdown
                    const result = checkDropdownElement(child);
                    if (result) {
                        result.source = `child-depth-${currentDepth + 1}`;
                        return result;
                    }

                    // Recursively check this child's children
                    const childResult = searchChildrenForDropdowns(child, maxDepth, currentDepth + 1);
                    if (childResult) {
                        return childResult;
                    }
                }

                return null;
            }

            // First check the target element itself
            let dropdownResult = checkDropdownElement(startElement);
            if (dropdownResult) {
                return dropdownResult;
            }

            // If target element is not a dropdown, search children up to depth 4
            dropdownResult = searchChildrenForDropdowns(startElement, 4);
            if (dropdownResult) {
                return dropdownResult;
            }

            return {
                error: `Element and its children (depth 4) are not recognizable dropdown types (tag: ${startElement.tagName}, role: ${startElement.getAttribute('role')}, classes: ${startElement.className})`
            };
        }
        """

        # returnByValue asks CDP to send the script's return object back as plain data
        result = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
            params={
                'functionDeclaration': options_script,
                'objectId': object_id,
                'returnByValue': True,
            },
            session_id=cdp_session.session_id,
        )

        dropdown_data = result.get('result', {}).get('value', {})

        # The script reports "not a dropdown" through an `error` field
        if dropdown_data.get('error'):
            raise BrowserError(message=dropdown_data['error'], long_term_memory=dropdown_data['error'])

        # A missing or empty option list is reported as an error dict rather than an exception
        if not dropdown_data.get('options'):
            msg = f'No options found in dropdown at index {index_for_logging}'
            return {
                'error': msg,
                'short_term_memory': msg,
                'long_term_memory': msg,
                'backend_node_id': str(index_for_logging),
            }

        # Format options for display
        formatted_options = []
        for opt in dropdown_data['options']:
            # Use JSON encoding to ensure exact string matching
            encoded_text = json.dumps(opt['text'])
            status = ' (selected)' if opt.get('selected') else ''
            formatted_options.append(f'{opt["index"]}: text={encoded_text}, value={json.dumps(opt["value"])}{status}')

        dropdown_type = dropdown_data.get('type', 'select')
        element_info = f'Index: {index_for_logging}, Type: {dropdown_type}, ID: {dropdown_data.get("id", "none")}, Name: {dropdown_data.get("name", "none")}'
        # 'target' means the node itself matched; otherwise the script reports 'child-depth-N'
        source_info = dropdown_data.get('source', 'unknown')

        if source_info == 'target':
            msg = f'Found {dropdown_type} dropdown ({element_info}):\n' + '\n'.join(formatted_options)
        else:
            msg = f'Found {dropdown_type} dropdown in {source_info} ({element_info}):\n' + '\n'.join(formatted_options)
        msg += (
            f'\n\nUse the exact text or value string (without quotes) in select_dropdown(index={index_for_logging}, text=...)'
        )

        if source_info == 'target':
            self.logger.info(f'📋 Found {len(dropdown_data["options"])} dropdown options for index {index_for_logging}')
        else:
            self.logger.info(
                f'📋 Found {len(dropdown_data["options"])} dropdown options for index {index_for_logging} in {source_info}'
            )

        # Create structured memory for the response
        short_term_memory = msg
        long_term_memory = f'Got dropdown options for index {index_for_logging}'

        # Return the dropdown data as a dict with structured memory
        return {
            'type': dropdown_type,
            'options': json.dumps(dropdown_data['options']),  # Convert list to JSON string for dict[str, str] type
            'element_info': element_info,
            'source': source_info,
            'formatted_options': '\n'.join(formatted_options),
            'message': msg,
            'short_term_memory': short_term_memory,
            'long_term_memory': long_term_memory,
            'backend_node_id': str(index_for_logging),
        }

    except BrowserError:
        # Re-raise BrowserError as-is to preserve structured memory
        raise
    except TimeoutError:
        msg = f'Failed to get dropdown options for index {index_for_logging} due to timeout.'
        self.logger.error(msg)
        raise BrowserError(message=msg, long_term_memory=msg)
    except Exception as e:
        # Anything else (including the ValueError from node resolution) becomes a BrowserError
        msg = 'Failed to get dropdown options'
        error_msg = f'{msg}: {str(e)}'
        self.logger.error(error_msg)
        raise BrowserError(
            message=error_msg, long_term_memory=f'Failed to get dropdown options for index {index_for_logging}.'
        )
|
|
2420
|
+
|
|
2421
|
+
async def on_SelectDropdownOptionEvent(self, event: SelectDropdownOptionEvent) -> dict[str, str]:
|
|
2422
|
+
"""Handle select dropdown option request with CDP."""
|
|
2423
|
+
try:
|
|
2424
|
+
# Use the provided node
|
|
2425
|
+
element_node = event.node
|
|
2426
|
+
index_for_logging = element_node.backend_node_id or 'unknown'
|
|
2427
|
+
target_text = event.text
|
|
2428
|
+
|
|
2429
|
+
# Get CDP session for this node
|
|
2430
|
+
cdp_session = await self.browser_session.cdp_client_for_node(element_node)
|
|
2431
|
+
|
|
2432
|
+
# Convert node to object ID for CDP operations
|
|
2433
|
+
try:
|
|
2434
|
+
object_result = await cdp_session.cdp_client.send.DOM.resolveNode(
|
|
2435
|
+
params={'backendNodeId': element_node.backend_node_id}, session_id=cdp_session.session_id
|
|
2436
|
+
)
|
|
2437
|
+
remote_object = object_result.get('object', {})
|
|
2438
|
+
object_id = remote_object.get('objectId')
|
|
2439
|
+
if not object_id:
|
|
2440
|
+
raise ValueError('Could not get object ID from resolved node')
|
|
2441
|
+
except Exception as e:
|
|
2442
|
+
raise ValueError(f'Failed to resolve node to object: {e}') from e
|
|
2443
|
+
|
|
2444
|
+
try:
|
|
2445
|
+
# Use JavaScript to select the option
|
|
2446
|
+
selection_script = """
|
|
2447
|
+
function(targetText) {
|
|
2448
|
+
const startElement = this;
|
|
2449
|
+
|
|
2450
|
+
// Function to attempt selection on a dropdown element
|
|
2451
|
+
function attemptSelection(element) {
|
|
2452
|
+
// Handle native select elements
|
|
2453
|
+
if (element.tagName.toLowerCase() === 'select') {
|
|
2454
|
+
const options = Array.from(element.options);
|
|
2455
|
+
const targetTextLower = targetText.toLowerCase();
|
|
2456
|
+
|
|
2457
|
+
for (const option of options) {
|
|
2458
|
+
const optionTextLower = option.text.trim().toLowerCase();
|
|
2459
|
+
const optionValueLower = option.value.toLowerCase();
|
|
2460
|
+
|
|
2461
|
+
// Match against both text and value (case-insensitive)
|
|
2462
|
+
if (optionTextLower === targetTextLower || optionValueLower === targetTextLower) {
|
|
2463
|
+
// Focus the element FIRST (important for Svelte/Vue/React and other reactive frameworks)
|
|
2464
|
+
// This simulates the user focusing on the dropdown before changing it
|
|
2465
|
+
element.focus();
|
|
2466
|
+
|
|
2467
|
+
// Then set the value
|
|
2468
|
+
element.value = option.value;
|
|
2469
|
+
option.selected = true;
|
|
2470
|
+
|
|
2471
|
+
// Trigger all necessary events for reactive frameworks
|
|
2472
|
+
// 1. input event - critical for Vue's v-model and Svelte's bind:value
|
|
2473
|
+
const inputEvent = new Event('input', { bubbles: true, cancelable: true });
|
|
2474
|
+
element.dispatchEvent(inputEvent);
|
|
2475
|
+
|
|
2476
|
+
// 2. change event - traditional form validation and framework reactivity
|
|
2477
|
+
const changeEvent = new Event('change', { bubbles: true, cancelable: true });
|
|
2478
|
+
element.dispatchEvent(changeEvent);
|
|
2479
|
+
|
|
2480
|
+
// 3. blur event - completes the interaction, triggers validation
|
|
2481
|
+
element.blur();
|
|
2482
|
+
|
|
2483
|
+
return {
|
|
2484
|
+
success: true,
|
|
2485
|
+
message: `Selected option: ${option.text.trim()} (value: ${option.value})`,
|
|
2486
|
+
value: option.value
|
|
2487
|
+
};
|
|
2488
|
+
}
|
|
2489
|
+
}
|
|
2490
|
+
|
|
2491
|
+
// Return available options as separate field
|
|
2492
|
+
const availableOptions = options.map(opt => ({
|
|
2493
|
+
text: opt.text.trim(),
|
|
2494
|
+
value: opt.value
|
|
2495
|
+
}));
|
|
2496
|
+
|
|
2497
|
+
return {
|
|
2498
|
+
success: false,
|
|
2499
|
+
error: `Option with text or value '${targetText}' not found in select element`,
|
|
2500
|
+
availableOptions: availableOptions
|
|
2501
|
+
};
|
|
2502
|
+
}
|
|
2503
|
+
|
|
2504
|
+
// Handle ARIA dropdowns/menus
|
|
2505
|
+
const role = element.getAttribute('role');
|
|
2506
|
+
if (role === 'menu' || role === 'listbox' || role === 'combobox') {
|
|
2507
|
+
const menuItems = element.querySelectorAll('[role="menuitem"], [role="option"]');
|
|
2508
|
+
const targetTextLower = targetText.toLowerCase();
|
|
2509
|
+
|
|
2510
|
+
for (const item of menuItems) {
|
|
2511
|
+
if (item.textContent) {
|
|
2512
|
+
const itemTextLower = item.textContent.trim().toLowerCase();
|
|
2513
|
+
const itemValueLower = (item.getAttribute('data-value') || '').toLowerCase();
|
|
2514
|
+
|
|
2515
|
+
// Match against both text and data-value (case-insensitive)
|
|
2516
|
+
if (itemTextLower === targetTextLower || itemValueLower === targetTextLower) {
|
|
2517
|
+
// Clear previous selections
|
|
2518
|
+
menuItems.forEach(mi => {
|
|
2519
|
+
mi.setAttribute('aria-selected', 'false');
|
|
2520
|
+
mi.classList.remove('selected');
|
|
2521
|
+
});
|
|
2522
|
+
|
|
2523
|
+
// Select this item
|
|
2524
|
+
item.setAttribute('aria-selected', 'true');
|
|
2525
|
+
item.classList.add('selected');
|
|
2526
|
+
|
|
2527
|
+
// Trigger click and change events
|
|
2528
|
+
item.click();
|
|
2529
|
+
const clickEvent = new MouseEvent('click', { view: window, bubbles: true, cancelable: true });
|
|
2530
|
+
item.dispatchEvent(clickEvent);
|
|
2531
|
+
|
|
2532
|
+
return {
|
|
2533
|
+
success: true,
|
|
2534
|
+
message: `Selected ARIA menu item: ${item.textContent.trim()}`
|
|
2535
|
+
};
|
|
2536
|
+
}
|
|
2537
|
+
}
|
|
2538
|
+
}
|
|
2539
|
+
|
|
2540
|
+
// Return available options as separate field
|
|
2541
|
+
const availableOptions = Array.from(menuItems).map(item => ({
|
|
2542
|
+
text: item.textContent ? item.textContent.trim() : '',
|
|
2543
|
+
value: item.getAttribute('data-value') || ''
|
|
2544
|
+
})).filter(opt => opt.text || opt.value);
|
|
2545
|
+
|
|
2546
|
+
return {
|
|
2547
|
+
success: false,
|
|
2548
|
+
error: `Menu item with text or value '${targetText}' not found`,
|
|
2549
|
+
availableOptions: availableOptions
|
|
2550
|
+
};
|
|
2551
|
+
}
|
|
2552
|
+
|
|
2553
|
+
// Handle Semantic UI or custom dropdowns
|
|
2554
|
+
if (element.classList.contains('dropdown') || element.classList.contains('ui')) {
|
|
2555
|
+
const menuItems = element.querySelectorAll('.item, .option, [data-value]');
|
|
2556
|
+
const targetTextLower = targetText.toLowerCase();
|
|
2557
|
+
|
|
2558
|
+
for (const item of menuItems) {
|
|
2559
|
+
if (item.textContent) {
|
|
2560
|
+
const itemTextLower = item.textContent.trim().toLowerCase();
|
|
2561
|
+
const itemValueLower = (item.getAttribute('data-value') || '').toLowerCase();
|
|
2562
|
+
|
|
2563
|
+
// Match against both text and data-value (case-insensitive)
|
|
2564
|
+
if (itemTextLower === targetTextLower || itemValueLower === targetTextLower) {
|
|
2565
|
+
// Clear previous selections
|
|
2566
|
+
menuItems.forEach(mi => {
|
|
2567
|
+
mi.classList.remove('selected', 'active');
|
|
2568
|
+
});
|
|
2569
|
+
|
|
2570
|
+
// Select this item
|
|
2571
|
+
item.classList.add('selected', 'active');
|
|
2572
|
+
|
|
2573
|
+
// Update dropdown text if there's a text element
|
|
2574
|
+
const textElement = element.querySelector('.text');
|
|
2575
|
+
if (textElement) {
|
|
2576
|
+
textElement.textContent = item.textContent.trim();
|
|
2577
|
+
}
|
|
2578
|
+
|
|
2579
|
+
// Trigger click and change events
|
|
2580
|
+
item.click();
|
|
2581
|
+
const clickEvent = new MouseEvent('click', { view: window, bubbles: true, cancelable: true });
|
|
2582
|
+
item.dispatchEvent(clickEvent);
|
|
2583
|
+
|
|
2584
|
+
// Also dispatch on the main dropdown element
|
|
2585
|
+
const dropdownChangeEvent = new Event('change', { bubbles: true });
|
|
2586
|
+
element.dispatchEvent(dropdownChangeEvent);
|
|
2587
|
+
|
|
2588
|
+
return {
|
|
2589
|
+
success: true,
|
|
2590
|
+
message: `Selected custom dropdown item: ${item.textContent.trim()}`
|
|
2591
|
+
};
|
|
2592
|
+
}
|
|
2593
|
+
}
|
|
2594
|
+
}
|
|
2595
|
+
|
|
2596
|
+
// Return available options as separate field
|
|
2597
|
+
const availableOptions = Array.from(menuItems).map(item => ({
|
|
2598
|
+
text: item.textContent ? item.textContent.trim() : '',
|
|
2599
|
+
value: item.getAttribute('data-value') || ''
|
|
2600
|
+
})).filter(opt => opt.text || opt.value);
|
|
2601
|
+
|
|
2602
|
+
return {
|
|
2603
|
+
success: false,
|
|
2604
|
+
error: `Custom dropdown item with text or value '${targetText}' not found`,
|
|
2605
|
+
availableOptions: availableOptions
|
|
2606
|
+
};
|
|
2607
|
+
}
|
|
2608
|
+
|
|
2609
|
+
return null; // Not a dropdown element
|
|
2610
|
+
}
|
|
2611
|
+
|
|
2612
|
+
// Function to recursively search children for dropdowns
|
|
2613
|
+
function searchChildrenForSelection(element, maxDepth, currentDepth = 0) {
|
|
2614
|
+
if (currentDepth >= maxDepth) return null;
|
|
2615
|
+
|
|
2616
|
+
// Check all direct children
|
|
2617
|
+
for (let child of element.children) {
|
|
2618
|
+
// Try selection on this child
|
|
2619
|
+
const result = attemptSelection(child);
|
|
2620
|
+
if (result && result.success) {
|
|
2621
|
+
return result;
|
|
2622
|
+
}
|
|
2623
|
+
|
|
2624
|
+
// Recursively check this child's children
|
|
2625
|
+
const childResult = searchChildrenForSelection(child, maxDepth, currentDepth + 1);
|
|
2626
|
+
if (childResult && childResult.success) {
|
|
2627
|
+
return childResult;
|
|
2628
|
+
}
|
|
2629
|
+
}
|
|
2630
|
+
|
|
2631
|
+
return null;
|
|
2632
|
+
}
|
|
2633
|
+
|
|
2634
|
+
// First try the target element itself
|
|
2635
|
+
let selectionResult = attemptSelection(startElement);
|
|
2636
|
+
if (selectionResult) {
|
|
2637
|
+
// If attemptSelection returned a result (success or failure), use it
|
|
2638
|
+
// Don't search children if we found a dropdown element but selection failed
|
|
2639
|
+
return selectionResult;
|
|
2640
|
+
}
|
|
2641
|
+
|
|
2642
|
+
// Only search children if target element is not a dropdown element
|
|
2643
|
+
selectionResult = searchChildrenForSelection(startElement, 4);
|
|
2644
|
+
if (selectionResult && selectionResult.success) {
|
|
2645
|
+
return selectionResult;
|
|
2646
|
+
}
|
|
2647
|
+
|
|
2648
|
+
return {
|
|
2649
|
+
success: false,
|
|
2650
|
+
error: `Element and its children (depth 4) do not contain a dropdown with option '${targetText}' (tag: ${startElement.tagName}, role: ${startElement.getAttribute('role')}, classes: ${startElement.className})`
|
|
2651
|
+
};
|
|
2652
|
+
}
|
|
2653
|
+
"""
|
|
2654
|
+
|
|
2655
|
+
result = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
|
|
2656
|
+
params={
|
|
2657
|
+
'functionDeclaration': selection_script,
|
|
2658
|
+
'arguments': [{'value': target_text}],
|
|
2659
|
+
'objectId': object_id,
|
|
2660
|
+
'returnByValue': True,
|
|
2661
|
+
},
|
|
2662
|
+
session_id=cdp_session.session_id,
|
|
2663
|
+
)
|
|
2664
|
+
|
|
2665
|
+
selection_result = result.get('result', {}).get('value', {})
|
|
2666
|
+
|
|
2667
|
+
if selection_result.get('success'):
|
|
2668
|
+
msg = selection_result.get('message', f'Selected option: {target_text}')
|
|
2669
|
+
self.logger.debug(f'{msg}')
|
|
2670
|
+
|
|
2671
|
+
# Return the result as a dict
|
|
2672
|
+
return {
|
|
2673
|
+
'success': 'true',
|
|
2674
|
+
'message': msg,
|
|
2675
|
+
'value': selection_result.get('value', target_text),
|
|
2676
|
+
'backend_node_id': str(index_for_logging),
|
|
2677
|
+
}
|
|
2678
|
+
else:
|
|
2679
|
+
error_msg = selection_result.get('error', f'Failed to select option: {target_text}')
|
|
2680
|
+
available_options = selection_result.get('availableOptions', [])
|
|
2681
|
+
self.logger.error(f'❌ {error_msg}')
|
|
2682
|
+
self.logger.debug(f'Available options from JavaScript: {available_options}')
|
|
2683
|
+
|
|
2684
|
+
# If we have available options, return structured error data
|
|
2685
|
+
if available_options:
|
|
2686
|
+
# Format options for short_term_memory (simple bulleted list)
|
|
2687
|
+
short_term_options = []
|
|
2688
|
+
for opt in available_options:
|
|
2689
|
+
if isinstance(opt, dict):
|
|
2690
|
+
text = opt.get('text', '').strip()
|
|
2691
|
+
value = opt.get('value', '').strip()
|
|
2692
|
+
if text:
|
|
2693
|
+
short_term_options.append(f'- {text}')
|
|
2694
|
+
elif value:
|
|
2695
|
+
short_term_options.append(f'- {value}')
|
|
2696
|
+
elif isinstance(opt, str):
|
|
2697
|
+
short_term_options.append(f'- {opt}')
|
|
2698
|
+
|
|
2699
|
+
if short_term_options:
|
|
2700
|
+
short_term_memory = 'Available dropdown options are:\n' + '\n'.join(short_term_options)
|
|
2701
|
+
long_term_memory = (
|
|
2702
|
+
f"Couldn't select the dropdown option as '{target_text}' is not one of the available options."
|
|
2703
|
+
)
|
|
2704
|
+
|
|
2705
|
+
# Return error result with structured memory instead of raising exception
|
|
2706
|
+
return {
|
|
2707
|
+
'success': 'false',
|
|
2708
|
+
'error': error_msg,
|
|
2709
|
+
'short_term_memory': short_term_memory,
|
|
2710
|
+
'long_term_memory': long_term_memory,
|
|
2711
|
+
'backend_node_id': str(index_for_logging),
|
|
2712
|
+
}
|
|
2713
|
+
|
|
2714
|
+
# Fallback to regular error result if no available options
|
|
2715
|
+
return {
|
|
2716
|
+
'success': 'false',
|
|
2717
|
+
'error': error_msg,
|
|
2718
|
+
'backend_node_id': str(index_for_logging),
|
|
2719
|
+
}
|
|
2720
|
+
|
|
2721
|
+
except Exception as e:
|
|
2722
|
+
error_msg = f'Failed to select dropdown option: {str(e)}'
|
|
2723
|
+
self.logger.error(error_msg)
|
|
2724
|
+
raise ValueError(error_msg) from e
|
|
2725
|
+
|
|
2726
|
+
except Exception as e:
|
|
2727
|
+
error_msg = f'Failed to select dropdown option "{target_text}" for element {index_for_logging}: {str(e)}'
|
|
2728
|
+
self.logger.error(error_msg)
|
|
2729
|
+
raise ValueError(error_msg) from e
|