optexity-browser-use 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. browser_use/__init__.py +157 -0
  2. browser_use/actor/__init__.py +11 -0
  3. browser_use/actor/element.py +1175 -0
  4. browser_use/actor/mouse.py +134 -0
  5. browser_use/actor/page.py +561 -0
  6. browser_use/actor/playground/flights.py +41 -0
  7. browser_use/actor/playground/mixed_automation.py +54 -0
  8. browser_use/actor/playground/playground.py +236 -0
  9. browser_use/actor/utils.py +176 -0
  10. browser_use/agent/cloud_events.py +282 -0
  11. browser_use/agent/gif.py +424 -0
  12. browser_use/agent/judge.py +170 -0
  13. browser_use/agent/message_manager/service.py +473 -0
  14. browser_use/agent/message_manager/utils.py +52 -0
  15. browser_use/agent/message_manager/views.py +98 -0
  16. browser_use/agent/prompts.py +413 -0
  17. browser_use/agent/service.py +2316 -0
  18. browser_use/agent/system_prompt.md +185 -0
  19. browser_use/agent/system_prompt_flash.md +10 -0
  20. browser_use/agent/system_prompt_no_thinking.md +183 -0
  21. browser_use/agent/views.py +743 -0
  22. browser_use/browser/__init__.py +41 -0
  23. browser_use/browser/cloud/cloud.py +203 -0
  24. browser_use/browser/cloud/views.py +89 -0
  25. browser_use/browser/events.py +578 -0
  26. browser_use/browser/profile.py +1158 -0
  27. browser_use/browser/python_highlights.py +548 -0
  28. browser_use/browser/session.py +3225 -0
  29. browser_use/browser/session_manager.py +399 -0
  30. browser_use/browser/video_recorder.py +162 -0
  31. browser_use/browser/views.py +200 -0
  32. browser_use/browser/watchdog_base.py +260 -0
  33. browser_use/browser/watchdogs/__init__.py +0 -0
  34. browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
  35. browser_use/browser/watchdogs/crash_watchdog.py +335 -0
  36. browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
  37. browser_use/browser/watchdogs/dom_watchdog.py +817 -0
  38. browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
  39. browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
  40. browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
  41. browser_use/browser/watchdogs/popups_watchdog.py +143 -0
  42. browser_use/browser/watchdogs/recording_watchdog.py +126 -0
  43. browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
  44. browser_use/browser/watchdogs/security_watchdog.py +280 -0
  45. browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
  46. browser_use/cli.py +2359 -0
  47. browser_use/code_use/__init__.py +16 -0
  48. browser_use/code_use/formatting.py +192 -0
  49. browser_use/code_use/namespace.py +665 -0
  50. browser_use/code_use/notebook_export.py +276 -0
  51. browser_use/code_use/service.py +1340 -0
  52. browser_use/code_use/system_prompt.md +574 -0
  53. browser_use/code_use/utils.py +150 -0
  54. browser_use/code_use/views.py +171 -0
  55. browser_use/config.py +505 -0
  56. browser_use/controller/__init__.py +3 -0
  57. browser_use/dom/enhanced_snapshot.py +161 -0
  58. browser_use/dom/markdown_extractor.py +169 -0
  59. browser_use/dom/playground/extraction.py +312 -0
  60. browser_use/dom/playground/multi_act.py +32 -0
  61. browser_use/dom/serializer/clickable_elements.py +200 -0
  62. browser_use/dom/serializer/code_use_serializer.py +287 -0
  63. browser_use/dom/serializer/eval_serializer.py +478 -0
  64. browser_use/dom/serializer/html_serializer.py +212 -0
  65. browser_use/dom/serializer/paint_order.py +197 -0
  66. browser_use/dom/serializer/serializer.py +1170 -0
  67. browser_use/dom/service.py +825 -0
  68. browser_use/dom/utils.py +129 -0
  69. browser_use/dom/views.py +906 -0
  70. browser_use/exceptions.py +5 -0
  71. browser_use/filesystem/__init__.py +0 -0
  72. browser_use/filesystem/file_system.py +619 -0
  73. browser_use/init_cmd.py +376 -0
  74. browser_use/integrations/gmail/__init__.py +24 -0
  75. browser_use/integrations/gmail/actions.py +115 -0
  76. browser_use/integrations/gmail/service.py +225 -0
  77. browser_use/llm/__init__.py +155 -0
  78. browser_use/llm/anthropic/chat.py +242 -0
  79. browser_use/llm/anthropic/serializer.py +312 -0
  80. browser_use/llm/aws/__init__.py +36 -0
  81. browser_use/llm/aws/chat_anthropic.py +242 -0
  82. browser_use/llm/aws/chat_bedrock.py +289 -0
  83. browser_use/llm/aws/serializer.py +257 -0
  84. browser_use/llm/azure/chat.py +91 -0
  85. browser_use/llm/base.py +57 -0
  86. browser_use/llm/browser_use/__init__.py +3 -0
  87. browser_use/llm/browser_use/chat.py +201 -0
  88. browser_use/llm/cerebras/chat.py +193 -0
  89. browser_use/llm/cerebras/serializer.py +109 -0
  90. browser_use/llm/deepseek/chat.py +212 -0
  91. browser_use/llm/deepseek/serializer.py +109 -0
  92. browser_use/llm/exceptions.py +29 -0
  93. browser_use/llm/google/__init__.py +3 -0
  94. browser_use/llm/google/chat.py +542 -0
  95. browser_use/llm/google/serializer.py +120 -0
  96. browser_use/llm/groq/chat.py +229 -0
  97. browser_use/llm/groq/parser.py +158 -0
  98. browser_use/llm/groq/serializer.py +159 -0
  99. browser_use/llm/messages.py +238 -0
  100. browser_use/llm/models.py +271 -0
  101. browser_use/llm/oci_raw/__init__.py +10 -0
  102. browser_use/llm/oci_raw/chat.py +443 -0
  103. browser_use/llm/oci_raw/serializer.py +229 -0
  104. browser_use/llm/ollama/chat.py +97 -0
  105. browser_use/llm/ollama/serializer.py +143 -0
  106. browser_use/llm/openai/chat.py +264 -0
  107. browser_use/llm/openai/like.py +15 -0
  108. browser_use/llm/openai/serializer.py +165 -0
  109. browser_use/llm/openrouter/chat.py +211 -0
  110. browser_use/llm/openrouter/serializer.py +26 -0
  111. browser_use/llm/schema.py +176 -0
  112. browser_use/llm/views.py +48 -0
  113. browser_use/logging_config.py +330 -0
  114. browser_use/mcp/__init__.py +18 -0
  115. browser_use/mcp/__main__.py +12 -0
  116. browser_use/mcp/client.py +544 -0
  117. browser_use/mcp/controller.py +264 -0
  118. browser_use/mcp/server.py +1114 -0
  119. browser_use/observability.py +204 -0
  120. browser_use/py.typed +0 -0
  121. browser_use/sandbox/__init__.py +41 -0
  122. browser_use/sandbox/sandbox.py +637 -0
  123. browser_use/sandbox/views.py +132 -0
  124. browser_use/screenshots/__init__.py +1 -0
  125. browser_use/screenshots/service.py +52 -0
  126. browser_use/sync/__init__.py +6 -0
  127. browser_use/sync/auth.py +357 -0
  128. browser_use/sync/service.py +161 -0
  129. browser_use/telemetry/__init__.py +51 -0
  130. browser_use/telemetry/service.py +112 -0
  131. browser_use/telemetry/views.py +101 -0
  132. browser_use/tokens/__init__.py +0 -0
  133. browser_use/tokens/custom_pricing.py +24 -0
  134. browser_use/tokens/mappings.py +4 -0
  135. browser_use/tokens/service.py +580 -0
  136. browser_use/tokens/views.py +108 -0
  137. browser_use/tools/registry/service.py +572 -0
  138. browser_use/tools/registry/views.py +174 -0
  139. browser_use/tools/service.py +1675 -0
  140. browser_use/tools/utils.py +82 -0
  141. browser_use/tools/views.py +100 -0
  142. browser_use/utils.py +670 -0
  143. optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
  144. optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
  145. optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
  146. optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
  147. optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,2729 @@
1
+ """Default browser action handlers using CDP."""
2
+
3
+ import asyncio
4
+ import json
5
+
6
+ from cdp_use.cdp.input.commands import DispatchKeyEventParameters
7
+
8
+ from browser_use.actor.utils import get_key_info
9
+ from browser_use.browser.events import (
10
+ ClickElementEvent,
11
+ GetDropdownOptionsEvent,
12
+ GoBackEvent,
13
+ GoForwardEvent,
14
+ RefreshEvent,
15
+ ScrollEvent,
16
+ ScrollToTextEvent,
17
+ SelectDropdownOptionEvent,
18
+ SendKeysEvent,
19
+ TypeTextEvent,
20
+ UploadFileEvent,
21
+ WaitEvent,
22
+ )
23
+ from browser_use.browser.views import BrowserError, URLNotAllowedError
24
+ from browser_use.browser.watchdog_base import BaseWatchdog
25
+ from browser_use.dom.service import EnhancedDOMTreeNode
26
+ from browser_use.observability import observe_debug
27
+
28
# Rebuild the event models that carry forward references to EnhancedDOMTreeNode.
# This must run only after all of the imports above have completed, so that the
# referenced name is available when each model is rebuilt.
ClickElementEvent.model_rebuild()
GetDropdownOptionsEvent.model_rebuild()
SelectDropdownOptionEvent.model_rebuild()
TypeTextEvent.model_rebuild()
ScrollEvent.model_rebuild()
UploadFileEvent.model_rebuild()
36
+
37
+
38
+ class DefaultActionWatchdog(BaseWatchdog):
39
+ """Handles default browser actions like click, type, and scroll using CDP."""
40
+
41
+ def _is_print_related_element(self, element_node: EnhancedDOMTreeNode) -> bool:
42
+ """Check if an element is related to printing (print buttons, print dialogs, etc.).
43
+
44
+ Primary check: onclick attribute (most reliable for print detection)
45
+ Fallback: button text/value (for cases without onclick)
46
+ """
47
+ # Primary: Check onclick attribute for print-related functions (most reliable)
48
+ onclick = element_node.attributes.get('onclick', '').lower() if element_node.attributes else ''
49
+ if onclick and 'print' in onclick:
50
+ # Matches: window.print(), PrintElem(), print(), etc.
51
+ return True
52
+
53
+ return False
54
+
55
async def _handle_print_button_click(self, element_node: EnhancedDOMTreeNode) -> dict | None:
    """Generate a PDF via CDP instead of letting a print dialog open.

    The PDF is produced with ``Page.printToPDF`` on the session returned by
    ``get_or_create_cdp_session(focus=True)``, written into the profile's
    downloads directory under a name derived from the page title, and
    announced by dispatching a ``FileDownloadedEvent``.

    Args:
        element_node: The print-related element that was targeted. It is not
            inspected here.

    Returns:
        ``{'pdf_generated': True, 'path': <saved file>}`` on success. None
        when generation times out, returns no data, no downloads path is
        configured, or any other error occurs (errors are logged, not raised).
    """
    try:
        import base64
        import re
        from pathlib import Path

        cdp_session = await self.browser_session.get_or_create_cdp_session(focus=True)

        # Bound the CDP call so a stuck renderer cannot hang the click handler.
        result = await asyncio.wait_for(
            cdp_session.cdp_client.send.Page.printToPDF(
                params={
                    'printBackground': True,
                    'preferCSSPageSize': True,
                },
                session_id=cdp_session.session_id,
            ),
            timeout=15.0,
        )

        pdf_data = result.get('data')
        if not pdf_data:
            self.logger.warning('⚠️ PDF generation returned no data')
            return None

        # CDP returns the document base64-encoded.
        pdf_bytes = base64.b64decode(pdf_data)

        downloads_path = self.browser_session.browser_profile.downloads_path
        if not downloads_path:
            self.logger.warning('⚠️ No downloads path configured, cannot save PDF')
            return None

        # Derive the filename from the page title, falling back to 'print.pdf'.
        try:
            page_title = await asyncio.wait_for(self.browser_session.get_current_page_title(), timeout=2.0)
            # Drop characters outside word chars / whitespace / '-', then collapse
            # every whitespace run (including newlines and tabs) to one space and
            # trim, so a blank or padded title cannot yield names like '  .pdf'.
            cleaned = ' '.join(re.sub(r'[^\w\s-]', '', page_title).split())
            safe_title = cleaned[:50].strip()  # Max 50 chars
            filename = f'{safe_title}.pdf' if safe_title else 'print.pdf'
        except Exception:
            filename = 'print.pdf'

        downloads_dir = Path(downloads_path).expanduser().resolve()
        downloads_dir.mkdir(parents=True, exist_ok=True)

        # Avoid overwriting an existing file: append ' (N)' before the extension.
        final_path = downloads_dir / filename
        if final_path.exists():
            stem, suffix = final_path.stem, final_path.suffix
            counter = 1
            while (downloads_dir / f'{stem} ({counter}){suffix}').exists():
                counter += 1
            final_path = downloads_dir / f'{stem} ({counter}){suffix}'

        import anyio

        async with await anyio.open_file(final_path, 'wb') as f:
            await f.write(pdf_bytes)

        file_size = final_path.stat().st_size
        self.logger.info(f'✅ Generated PDF via CDP: {final_path} ({file_size:,} bytes)')

        from browser_use.browser.events import FileDownloadedEvent

        page_url = await self.browser_session.get_current_page_url()
        self.browser_session.event_bus.dispatch(
            FileDownloadedEvent(
                url=page_url,
                path=str(final_path),
                file_name=final_path.name,
                file_size=file_size,
                file_type='pdf',
                mime_type='application/pdf',
                auto_download=False,  # This was intentional (user clicked print)
            )
        )

        return {'pdf_generated': True, 'path': str(final_path)}

    except TimeoutError:
        self.logger.warning('⏱️ PDF generation timed out')
        return None
    except Exception as e:
        self.logger.warning(f'⚠️ Failed to generate PDF via CDP: {type(e).__name__}: {e}')
        return None
152
+
153
@observe_debug(ignore_input=True, ignore_output=True, name='click_element_event')
async def on_ClickElementEvent(self, event: ClickElementEvent) -> dict | None:
    """Handle a click request with CDP.

    File inputs are rejected with a validation error, print-related elements
    are turned into a direct PDF export (falling back to a normal click if
    that fails), and everything else is clicked via
    ``_click_element_node_impl``.

    Args:
        event: Click request; ``event.node`` is the element to click.

    Returns:
        The metadata dict produced by the click or PDF export, a
        ``{'validation_error': ...}`` dict when the element must not be
        clicked, or None when the click produced no metadata.

    Raises:
        BrowserError: If the session has no agent focus or target id.
    """
    # Check that the session is alive before attempting any operation.
    focus = self.browser_session.agent_focus
    if not focus or not focus.target_id:
        error_msg = 'Cannot execute click: browser session is corrupted (target_id=None). Session may have crashed.'
        self.logger.error(error_msg)
        raise BrowserError(error_msg)

    element_node = event.node
    index_for_logging = element_node.backend_node_id or 'unknown'

    # File inputs open a native dialog; report a validation error instead of
    # raising so no ERROR log is produced.
    if self.browser_session.is_file_input(element_node):
        msg = f'Index {index_for_logging} - has an element which opens file upload dialog. To upload files please use a specific function to upload files'
        self.logger.info(msg)
        return {'validation_error': msg}

    # Print buttons: generate the PDF directly rather than opening a dialog.
    if self._is_print_related_element(element_node):
        self.logger.info(
            f'🖨️ Detected print button (index {index_for_logging}), generating PDF directly instead of opening dialog...'
        )
        pdf_metadata = await self._handle_print_button_click(element_node)
        if pdf_metadata and pdf_metadata.get('pdf_generated'):
            self.logger.info(f'💾 Generated PDF: {pdf_metadata.get("path")}')
            return pdf_metadata
        self.logger.warning('⚠️ PDF generation failed, falling back to regular click')

    click_metadata = await self._click_element_node_impl(element_node)

    # Validation errors are returned, not raised, to avoid ERROR logs.
    if isinstance(click_metadata, dict) and 'validation_error' in click_metadata:
        self.logger.info(click_metadata['validation_error'])
        return click_metadata

    self.logger.debug(
        f'🖱️ Clicked button {element_node.node_name}: {element_node.get_all_children_text(max_depth=2)}'
    )
    self.logger.debug(f'Element xpath: {element_node.xpath}')

    return click_metadata if isinstance(click_metadata, dict) else None
214
+
215
async def on_TypeTextEvent(self, event: TypeTextEvent) -> dict | None:
    """Handle a text input request with CDP.

    When the node has no usable ``backend_node_id`` the text is typed to
    whatever currently has focus on the page. Otherwise the text is typed into
    the element; if that fails, the element is clicked on a best-effort basis
    and the text is typed to the page instead.

    Sensitive text is never logged: the log shows ``<sensitive_key_name>`` or
    ``<sensitive>`` in its place.

    Args:
        event: Typing request carrying the node, the text, the ``clear`` flag
            and the sensitivity markers.

    Returns:
        The metadata returned by ``_input_text_element_node_impl`` when typing
        into the element succeeds, otherwise None (page-level typing has no
        coordinates).
    """
    element_node = event.node
    index_for_logging = element_node.backend_node_id or 'unknown'

    # Placeholder shown in logs instead of the raw text when it is sensitive.
    if not event.is_sensitive:
        typed = f'"{event.text}"'
    elif event.sensitive_key_name:
        typed = f'<{event.sensitive_key_name}>'
    else:
        typed = '<sensitive>'

    # Index 0 / missing id: type to the page without focusing an element.
    if not element_node.backend_node_id:
        await self._type_to_page(event.text)
        self.logger.info(f'⌨️ Typed {typed} to the page (current focus)')
        return None

    try:
        input_metadata = await self._input_text_element_node_impl(
            element_node,
            event.text,
            # Typing an empty string is treated as a request to clear the field.
            clear=event.clear or (not event.text),
            is_sensitive=event.is_sensitive,
        )
        self.logger.info(f'⌨️ Typed {typed} into element with index {index_for_logging}')
        self.logger.debug(f'Element xpath: {element_node.xpath}')
        return input_metadata
    except Exception as type_error:
        self.logger.warning(f'Failed to type to element {index_for_logging}: {type_error}. Falling back to page typing.')
        try:
            await asyncio.wait_for(self._click_element_node_impl(element_node), timeout=10.0)
        except Exception as click_error:
            # Best effort only: page-level typing still proceeds, but leave a
            # trace instead of swallowing the failure silently.
            self.logger.debug(
                f'Click before fallback typing failed: {type(click_error).__name__}: {click_error}'
            )
        await self._type_to_page(event.text)
        self.logger.info(f'⌨️ Typed {typed} to the page as fallback')
        return None

    # Note: cached state is not cleared here - multi_act handles DOM change
    # detection by explicitly rebuilding and comparing when needed.
276
+
277
async def on_ScrollEvent(self, event: ScrollEvent) -> None:
    """Handle a scroll request with CDP.

    If ``event.node`` is given, its container is scrolled first; when that
    succeeds the handler returns. Otherwise (no node, or the container scroll
    did not succeed) a target-level scroll gesture is performed.

    Args:
        event: Scroll request with ``direction``, ``amount`` and an optional
            ``node``.

    Raises:
        BrowserError: If the session has no agent focus to scroll.
    """
    if not self.browser_session.agent_focus:
        raise BrowserError('No active target for scrolling')

    # Signed distance: positive scrolls down, negative scrolls up.
    signed_pixels = -event.amount if event.direction != 'down' else event.amount

    node = event.node
    if node is not None:
        node_label = node.backend_node_id or 'unknown'
        targets_iframe = node.tag_name and node.tag_name.upper() == 'IFRAME'

        scrolled = await self._scroll_element_container(node, signed_pixels)
        if scrolled:
            self.logger.debug(
                f'📜 Scrolled element {node_label} container {event.direction} by {event.amount} pixels'
            )
            if targets_iframe:
                # Cached state is intentionally left alone; multi_act detects
                # DOM changes by rebuilding and comparing when needed.
                self.logger.debug('🔄 Forcing DOM refresh after iframe scroll')
                # NOTE(review): nesting of this settle delay under the iframe
                # branch is inferred - confirm against the original indentation.
                await asyncio.sleep(0.2)
            return None

    # Target-level scroll (no node given, or container scroll unsuccessful).
    await self._scroll_with_cdp_gesture(signed_pixels)
    self.logger.debug(f'📜 Scrolled {event.direction} by {event.amount} pixels')
    return None
327
+
328
+ # ========== Implementation Methods ==========
329
+
330
async def _check_element_occlusion(self, backend_node_id: int, x: float, y: float, cdp_session) -> bool:
    """Decide whether another element covers the target at point (x, y).

    The target is resolved through CDP and compared, in page JavaScript, with
    ``document.elementFromPoint(x, y)``. The target counts as clickable when
    it is that element, contains it, or is contained by it.

    Args:
        backend_node_id: The backend node ID of the target element.
        x: X coordinate to probe.
        y: Y coordinate to probe.
        cdp_session: CDP session used for the calls.

    Returns:
        True if the element is considered occluded (including when it cannot
        be resolved or inspected), False if it is clickable. If the check
        itself raises, False is returned (assumed not occluded).
    """
    try:
        sid = cdp_session.session_id
        client = cdp_session.cdp_client

        resolved = await client.send.DOM.resolveNode(params={'backendNodeId': backend_node_id}, session_id=sid)
        if 'object' not in resolved:
            self.logger.debug('Could not resolve target element, assuming occluded')
            return True

        # Run the hit test in the page with the target bound as ``this``.
        probe = await client.send.Runtime.callFunctionOn(
            params={
                'objectId': resolved['object']['objectId'],
                'functionDeclaration': """
                function() {
                    const getElementInfo = (el) => {
                        return {
                            tagName: el.tagName,
                            id: el.id || '',
                            className: el.className || '',
                            textContent: (el.textContent || '').substring(0, 100)
                        };
                    };


                    const elementAtPoint = document.elementFromPoint(arguments[0], arguments[1]);
                    if (!elementAtPoint) {
                        return { targetInfo: getElementInfo(this), isClickable: false };
                    }


                    // Simple containment-based clickability logic
                    const isClickable = this === elementAtPoint ||
                        this.contains(elementAtPoint) ||
                        elementAtPoint.contains(this);

                    return {
                        targetInfo: getElementInfo(this),
                        elementAtPointInfo: getElementInfo(elementAtPoint),
                        isClickable: isClickable
                    };
                }
                """,
                'arguments': [{'value': x}, {'value': y}],
                'returnByValue': True,
            },
            session_id=sid,
        )

        if not ('result' in probe and 'value' in probe['result']):
            self.logger.debug('Could not get target element info, assuming occluded')
            return True

        verdict = probe['result']['value']
        if verdict.get('isClickable', False):
            self.logger.debug('Element is clickable (target, contained, or semantically related)')
            return False

        wanted = verdict.get('targetInfo', {})
        on_top = verdict.get('elementAtPointInfo', {})
        self.logger.debug(
            f'Element is occluded. Target: {wanted.get("tagName", "unknown")} '
            f'(id={wanted.get("id", "none")}), '
            f'ElementAtPoint: {on_top.get("tagName", "unknown")} '
            f'(id={on_top.get("id", "none")})'
        )
        return True

    except Exception as e:
        self.logger.debug(f'Occlusion check failed: {e}, assuming not occluded')
        return False
420
+
421
+ async def _click_element_node_impl(self, element_node) -> dict | None:
422
+ """
423
+ Click an element using pure CDP with multiple fallback methods for getting element geometry.
424
+
425
+ Args:
426
+ element_node: The DOM element to click
427
+ """
428
+
429
+ try:
430
+ # Check if element is a file input or select dropdown - these should not be clicked
431
+ tag_name = element_node.tag_name.lower() if element_node.tag_name else ''
432
+ element_type = element_node.attributes.get('type', '').lower() if element_node.attributes else ''
433
+
434
+ if tag_name == 'select':
435
+ msg = f'Cannot click on <select> elements. Use dropdown_options(index={element_node.backend_node_id}) action instead.'
436
+ # Return error dict instead of raising to avoid ERROR logs
437
+ return {'validation_error': msg}
438
+
439
+ if tag_name == 'input' and element_type == 'file':
440
+ msg = f'Cannot click on file input element (index={element_node.backend_node_id}). File uploads must be handled using upload_file_to_element action.'
441
+ # Return error dict instead of raising to avoid ERROR logs
442
+ return {'validation_error': msg}
443
+
444
+ # Get CDP client
445
+ cdp_session = await self.browser_session.cdp_client_for_node(element_node)
446
+
447
+ # Get the correct session ID for the element's frame
448
+ session_id = cdp_session.session_id
449
+
450
+ # Get element bounds
451
+ backend_node_id = element_node.backend_node_id
452
+
453
+ # Get viewport dimensions for visibility checks
454
+ layout_metrics = await cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=session_id)
455
+ viewport_width = layout_metrics['layoutViewport']['clientWidth']
456
+ viewport_height = layout_metrics['layoutViewport']['clientHeight']
457
+
458
+ # Scroll element into view FIRST before getting coordinates
459
+ try:
460
+ await cdp_session.cdp_client.send.DOM.scrollIntoViewIfNeeded(
461
+ params={'backendNodeId': backend_node_id}, session_id=session_id
462
+ )
463
+ await asyncio.sleep(0.05) # Wait for scroll to complete
464
+ self.logger.debug('Scrolled element into view before getting coordinates')
465
+ except Exception as e:
466
+ self.logger.debug(f'Failed to scroll element into view: {e}')
467
+
468
+ # Get element coordinates using the unified method AFTER scrolling
469
+ element_rect = await self.browser_session.get_element_coordinates(backend_node_id, cdp_session)
470
+
471
+ # Convert rect to quads format if we got coordinates
472
+ quads = []
473
+ if element_rect:
474
+ # Convert DOMRect to quad format
475
+ x, y, w, h = element_rect.x, element_rect.y, element_rect.width, element_rect.height
476
+ quads = [
477
+ [
478
+ x,
479
+ y, # top-left
480
+ x + w,
481
+ y, # top-right
482
+ x + w,
483
+ y + h, # bottom-right
484
+ x,
485
+ y + h, # bottom-left
486
+ ]
487
+ ]
488
+ self.logger.debug(
489
+ f'Got coordinates from unified method: {element_rect.x}, {element_rect.y}, {element_rect.width}x{element_rect.height}'
490
+ )
491
+
492
+ # If we still don't have quads, fall back to JS click
493
+ if not quads:
494
+ self.logger.warning('Could not get element geometry from any method, falling back to JavaScript click')
495
+ try:
496
+ result = await cdp_session.cdp_client.send.DOM.resolveNode(
497
+ params={'backendNodeId': backend_node_id},
498
+ session_id=session_id,
499
+ )
500
+ assert 'object' in result and 'objectId' in result['object'], (
501
+ 'Failed to find DOM element based on backendNodeId, maybe page content changed?'
502
+ )
503
+ object_id = result['object']['objectId']
504
+
505
+ await cdp_session.cdp_client.send.Runtime.callFunctionOn(
506
+ params={
507
+ 'functionDeclaration': 'function() { this.click(); }',
508
+ 'objectId': object_id,
509
+ },
510
+ session_id=session_id,
511
+ )
512
+ await asyncio.sleep(0.05)
513
+ # Navigation is handled by BrowserSession via events
514
+ return None
515
+ except Exception as js_e:
516
+ self.logger.error(f'CDP JavaScript click also failed: {js_e}')
517
+ if 'No node with given id found' in str(js_e):
518
+ raise Exception('Element with given id not found')
519
+ else:
520
+ raise Exception(f'Failed to click element: {js_e}')
521
+
522
+ # Find the largest visible quad within the viewport
523
+ best_quad = None
524
+ best_area = 0
525
+
526
+ for quad in quads:
527
+ if len(quad) < 8:
528
+ continue
529
+
530
+ # Calculate quad bounds
531
+ xs = [quad[i] for i in range(0, 8, 2)]
532
+ ys = [quad[i] for i in range(1, 8, 2)]
533
+ min_x, max_x = min(xs), max(xs)
534
+ min_y, max_y = min(ys), max(ys)
535
+
536
+ # Check if quad intersects with viewport
537
+ if max_x < 0 or max_y < 0 or min_x > viewport_width or min_y > viewport_height:
538
+ continue # Quad is completely outside viewport
539
+
540
+ # Calculate visible area (intersection with viewport)
541
+ visible_min_x = max(0, min_x)
542
+ visible_max_x = min(viewport_width, max_x)
543
+ visible_min_y = max(0, min_y)
544
+ visible_max_y = min(viewport_height, max_y)
545
+
546
+ visible_width = visible_max_x - visible_min_x
547
+ visible_height = visible_max_y - visible_min_y
548
+ visible_area = visible_width * visible_height
549
+
550
+ if visible_area > best_area:
551
+ best_area = visible_area
552
+ best_quad = quad
553
+
554
+ if not best_quad:
555
+ # No visible quad found, use the first quad anyway
556
+ best_quad = quads[0]
557
+ self.logger.warning('No visible quad found, using first quad')
558
+
559
+ # Calculate center point of the best quad
560
+ center_x = sum(best_quad[i] for i in range(0, 8, 2)) / 4
561
+ center_y = sum(best_quad[i] for i in range(1, 8, 2)) / 4
562
+
563
+ # Ensure click point is within viewport bounds
564
+ center_x = max(0, min(viewport_width - 1, center_x))
565
+ center_y = max(0, min(viewport_height - 1, center_y))
566
+
567
+ # Check for occlusion before attempting CDP click
568
+ is_occluded = await self._check_element_occlusion(backend_node_id, center_x, center_y, cdp_session)
569
+
570
+ if is_occluded:
571
+ self.logger.debug('🚫 Element is occluded, falling back to JavaScript click')
572
+ try:
573
+ result = await cdp_session.cdp_client.send.DOM.resolveNode(
574
+ params={'backendNodeId': backend_node_id},
575
+ session_id=session_id,
576
+ )
577
+ assert 'object' in result and 'objectId' in result['object'], (
578
+ 'Failed to find DOM element based on backendNodeId'
579
+ )
580
+ object_id = result['object']['objectId']
581
+
582
+ await cdp_session.cdp_client.send.Runtime.callFunctionOn(
583
+ params={
584
+ 'functionDeclaration': 'function() { this.click(); }',
585
+ 'objectId': object_id,
586
+ },
587
+ session_id=session_id,
588
+ )
589
+ await asyncio.sleep(0.05)
590
+ return None
591
+ except Exception as js_e:
592
+ self.logger.error(f'JavaScript click fallback failed: {js_e}')
593
+ raise Exception(f'Failed to click occluded element: {js_e}')
594
+
595
+ # Perform the click using CDP (element is not occluded)
596
+ try:
597
+ self.logger.debug(f'👆 Dragging mouse over element before clicking x: {center_x}px y: {center_y}px ...')
598
+ # Move mouse to element
599
+ await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
600
+ params={
601
+ 'type': 'mouseMoved',
602
+ 'x': center_x,
603
+ 'y': center_y,
604
+ },
605
+ session_id=session_id,
606
+ )
607
+ await asyncio.sleep(0.05)
608
+
609
+ # Mouse down
610
+ self.logger.debug(f'👆🏾 Clicking x: {center_x}px y: {center_y}px ...')
611
+ try:
612
+ await asyncio.wait_for(
613
+ cdp_session.cdp_client.send.Input.dispatchMouseEvent(
614
+ params={
615
+ 'type': 'mousePressed',
616
+ 'x': center_x,
617
+ 'y': center_y,
618
+ 'button': 'left',
619
+ 'clickCount': 1,
620
+ },
621
+ session_id=session_id,
622
+ ),
623
+ timeout=3.0, # 3 second timeout for mousePressed
624
+ )
625
+ await asyncio.sleep(0.08)
626
+ except TimeoutError:
627
+ self.logger.debug('⏱️ Mouse down timed out (likely due to dialog), continuing...')
628
+ # Don't sleep if we timed out
629
+
630
+ # Mouse up
631
+ try:
632
+ await asyncio.wait_for(
633
+ cdp_session.cdp_client.send.Input.dispatchMouseEvent(
634
+ params={
635
+ 'type': 'mouseReleased',
636
+ 'x': center_x,
637
+ 'y': center_y,
638
+ 'button': 'left',
639
+ 'clickCount': 1,
640
+ },
641
+ session_id=session_id,
642
+ ),
643
+ timeout=5.0, # 5 second timeout for mouseReleased
644
+ )
645
+ except TimeoutError:
646
+ self.logger.debug('⏱️ Mouse up timed out (possibly due to lag or dialog popup), continuing...')
647
+
648
+ self.logger.debug('🖱️ Clicked successfully using x,y coordinates')
649
+
650
+ # Return coordinates as dict for metadata
651
+ return {'click_x': center_x, 'click_y': center_y}
652
+
653
+ except Exception as e:
654
+ self.logger.warning(f'CDP click failed: {type(e).__name__}: {e}')
655
+ # Fall back to JavaScript click via CDP
656
+ try:
657
+ result = await cdp_session.cdp_client.send.DOM.resolveNode(
658
+ params={'backendNodeId': backend_node_id},
659
+ session_id=session_id,
660
+ )
661
+ assert 'object' in result and 'objectId' in result['object'], (
662
+ 'Failed to find DOM element based on backendNodeId, maybe page content changed?'
663
+ )
664
+ object_id = result['object']['objectId']
665
+
666
+ await cdp_session.cdp_client.send.Runtime.callFunctionOn(
667
+ params={
668
+ 'functionDeclaration': 'function() { this.click(); }',
669
+ 'objectId': object_id,
670
+ },
671
+ session_id=session_id,
672
+ )
673
+
674
+ # Small delay for dialog dismissal
675
+ await asyncio.sleep(0.1)
676
+
677
+ return None
678
+ except Exception as js_e:
679
+ self.logger.error(f'CDP JavaScript click also failed: {js_e}')
680
+ raise Exception(f'Failed to click element: {e}')
681
+ finally:
682
+ # Always re-focus back to original top-level page session context in case click opened a new tab/popup/window/dialog/etc.
683
+ # Use timeout to prevent hanging if dialog is blocking
684
+ try:
685
+ cdp_session = await asyncio.wait_for(self.browser_session.get_or_create_cdp_session(focus=True), timeout=3.0)
686
+ await asyncio.wait_for(
687
+ cdp_session.cdp_client.send.Runtime.runIfWaitingForDebugger(session_id=cdp_session.session_id),
688
+ timeout=2.0,
689
+ )
690
+ except TimeoutError:
691
+ self.logger.debug('⏱️ Refocus after click timed out (page may be blocked by dialog). Continuing...')
692
+ except Exception as e:
693
+ self.logger.debug(f'⚠️ Refocus error (non-critical): {type(e).__name__}: {e}')
694
+
695
+ except URLNotAllowedError as e:
696
+ raise e
697
+ except BrowserError as e:
698
+ raise e
699
+ except Exception as e:
700
+ # Extract key element info for error message
701
+ element_info = f'<{element_node.tag_name or "unknown"}'
702
+ if element_node.backend_node_id:
703
+ element_info += f' index={element_node.backend_node_id}'
704
+ element_info += '>'
705
+
706
+ # Create helpful error message based on context
707
+ error_detail = f'Failed to click element {element_info}. The element may not be interactable or visible.'
708
+
709
+ # Add hint if element has index (common in code-use mode)
710
+ if element_node.backend_node_id:
711
+ error_detail += f' If the page changed after navigation/interaction, the index [{element_node.backend_node_id}] may be stale. Get fresh browser state before retrying.'
712
+
713
+ raise BrowserError(
714
+ message=f'Failed to click element: {e}',
715
+ long_term_memory=error_detail,
716
+ )
717
+
718
+ async def _type_to_page(self, text: str):
719
+ """
720
+ Type text to the page (whatever element currently has focus).
721
+ This is used when index is 0 or when an element can't be found.
722
+ """
723
+ try:
724
+ # Get CDP client and session
725
+ cdp_session = await self.browser_session.get_or_create_cdp_session(target_id=None, focus=True)
726
+
727
+ # Type the text character by character to the focused element
728
+ for char in text:
729
+ # Handle newline characters as Enter key
730
+ if char == '\n':
731
+ # Send proper Enter key sequence
732
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
733
+ params={
734
+ 'type': 'keyDown',
735
+ 'key': 'Enter',
736
+ 'code': 'Enter',
737
+ 'windowsVirtualKeyCode': 13,
738
+ },
739
+ session_id=cdp_session.session_id,
740
+ )
741
+ # Send char event with carriage return
742
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
743
+ params={
744
+ 'type': 'char',
745
+ 'text': '\r',
746
+ },
747
+ session_id=cdp_session.session_id,
748
+ )
749
+ # Send keyup
750
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
751
+ params={
752
+ 'type': 'keyUp',
753
+ 'key': 'Enter',
754
+ 'code': 'Enter',
755
+ 'windowsVirtualKeyCode': 13,
756
+ },
757
+ session_id=cdp_session.session_id,
758
+ )
759
+ else:
760
+ # Handle regular characters
761
+ # Send keydown
762
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
763
+ params={
764
+ 'type': 'keyDown',
765
+ 'key': char,
766
+ },
767
+ session_id=cdp_session.session_id,
768
+ )
769
+ # Send char for actual text input
770
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
771
+ params={
772
+ 'type': 'char',
773
+ 'text': char,
774
+ },
775
+ session_id=cdp_session.session_id,
776
+ )
777
+ # Send keyup
778
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
779
+ params={
780
+ 'type': 'keyUp',
781
+ 'key': char,
782
+ },
783
+ session_id=cdp_session.session_id,
784
+ )
785
+ # Add 18ms delay between keystrokes
786
+ await asyncio.sleep(0.018)
787
+
788
+ except Exception as e:
789
+ raise Exception(f'Failed to type to page: {str(e)}')
790
+
791
+ def _get_char_modifiers_and_vk(self, char: str) -> tuple[int, int, str]:
792
+ """Get modifiers, virtual key code, and base key for a character.
793
+
794
+ Returns:
795
+ (modifiers, windowsVirtualKeyCode, base_key)
796
+ """
797
+ # Characters that require Shift modifier
798
+ shift_chars = {
799
+ '!': ('1', 49),
800
+ '@': ('2', 50),
801
+ '#': ('3', 51),
802
+ '$': ('4', 52),
803
+ '%': ('5', 53),
804
+ '^': ('6', 54),
805
+ '&': ('7', 55),
806
+ '*': ('8', 56),
807
+ '(': ('9', 57),
808
+ ')': ('0', 48),
809
+ '_': ('-', 189),
810
+ '+': ('=', 187),
811
+ '{': ('[', 219),
812
+ '}': (']', 221),
813
+ '|': ('\\', 220),
814
+ ':': (';', 186),
815
+ '"': ("'", 222),
816
+ '<': (',', 188),
817
+ '>': ('.', 190),
818
+ '?': ('/', 191),
819
+ '~': ('`', 192),
820
+ }
821
+
822
+ # Check if character requires Shift
823
+ if char in shift_chars:
824
+ base_key, vk_code = shift_chars[char]
825
+ return (8, vk_code, base_key) # Shift=8
826
+
827
+ # Uppercase letters require Shift
828
+ if char.isupper():
829
+ return (8, ord(char), char.lower()) # Shift=8
830
+
831
+ # Lowercase letters
832
+ if char.islower():
833
+ return (0, ord(char.upper()), char)
834
+
835
+ # Numbers
836
+ if char.isdigit():
837
+ return (0, ord(char), char)
838
+
839
+ # Special characters without Shift
840
+ no_shift_chars = {
841
+ ' ': 32,
842
+ '-': 189,
843
+ '=': 187,
844
+ '[': 219,
845
+ ']': 221,
846
+ '\\': 220,
847
+ ';': 186,
848
+ "'": 222,
849
+ ',': 188,
850
+ '.': 190,
851
+ '/': 191,
852
+ '`': 192,
853
+ }
854
+
855
+ if char in no_shift_chars:
856
+ return (0, no_shift_chars[char], char)
857
+
858
+ # Fallback
859
+ return (0, ord(char.upper()) if char.isalpha() else ord(char), char)
860
+
861
+ def _get_key_code_for_char(self, char: str) -> str:
862
+ """Get the proper key code for a character (like Playwright does)."""
863
+ # Key code mapping for common characters (using proper base keys + modifiers)
864
+ key_codes = {
865
+ ' ': 'Space',
866
+ '.': 'Period',
867
+ ',': 'Comma',
868
+ '-': 'Minus',
869
+ '_': 'Minus', # Underscore uses Minus with Shift
870
+ '@': 'Digit2', # @ uses Digit2 with Shift
871
+ '!': 'Digit1', # ! uses Digit1 with Shift (not 'Exclamation')
872
+ '?': 'Slash', # ? uses Slash with Shift
873
+ ':': 'Semicolon', # : uses Semicolon with Shift
874
+ ';': 'Semicolon',
875
+ '(': 'Digit9', # ( uses Digit9 with Shift
876
+ ')': 'Digit0', # ) uses Digit0 with Shift
877
+ '[': 'BracketLeft',
878
+ ']': 'BracketRight',
879
+ '{': 'BracketLeft', # { uses BracketLeft with Shift
880
+ '}': 'BracketRight', # } uses BracketRight with Shift
881
+ '/': 'Slash',
882
+ '\\': 'Backslash',
883
+ '=': 'Equal',
884
+ '+': 'Equal', # + uses Equal with Shift
885
+ '*': 'Digit8', # * uses Digit8 with Shift
886
+ '&': 'Digit7', # & uses Digit7 with Shift
887
+ '%': 'Digit5', # % uses Digit5 with Shift
888
+ '$': 'Digit4', # $ uses Digit4 with Shift
889
+ '#': 'Digit3', # # uses Digit3 with Shift
890
+ '^': 'Digit6', # ^ uses Digit6 with Shift
891
+ '~': 'Backquote', # ~ uses Backquote with Shift
892
+ '`': 'Backquote',
893
+ "'": 'Quote',
894
+ '"': 'Quote', # " uses Quote with Shift
895
+ }
896
+
897
+ # Numbers
898
+ if char.isdigit():
899
+ return f'Digit{char}'
900
+
901
+ # Letters
902
+ if char.isalpha():
903
+ return f'Key{char.upper()}'
904
+
905
+ # Special characters
906
+ if char in key_codes:
907
+ return key_codes[char]
908
+
909
+ # Fallback for unknown characters
910
+ return f'Key{char.upper()}'
911
+
912
+ async def _clear_text_field(self, object_id: str, cdp_session) -> bool:
913
+ """Clear text field using multiple strategies, starting with the most reliable."""
914
+ try:
915
+ # Strategy 1: Direct JavaScript value/content setting (handles both inputs and contenteditable)
916
+ self.logger.debug('🧹 Clearing text field using JavaScript value setting')
917
+
918
+ clear_result = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
919
+ params={
920
+ 'functionDeclaration': """
921
+ function() {
922
+ // Check if it's a contenteditable element
923
+ const hasContentEditable = this.getAttribute('contenteditable') === 'true' ||
924
+ this.getAttribute('contenteditable') === '' ||
925
+ this.isContentEditable === true;
926
+
927
+ if (hasContentEditable) {
928
+ // For contenteditable elements, clear all content
929
+ while (this.firstChild) {
930
+ this.removeChild(this.firstChild);
931
+ }
932
+ this.textContent = "";
933
+ this.innerHTML = "";
934
+
935
+ // Focus and position cursor at the beginning
936
+ this.focus();
937
+ const selection = window.getSelection();
938
+ const range = document.createRange();
939
+ range.setStart(this, 0);
940
+ range.setEnd(this, 0);
941
+ selection.removeAllRanges();
942
+ selection.addRange(range);
943
+
944
+ // Dispatch events
945
+ this.dispatchEvent(new Event("input", { bubbles: true }));
946
+ this.dispatchEvent(new Event("change", { bubbles: true }));
947
+
948
+ return {cleared: true, method: 'contenteditable', finalText: this.textContent};
949
+ } else if (this.value !== undefined) {
950
+ // For regular inputs with value property
951
+ try {
952
+ this.select();
953
+ } catch (e) {
954
+ // ignore
955
+ }
956
+ this.value = "";
957
+ this.dispatchEvent(new Event("input", { bubbles: true }));
958
+ this.dispatchEvent(new Event("change", { bubbles: true }));
959
+ return {cleared: true, method: 'value', finalText: this.value};
960
+ } else {
961
+ return {cleared: false, method: 'none', error: 'Not a supported input type'};
962
+ }
963
+ }
964
+ """,
965
+ 'objectId': object_id,
966
+ 'returnByValue': True,
967
+ },
968
+ session_id=cdp_session.session_id,
969
+ )
970
+
971
+ # Check the clear result
972
+ clear_info = clear_result.get('result', {}).get('value', {})
973
+ self.logger.debug(f'Clear result: {clear_info}')
974
+
975
+ if clear_info.get('cleared'):
976
+ final_text = clear_info.get('finalText', '')
977
+ if not final_text or not final_text.strip():
978
+ self.logger.debug(f'✅ Text field cleared successfully using {clear_info.get("method")}')
979
+ return True
980
+ else:
981
+ self.logger.debug(f'⚠️ JavaScript clear partially failed, field still contains: "{final_text}"')
982
+ return False
983
+ else:
984
+ self.logger.debug(f'❌ JavaScript clear failed: {clear_info.get("error", "Unknown error")}')
985
+ return False
986
+
987
+ except Exception as e:
988
+ self.logger.debug(f'JavaScript clear failed with exception: {e}')
989
+ return False
990
+
991
+ # Strategy 2: Triple-click + Delete (fallback for stubborn fields)
992
+ try:
993
+ self.logger.debug('🧹 Fallback: Clearing using triple-click + Delete')
994
+
995
+ # Get element center coordinates for triple-click
996
+ bounds_result = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
997
+ params={
998
+ 'functionDeclaration': 'function() { return this.getBoundingClientRect(); }',
999
+ 'objectId': object_id,
1000
+ 'returnByValue': True,
1001
+ },
1002
+ session_id=cdp_session.session_id,
1003
+ )
1004
+
1005
+ if bounds_result.get('result', {}).get('value'):
1006
+ bounds = bounds_result['result']['value']
1007
+ center_x = bounds['x'] + bounds['width'] / 2
1008
+ center_y = bounds['y'] + bounds['height'] / 2
1009
+
1010
+ # Triple-click to select all text
1011
+ await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
1012
+ params={
1013
+ 'type': 'mousePressed',
1014
+ 'x': center_x,
1015
+ 'y': center_y,
1016
+ 'button': 'left',
1017
+ 'clickCount': 3,
1018
+ },
1019
+ session_id=cdp_session.session_id,
1020
+ )
1021
+ await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
1022
+ params={
1023
+ 'type': 'mouseReleased',
1024
+ 'x': center_x,
1025
+ 'y': center_y,
1026
+ 'button': 'left',
1027
+ 'clickCount': 3,
1028
+ },
1029
+ session_id=cdp_session.session_id,
1030
+ )
1031
+
1032
+ # Delete selected text
1033
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
1034
+ params={
1035
+ 'type': 'keyDown',
1036
+ 'key': 'Delete',
1037
+ 'code': 'Delete',
1038
+ },
1039
+ session_id=cdp_session.session_id,
1040
+ )
1041
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
1042
+ params={
1043
+ 'type': 'keyUp',
1044
+ 'key': 'Delete',
1045
+ 'code': 'Delete',
1046
+ },
1047
+ session_id=cdp_session.session_id,
1048
+ )
1049
+
1050
+ self.logger.debug('✅ Text field cleared using triple-click + Delete')
1051
+ return True
1052
+
1053
+ except Exception as e:
1054
+ self.logger.debug(f'Triple-click clear failed: {e}')
1055
+
1056
+ # Strategy 3: Keyboard shortcuts (last resort)
1057
+ try:
1058
+ import platform
1059
+
1060
+ is_macos = platform.system() == 'Darwin'
1061
+ select_all_modifier = 4 if is_macos else 2 # Meta=4 (Cmd), Ctrl=2
1062
+ modifier_name = 'Cmd' if is_macos else 'Ctrl'
1063
+
1064
+ self.logger.debug(f'🧹 Last resort: Clearing using {modifier_name}+A + Backspace')
1065
+
1066
+ # Select all text (Ctrl/Cmd+A)
1067
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
1068
+ params={
1069
+ 'type': 'keyDown',
1070
+ 'key': 'a',
1071
+ 'code': 'KeyA',
1072
+ 'modifiers': select_all_modifier,
1073
+ },
1074
+ session_id=cdp_session.session_id,
1075
+ )
1076
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
1077
+ params={
1078
+ 'type': 'keyUp',
1079
+ 'key': 'a',
1080
+ 'code': 'KeyA',
1081
+ 'modifiers': select_all_modifier,
1082
+ },
1083
+ session_id=cdp_session.session_id,
1084
+ )
1085
+
1086
+ # Delete selected text (Backspace)
1087
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
1088
+ params={
1089
+ 'type': 'keyDown',
1090
+ 'key': 'Backspace',
1091
+ 'code': 'Backspace',
1092
+ },
1093
+ session_id=cdp_session.session_id,
1094
+ )
1095
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
1096
+ params={
1097
+ 'type': 'keyUp',
1098
+ 'key': 'Backspace',
1099
+ 'code': 'Backspace',
1100
+ },
1101
+ session_id=cdp_session.session_id,
1102
+ )
1103
+
1104
+ self.logger.debug('✅ Text field cleared using keyboard shortcuts')
1105
+ return True
1106
+
1107
+ except Exception as e:
1108
+ self.logger.debug(f'All clearing strategies failed: {e}')
1109
+ return False
1110
+
1111
+ async def _focus_element_simple(
1112
+ self, backend_node_id: int, object_id: str, cdp_session, input_coordinates: dict | None = None
1113
+ ) -> bool:
1114
+ """Simple focus strategy: CDP first, then click if failed."""
1115
+
1116
+ # Strategy 1: Try CDP DOM.focus first
1117
+ try:
1118
+ result = await cdp_session.cdp_client.send.DOM.focus(
1119
+ params={'backendNodeId': backend_node_id},
1120
+ session_id=cdp_session.session_id,
1121
+ )
1122
+ self.logger.debug(f'Element focused using CDP DOM.focus (result: {result})')
1123
+ return True
1124
+
1125
+ except Exception as e:
1126
+ self.logger.debug(f'❌ CDP DOM.focus threw exception: {type(e).__name__}: {e}')
1127
+
1128
+ # Strategy 2: Try click to focus if CDP failed
1129
+ if input_coordinates and 'input_x' in input_coordinates and 'input_y' in input_coordinates:
1130
+ try:
1131
+ click_x = input_coordinates['input_x']
1132
+ click_y = input_coordinates['input_y']
1133
+
1134
+ self.logger.debug(f'🎯 Attempting click-to-focus at ({click_x:.1f}, {click_y:.1f})')
1135
+
1136
+ # Click to focus
1137
+ await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
1138
+ params={
1139
+ 'type': 'mousePressed',
1140
+ 'x': click_x,
1141
+ 'y': click_y,
1142
+ 'button': 'left',
1143
+ 'clickCount': 1,
1144
+ },
1145
+ session_id=cdp_session.session_id,
1146
+ )
1147
+ await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
1148
+ params={
1149
+ 'type': 'mouseReleased',
1150
+ 'x': click_x,
1151
+ 'y': click_y,
1152
+ 'button': 'left',
1153
+ 'clickCount': 1,
1154
+ },
1155
+ session_id=cdp_session.session_id,
1156
+ )
1157
+
1158
+ self.logger.debug('✅ Element focused using click method')
1159
+ return True
1160
+
1161
+ except Exception as e:
1162
+ self.logger.debug(f'Click focus failed: {e}')
1163
+
1164
+ # Both strategies failed
1165
+ self.logger.debug('Focus strategies failed, will attempt typing anyway')
1166
+ return False
1167
+
1168
+ def _requires_direct_value_assignment(self, element_node: EnhancedDOMTreeNode) -> bool:
1169
+ """
1170
+ Check if an element requires direct value assignment instead of character-by-character typing.
1171
+
1172
+ Certain input types have compound components, custom plugins, or special requirements
1173
+ that make character-by-character typing unreliable. These need direct .value assignment:
1174
+
1175
+ Native HTML5:
1176
+ - date, time, datetime-local: Have spinbutton components (ISO format required)
1177
+ - month, week: Similar compound structure
1178
+ - color: Expects hex format #RRGGBB
1179
+ - range: Needs numeric value within min/max
1180
+
1181
+ jQuery/Bootstrap Datepickers:
1182
+ - Detected by class names or data attributes
1183
+ - Often expect specific date formats (MM/DD/YYYY, DD/MM/YYYY, etc.)
1184
+
1185
+ Note: We use direct assignment because:
1186
+ 1. Typing triggers intermediate validation that might reject partial values
1187
+ 2. Compound components (like date spinbuttons) don't work with sequential typing
1188
+ 3. It's much faster and more reliable
1189
+ 4. We dispatch proper input/change events afterward to trigger listeners
1190
+ """
1191
+ if not element_node.tag_name or not element_node.attributes:
1192
+ return False
1193
+
1194
+ tag_name = element_node.tag_name.lower()
1195
+
1196
+ # Check for native HTML5 inputs that need direct assignment
1197
+ if tag_name == 'input':
1198
+ input_type = element_node.attributes.get('type', '').lower()
1199
+
1200
+ # Native HTML5 inputs with compound components or strict formats
1201
+ if input_type in {'date', 'time', 'datetime-local', 'month', 'week', 'color', 'range'}:
1202
+ return True
1203
+
1204
+ # Detect jQuery/Bootstrap datepickers (text inputs with datepicker plugins)
1205
+ if input_type in {'text', ''}:
1206
+ # Check for common datepicker indicators
1207
+ class_attr = element_node.attributes.get('class', '').lower()
1208
+ if any(
1209
+ indicator in class_attr
1210
+ for indicator in ['datepicker', 'daterangepicker', 'datetimepicker', 'bootstrap-datepicker']
1211
+ ):
1212
+ return True
1213
+
1214
+ # Check for data attributes indicating datepickers
1215
+ if any(attr in element_node.attributes for attr in ['data-datepicker', 'data-date-format', 'data-provide']):
1216
+ return True
1217
+
1218
+ return False
1219
+
1220
+ async def _set_value_directly(self, element_node: EnhancedDOMTreeNode, text: str, object_id: str, cdp_session) -> None:
1221
+ """
1222
+ Set element value directly using JavaScript for inputs that don't support typing.
1223
+
1224
+ This is used for:
1225
+ - Date/time inputs where character-by-character typing doesn't work
1226
+ - jQuery datepickers that need direct value assignment
1227
+ - Color/range inputs that need specific formats
1228
+ - Any input with custom plugins that intercept typing
1229
+
1230
+ After setting the value, we dispatch comprehensive events to ensure all frameworks
1231
+ and plugins recognize the change (React, Vue, Angular, jQuery, etc.)
1232
+ """
1233
+ try:
1234
+ # Set the value using JavaScript with comprehensive event dispatching
1235
+ # callFunctionOn expects a function body (not a self-invoking function)
1236
+ set_value_js = f"""
1237
+ function() {{
1238
+ // Store old value for comparison
1239
+ const oldValue = this.value;
1240
+
1241
+ // REACT-COMPATIBLE VALUE SETTING:
1242
+ // React uses Object.getOwnPropertyDescriptor to track input changes
1243
+ // We need to use the native setter to bypass React's tracking and then trigger events
1244
+ const nativeInputValueSetter = Object.getOwnPropertyDescriptor(
1245
+ window.HTMLInputElement.prototype,
1246
+ 'value'
1247
+ ).set;
1248
+
1249
+ // Set the value using the native setter (bypasses React's control)
1250
+ nativeInputValueSetter.call(this, {json.dumps(text)});
1251
+
1252
+ // Dispatch comprehensive events to ensure all frameworks detect the change
1253
+ // Order matters: focus -> input -> change -> blur (mimics user interaction)
1254
+
1255
+ // 1. Focus event (in case element isn't focused)
1256
+ this.dispatchEvent(new FocusEvent('focus', {{ bubbles: true }}));
1257
+
1258
+ // 2. Input event (CRITICAL for React onChange)
1259
+ // React listens to 'input' events on the document and checks for value changes
1260
+ const inputEvent = new Event('input', {{ bubbles: true, cancelable: true }});
1261
+ this.dispatchEvent(inputEvent);
1262
+
1263
+ // 3. Change event (for form handling, traditional listeners)
1264
+ const changeEvent = new Event('change', {{ bubbles: true, cancelable: true }});
1265
+ this.dispatchEvent(changeEvent);
1266
+
1267
+ // 4. Blur event (triggers final validation in some libraries)
1268
+ this.dispatchEvent(new FocusEvent('blur', {{ bubbles: true }}));
1269
+
1270
+ // 5. jQuery-specific events (if jQuery is present)
1271
+ if (typeof jQuery !== 'undefined' && jQuery.fn) {{
1272
+ try {{
1273
+ jQuery(this).trigger('change');
1274
+ // Trigger datepicker-specific events if it's a datepicker
1275
+ if (jQuery(this).data('datepicker')) {{
1276
+ jQuery(this).datepicker('update');
1277
+ }}
1278
+ }} catch (e) {{
1279
+ // jQuery not available or error, continue anyway
1280
+ }}
1281
+ }}
1282
+
1283
+ return this.value;
1284
+ }}
1285
+ """
1286
+
1287
+ result = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
1288
+ params={
1289
+ 'objectId': object_id,
1290
+ 'functionDeclaration': set_value_js,
1291
+ 'returnByValue': True,
1292
+ },
1293
+ session_id=cdp_session.session_id,
1294
+ )
1295
+
1296
+ # Verify the value was set correctly
1297
+ if 'result' in result and 'value' in result['result']:
1298
+ actual_value = result['result']['value']
1299
+ self.logger.debug(f'✅ Value set directly to: "{actual_value}"')
1300
+ else:
1301
+ self.logger.warning('⚠️ Could not verify value was set correctly')
1302
+
1303
+ except Exception as e:
1304
+ self.logger.error(f'❌ Failed to set value directly: {e}')
1305
+ raise
1306
+
1307
+ async def _input_text_element_node_impl(
1308
+ self, element_node: EnhancedDOMTreeNode, text: str, clear: bool = True, is_sensitive: bool = False
1309
+ ) -> dict | None:
1310
+ """
1311
+ Input text into an element using pure CDP with improved focus fallbacks.
1312
+
1313
+ For date/time inputs, uses direct value assignment instead of typing.
1314
+ """
1315
+
1316
+ try:
1317
+ # Get CDP client
1318
+ cdp_client = self.browser_session.cdp_client
1319
+
1320
+ # Get the correct session ID for the element's iframe
1321
+ # session_id = await self._get_session_id_for_element(element_node)
1322
+
1323
+ # cdp_session = await self.browser_session.get_or_create_cdp_session(target_id=element_node.target_id, focus=True)
1324
+ cdp_session = await self.browser_session.cdp_client_for_node(element_node)
1325
+
1326
+ # Get element info
1327
+ backend_node_id = element_node.backend_node_id
1328
+
1329
+ # Track coordinates for metadata
1330
+ input_coordinates = None
1331
+
1332
+ # Scroll element into view
1333
+ try:
1334
+ await cdp_session.cdp_client.send.DOM.scrollIntoViewIfNeeded(
1335
+ params={'backendNodeId': backend_node_id}, session_id=cdp_session.session_id
1336
+ )
1337
+ await asyncio.sleep(0.01)
1338
+ except Exception as e:
1339
+ # Node detached errors are common with shadow DOM and dynamic content
1340
+ # The element can still be interacted with even if scrolling fails
1341
+ error_str = str(e)
1342
+ if 'Node is detached from document' in error_str or 'detached from document' in error_str:
1343
+ self.logger.debug(
1344
+ f'Element node temporarily detached during scroll (common with shadow DOM), continuing: {element_node}'
1345
+ )
1346
+ else:
1347
+ self.logger.debug(f'Failed to scroll element {element_node} into view before typing: {type(e).__name__}: {e}')
1348
+
1349
+ # Get object ID for the element
1350
+ result = await cdp_client.send.DOM.resolveNode(
1351
+ params={'backendNodeId': backend_node_id},
1352
+ session_id=cdp_session.session_id,
1353
+ )
1354
+ assert 'object' in result and 'objectId' in result['object'], (
1355
+ 'Failed to find DOM element based on backendNodeId, maybe page content changed?'
1356
+ )
1357
+ object_id = result['object']['objectId']
1358
+
1359
+ # Get current coordinates using unified method
1360
+ coords = await self.browser_session.get_element_coordinates(backend_node_id, cdp_session)
1361
+ if coords:
1362
+ center_x = coords.x + coords.width / 2
1363
+ center_y = coords.y + coords.height / 2
1364
+
1365
+ # Check for occlusion before using coordinates for focus
1366
+ is_occluded = await self._check_element_occlusion(backend_node_id, center_x, center_y, cdp_session)
1367
+
1368
+ if is_occluded:
1369
+ self.logger.debug('🚫 Input element is occluded, skipping coordinate-based focus')
1370
+ input_coordinates = None # Force fallback to CDP-only focus
1371
+ else:
1372
+ input_coordinates = {'input_x': center_x, 'input_y': center_y}
1373
+ self.logger.debug(f'Using unified coordinates: x={center_x:.1f}, y={center_y:.1f}')
1374
+ else:
1375
+ input_coordinates = None
1376
+ self.logger.debug('No coordinates found for element')
1377
+
1378
+ # Ensure we have a valid object_id before proceeding
1379
+ if not object_id:
1380
+ raise ValueError('Could not get object_id for element')
1381
+
1382
+ # Step 1: Focus the element using simple strategy
1383
+ focused_successfully = await self._focus_element_simple(
1384
+ backend_node_id=backend_node_id, object_id=object_id, cdp_session=cdp_session, input_coordinates=input_coordinates
1385
+ )
1386
+
1387
+ # Step 2: Check if this element requires direct value assignment (date/time inputs)
1388
+ requires_direct_assignment = self._requires_direct_value_assignment(element_node)
1389
+
1390
+ if requires_direct_assignment:
1391
+ # Date/time inputs: use direct value assignment instead of typing
1392
+ self.logger.debug(
1393
+ f'🎯 Element type={element_node.attributes.get("type")} requires direct value assignment, setting value directly'
1394
+ )
1395
+ await self._set_value_directly(element_node, text, object_id, cdp_session)
1396
+
1397
+ # Return input coordinates for metadata
1398
+ return input_coordinates
1399
+
1400
+ # Step 3: Clear existing text if requested (only for regular inputs that support typing)
1401
+ if clear:
1402
+ cleared_successfully = await self._clear_text_field(object_id=object_id, cdp_session=cdp_session)
1403
+ if not cleared_successfully:
1404
+ self.logger.warning('⚠️ Text field clearing failed, typing may append to existing text')
1405
+
1406
+ # Step 4: Type the text character by character using proper human-like key events
1407
+ # This emulates exactly how a human would type, which modern websites expect
1408
+ if is_sensitive:
1409
+ # Note: sensitive_key_name is not passed to this low-level method,
1410
+ # but we could extend the signature if needed for more granular logging
1411
+ self.logger.debug('🎯 Typing <sensitive> character by character')
1412
+ else:
1413
+ self.logger.debug(f'🎯 Typing text character by character: "{text}"')
1414
+
1415
+ for i, char in enumerate(text):
1416
+ # Handle newline characters as Enter key
1417
+ if char == '\n':
1418
+ # Send proper Enter key sequence
1419
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
1420
+ params={
1421
+ 'type': 'keyDown',
1422
+ 'key': 'Enter',
1423
+ 'code': 'Enter',
1424
+ 'windowsVirtualKeyCode': 13,
1425
+ },
1426
+ session_id=cdp_session.session_id,
1427
+ )
1428
+
1429
+ # Small delay to emulate human typing speed
1430
+ await asyncio.sleep(0.001)
1431
+
1432
+ # Send char event with carriage return
1433
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
1434
+ params={
1435
+ 'type': 'char',
1436
+ 'text': '\r',
1437
+ 'key': 'Enter',
1438
+ },
1439
+ session_id=cdp_session.session_id,
1440
+ )
1441
+
1442
+ # Send keyUp event
1443
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
1444
+ params={
1445
+ 'type': 'keyUp',
1446
+ 'key': 'Enter',
1447
+ 'code': 'Enter',
1448
+ 'windowsVirtualKeyCode': 13,
1449
+ },
1450
+ session_id=cdp_session.session_id,
1451
+ )
1452
+ else:
1453
+ # Handle regular characters
1454
+ # Get proper modifiers, VK code, and base key for the character
1455
+ modifiers, vk_code, base_key = self._get_char_modifiers_and_vk(char)
1456
+ key_code = self._get_key_code_for_char(base_key)
1457
+
1458
+ # self.logger.debug(f'🎯 Typing character {i + 1}/{len(text)}: "{char}" (base_key: {base_key}, code: {key_code}, modifiers: {modifiers}, vk: {vk_code})')
1459
+
1460
+ # Step 1: Send keyDown event (NO text parameter)
1461
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
1462
+ params={
1463
+ 'type': 'keyDown',
1464
+ 'key': base_key,
1465
+ 'code': key_code,
1466
+ 'modifiers': modifiers,
1467
+ 'windowsVirtualKeyCode': vk_code,
1468
+ },
1469
+ session_id=cdp_session.session_id,
1470
+ )
1471
+
1472
+ # Small delay to emulate human typing speed
1473
+ await asyncio.sleep(0.005)
1474
+
1475
+ # Step 2: Send char event (WITH text parameter) - this is crucial for text input
1476
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
1477
+ params={
1478
+ 'type': 'char',
1479
+ 'text': char,
1480
+ 'key': char,
1481
+ },
1482
+ session_id=cdp_session.session_id,
1483
+ )
1484
+
1485
+ # Step 3: Send keyUp event (NO text parameter)
1486
+ await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
1487
+ params={
1488
+ 'type': 'keyUp',
1489
+ 'key': base_key,
1490
+ 'code': key_code,
1491
+ 'modifiers': modifiers,
1492
+ 'windowsVirtualKeyCode': vk_code,
1493
+ },
1494
+ session_id=cdp_session.session_id,
1495
+ )
1496
+
1497
+ # Small delay between characters to look human (realistic typing speed)
1498
+ await asyncio.sleep(0.001)
1499
+
1500
+ # Step 4: Trigger framework-aware DOM events after typing completion
1501
+ # Modern JavaScript frameworks (React, Vue, Angular) rely on these events
1502
+ # to update their internal state and trigger re-renders
1503
+ await self._trigger_framework_events(object_id=object_id, cdp_session=cdp_session)
1504
+
1505
+ # Return coordinates metadata if available
1506
+ return input_coordinates
1507
+
1508
+ except Exception as e:
1509
+ self.logger.error(f'Failed to input text via CDP: {type(e).__name__}: {e}')
1510
+ raise BrowserError(f'Failed to input text into element: {repr(element_node)}')
1511
+
1512
async def _trigger_framework_events(self, object_id: str, cdp_session) -> None:
    """
    Trigger framework-aware DOM events after text input completion.

    Runs a JavaScript function on the element (bound as ``this``) that focuses it
    and dispatches ``input``, ``change`` and ``blur`` events, plus extra ``input``
    events when React- or Vue-specific properties are present on the element.

    This is a best-effort enhancement: any failure is logged and never raised.

    Args:
        object_id: CDP object ID of the input element
        cdp_session: CDP session for the element's context
    """
    try:
        # Runtime.callFunctionOn expects an expression that evaluates to a function,
        # which CDP then calls with the object as `this`. An immediately-invoked
        # `(function() {...})();` does not evaluate to a function, so the script
        # must be a plain function declaration.
        framework_events_script = """
        function() {
            // Find the target element (available as 'this' when using objectId)
            const element = this;
            if (!element) return false;

            // Ensure element is focused
            element.focus();

            // Comprehensive event sequence for maximum framework compatibility
            const events = [
                // Input event - primary event for React controlled components
                { type: 'input', bubbles: true, cancelable: true },
                // Change event - important for form validation and Vue v-model
                { type: 'change', bubbles: true, cancelable: true },
                // Blur event - triggers validation in many frameworks
                { type: 'blur', bubbles: true, cancelable: true }
            ];

            let success = true;

            events.forEach(eventConfig => {
                try {
                    const event = new Event(eventConfig.type, {
                        bubbles: eventConfig.bubbles,
                        cancelable: eventConfig.cancelable
                    });

                    // Special handling for InputEvent (more specific than Event)
                    if (eventConfig.type === 'input') {
                        const inputEvent = new InputEvent('input', {
                            bubbles: true,
                            cancelable: true,
                            data: element.value,
                            inputType: 'insertText'
                        });
                        element.dispatchEvent(inputEvent);
                    } else {
                        element.dispatchEvent(event);
                    }
                } catch (e) {
                    success = false;
                    console.warn('Framework event dispatch failed:', eventConfig.type, e);
                }
            });

            // Special React synthetic event handling
            // React uses internal fiber properties for event system
            if (element._reactInternalFiber || element._reactInternalInstance || element.__reactInternalInstance) {
                try {
                    // Trigger React's synthetic event system
                    const syntheticInputEvent = new InputEvent('input', {
                        bubbles: true,
                        cancelable: true,
                        data: element.value
                    });

                    // Force React to process this as a synthetic event
                    Object.defineProperty(syntheticInputEvent, 'isTrusted', { value: true });
                    element.dispatchEvent(syntheticInputEvent);
                } catch (e) {
                    console.warn('React synthetic event failed:', e);
                }
            }

            // Special Vue reactivity trigger
            // Vue uses __vueParentComponent or __vue__ for component access
            if (element.__vue__ || element._vnode || element.__vueParentComponent) {
                try {
                    // Vue often needs explicit input event with proper timing
                    const vueEvent = new Event('input', { bubbles: true });
                    setTimeout(() => element.dispatchEvent(vueEvent), 0);
                } catch (e) {
                    console.warn('Vue reactivity trigger failed:', e);
                }
            }

            return success;
        }
        """

        # Execute the framework events script with the element bound as `this`
        result = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
            params={
                'objectId': object_id,
                'functionDeclaration': framework_events_script,
                'returnByValue': True,
            },
            session_id=cdp_session.session_id,
        )

        # Surface script-level failures instead of silently discarding the result
        if result.get('exceptionDetails'):
            self.logger.debug(f'Framework events script raised: {result["exceptionDetails"]}')
        elif not result.get('result', {}).get('value', False):
            self.logger.debug('Framework events script reported a failed event dispatch')

    except Exception as e:
        self.logger.warning(f'⚠️ Failed to trigger framework events: {type(e).__name__}: {e}')
        # Don't raise - framework events are a best-effort enhancement
1621
+
1622
async def _scroll_with_cdp_gesture(self, pixels: int) -> bool:
    """
    Scroll the page by dispatching a CDP mouse-wheel event at the viewport center.

    Args:
        pixels: Number of pixels to scroll (positive = down, negative = up)

    Returns:
        True if the wheel event was dispatched, False if any step failed
    """
    try:
        focus = self.browser_session.agent_focus
        assert focus is not None, 'CDP session not initialized - browser may not be connected yet'
        client = focus.cdp_client
        sid = focus.session_id

        # The layout viewport gives the visible area; the wheel event is aimed at its middle
        metrics = await client.send.Page.getLayoutMetrics(session_id=sid)
        viewport = metrics['layoutViewport']

        # For mouse wheel, positive deltaY scrolls down, negative scrolls up
        wheel_params = {
            'type': 'mouseWheel',
            'x': viewport['clientWidth'] / 2,
            'y': viewport['clientHeight'] / 2,
            'deltaX': 0,
            'deltaY': pixels,
        }
        await client.send.Input.dispatchMouseEvent(params=wheel_params, session_id=sid)

        self.logger.debug(f'📄 Scrolled via CDP mouse wheel: {pixels}px')
        return True

    except Exception as e:
        self.logger.warning(f'❌ Scrolling via CDP failed: {type(e).__name__}: {e}')
        return False
1668
+
1669
async def _scroll_element_container(self, element_node, pixels: int) -> bool:
    """
    Scroll inside a specific element using CDP.

    For an IFRAME element the iframe's content document is scrolled with JavaScript.
    If that does not succeed, or the element is not an iframe, a mouse-wheel event
    is dispatched at the center of the element's content box.

    Returns:
        True if a scroll was performed or dispatched, False if any step raised.
    """
    try:
        cdp_session = await self.browser_session.cdp_client_for_node(element_node)
        client = cdp_session.cdp_client
        sid = cdp_session.session_id

        tag = element_node.tag_name
        if tag and tag.upper() == 'IFRAME':
            # Scrolling the iframe element itself does nothing useful; scroll its document instead
            resolved = await client.send.DOM.resolveNode(
                params={'backendNodeId': element_node.backend_node_id},
                session_id=sid,
            )

            if 'object' in resolved and 'objectId' in resolved['object']:
                iframe_scroll_script = f"""
                function() {{
                    try {{
                        const doc = this.contentDocument || this.contentWindow.document;
                        if (doc) {{
                            const scrollElement = doc.documentElement || doc.body;
                            if (scrollElement) {{
                                const oldScrollTop = scrollElement.scrollTop;
                                scrollElement.scrollTop += {pixels};
                                const newScrollTop = scrollElement.scrollTop;
                                return {{
                                    success: true,
                                    oldScrollTop: oldScrollTop,
                                    newScrollTop: newScrollTop,
                                    scrolled: newScrollTop - oldScrollTop
                                }};
                            }}
                        }}
                        return {{success: false, error: 'Could not access iframe content'}};
                    }} catch (e) {{
                        return {{success: false, error: e.toString()}};
                    }}
                }}
                """
                scroll_result = await client.send.Runtime.callFunctionOn(
                    params={
                        'functionDeclaration': iframe_scroll_script,
                        'objectId': resolved['object']['objectId'],
                        'returnByValue': True,
                    },
                    session_id=sid,
                )

                if scroll_result and 'result' in scroll_result and 'value' in scroll_result['result']:
                    outcome = scroll_result['result']['value']
                    if outcome.get('success'):
                        self.logger.debug(f'Successfully scrolled iframe content by {outcome.get("scrolled", 0)}px')
                        return True
                    self.logger.debug(f'Failed to scroll iframe: {outcome.get("error", "Unknown error")}')

        # Mouse-wheel path: used for non-iframe elements and when the iframe script did not succeed
        box_model = await client.send.DOM.getBoxModel(
            params={'backendNodeId': element_node.backend_node_id}, session_id=sid
        )
        quad = box_model['model']['content']

        # The content quad is read as four (x, y) pairs; average them for the center point
        wheel_x = (quad[0] + quad[2] + quad[4] + quad[6]) / 4
        wheel_y = (quad[1] + quad[3] + quad[5] + quad[7]) / 4

        await client.send.Input.dispatchMouseEvent(
            params={
                'type': 'mouseWheel',
                'x': wheel_x,
                'y': wheel_y,
                'deltaX': 0,
                'deltaY': pixels,
            },
            session_id=sid,
        )

        return True
    except Exception as e:
        self.logger.debug(f'Failed to scroll element container via CDP: {e}')
        return False
1758
+
1759
async def _get_session_id_for_element(self, element_node: EnhancedDOMTreeNode) -> str | None:
    """
    Pick the CDP session ID to use for an element, based on its frame.

    When the element has a frame_id, the browser's targets are searched for an
    'iframe' target whose targetId contains that frame_id; a session for it is
    obtained without switching focus. Otherwise, or when the lookup fails, the
    session of the currently focused target is returned.
    """
    frame_id = element_node.frame_id
    if frame_id:
        try:
            targets = await self.browser_session.cdp_client.send.Target.getTargets()

            for info in targets['targetInfos']:
                if info['type'] != 'iframe':
                    continue
                if frame_id not in str(info.get('targetId', '')):
                    continue
                # focus=False: obtain the iframe session without changing the agent's focus
                iframe_session = await self.browser_session.get_or_create_cdp_session(info['targetId'], focus=False)
                return iframe_session.session_id

            self.logger.debug(f'Frame {frame_id} not found in targets, using main session')
        except Exception as e:
            self.logger.debug(f'Error getting frame session: {e}, using main session')

    # Fall back to the main target session
    focus = self.browser_session.agent_focus
    assert focus is not None, 'CDP session not initialized - browser may not be connected yet'
    return focus.session_id
1783
+
1784
async def on_GoBackEvent(self, event: GoBackEvent) -> None:
    """
    Handle navigate back request with CDP.

    Reads the navigation history of the current session and navigates to the
    entry just before the current one. Logs a warning and returns without
    navigating when there is no previous entry. CDP errors propagate to the caller.
    """
    cdp_session = await self.browser_session.get_or_create_cdp_session()

    # Get navigation history
    history = await cdp_session.cdp_client.send.Page.getNavigationHistory(session_id=cdp_session.session_id)
    current_index = history['currentIndex']
    entries = history['entries']

    # Check if we can go back
    if current_index <= 0:
        self.logger.warning('⚠️ Cannot go back - no previous entry in history')
        return

    # Navigate to the previous entry
    previous_entry = entries[current_index - 1]
    await cdp_session.cdp_client.send.Page.navigateToHistoryEntry(
        params={'entryId': previous_entry['id']}, session_id=cdp_session.session_id
    )

    # Wait for navigation
    await asyncio.sleep(0.5)
    # Navigation is handled by BrowserSession via events

    self.logger.info(f'🔙 Navigated back to {previous_entry["url"]}')
1813
+
1814
async def on_GoForwardEvent(self, event: GoForwardEvent) -> None:
    """
    Handle navigate forward request with CDP.

    Reads the navigation history of the current session and navigates to the
    entry just after the current one. Logs a warning and returns without
    navigating when the current entry is already the last. CDP errors propagate
    to the caller.
    """
    cdp_session = await self.browser_session.get_or_create_cdp_session()

    # Get navigation history
    history = await cdp_session.cdp_client.send.Page.getNavigationHistory(session_id=cdp_session.session_id)
    current_index = history['currentIndex']
    entries = history['entries']

    # Check if we can go forward
    if current_index >= len(entries) - 1:
        self.logger.warning('⚠️ Cannot go forward - no next entry in history')
        return

    # Navigate to the next entry
    next_entry = entries[current_index + 1]
    await cdp_session.cdp_client.send.Page.navigateToHistoryEntry(
        params={'entryId': next_entry['id']}, session_id=cdp_session.session_id
    )

    # Wait for navigation
    await asyncio.sleep(0.5)
    # Navigation is handled by BrowserSession via events

    self.logger.info(f'🔜 Navigated forward to {next_entry["url"]}')
1841
+
1842
async def on_RefreshEvent(self, event: RefreshEvent) -> None:
    """
    Handle target refresh request with CDP.

    Reloads the page of the current session and then pauses for a fixed
    one second. CDP errors propagate to the caller.
    """
    cdp_session = await self.browser_session.get_or_create_cdp_session()

    # Reload the target
    await cdp_session.cdp_client.send.Page.reload(session_id=cdp_session.session_id)

    # Wait for reload
    await asyncio.sleep(1.0)

    # Note: We don't clear cached state here - let the next state fetch rebuild as needed
    # Navigation is handled by BrowserSession via events

    self.logger.info('🔄 Target refreshed')
1859
+
1860
@observe_debug(ignore_input=True, ignore_output=True, name='wait_event_handler')
async def on_WaitEvent(self, event: WaitEvent) -> None:
    """
    Handle wait request.

    Sleeps for ``event.seconds``, clamped to the range ``[0, event.max_seconds]``.
    The log line notes when the requested duration was adjusted.
    """
    # Clamp: negative requests become 0, oversized requests are capped at the maximum
    actual_seconds = min(max(event.seconds, 0), event.max_seconds)
    if actual_seconds != event.seconds:
        self.logger.info(f'🕒 Waiting for {actual_seconds} seconds (capped from {event.seconds}s)')
    else:
        self.logger.info(f'🕒 Waiting for {actual_seconds} seconds')

    await asyncio.sleep(actual_seconds)
1874
+
1875
async def _dispatch_key_event(self, cdp_session, event_type: str, key: str, modifiers: int = 0) -> None:
    """
    Send a single keyboard event through CDP Input.dispatchKeyEvent.

    The key's ``code`` and Windows virtual key code come from ``get_key_info``.
    ``modifiers`` is included only when non-zero, and ``windowsVirtualKeyCode``
    only when a virtual key code is available.
    """
    key_code, virtual_key = get_key_info(key)
    payload: DispatchKeyEventParameters = {'type': event_type, 'key': key, 'code': key_code}
    if modifiers:
        payload['modifiers'] = modifiers
    if virtual_key is not None:
        payload['windowsVirtualKeyCode'] = virtual_key
    await cdp_session.cdp_client.send.Input.dispatchKeyEvent(params=payload, session_id=cdp_session.session_id)
1888
+
1889
async def on_SendKeysEvent(self, event: SendKeysEvent) -> None:
    """
    Handle send keys request with CDP.

    ``event.keys`` is interpreted in one of three ways:

    * a combination such as ``"ctrl+a"`` (contains ``+``): every part but the last
      is pressed as a modifier, the last part is pressed with the modifier bitmask,
      then the modifiers are released in reverse order;
    * a single special key (Enter, Tab, arrows, F1-F12, ...): keyDown/keyUp, with an
      extra ``char`` event for Enter;
    * anything else: typed character by character as text.

    Common aliases (``ctrl``, ``cmd``, ``esc``, ``return``, ...) are normalized
    case-insensitively. CDP errors propagate to the caller.
    """
    cdp_session = await self.browser_session.get_or_create_cdp_session(focus=True)

    # Normalize key names from common aliases
    key_aliases = {
        'ctrl': 'Control',
        'control': 'Control',
        'alt': 'Alt',
        'option': 'Alt',
        'meta': 'Meta',
        'cmd': 'Meta',
        'command': 'Meta',
        'shift': 'Shift',
        'enter': 'Enter',
        'return': 'Enter',
        'tab': 'Tab',
        'delete': 'Delete',
        'backspace': 'Backspace',
        'escape': 'Escape',
        'esc': 'Escape',
        'space': ' ',
        'up': 'ArrowUp',
        'down': 'ArrowDown',
        'left': 'ArrowLeft',
        'right': 'ArrowRight',
        'pageup': 'PageUp',
        'pagedown': 'PageDown',
        'home': 'Home',
        'end': 'End',
    }
    modifier_map = {'Alt': 1, 'Control': 2, 'Meta': 4, 'Shift': 8}
    special_keys = {
        'Enter',
        'Tab',
        'Delete',
        'Backspace',
        'Escape',
        'ArrowUp',
        'ArrowDown',
        'ArrowLeft',
        'ArrowRight',
        'PageUp',
        'PageDown',
        'Home',
        'End',
        'Control',
        'Alt',
        'Meta',
        'Shift',
        'F1',
        'F2',
        'F3',
        'F4',
        'F5',
        'F6',
        'F7',
        'F8',
        'F9',
        'F10',
        'F11',
        'F12',
    }

    keys = event.keys
    if '+' in keys:
        # Handle key combinations like "ctrl+a"
        parts = []
        for raw_part in keys.split('+'):
            # Strip whitespace so "ctrl + a" sends "a" rather than " a"; a part that is
            # only whitespace (e.g. a literal space key) is kept as written.
            stripped = raw_part.strip()
            parts.append(key_aliases.get(stripped.lower(), stripped or raw_part))

        *modifiers, main_key = parts

        # Calculate modifier bitmask (parts that are not modifiers contribute 0)
        modifier_value = 0
        for mod in modifiers:
            modifier_value |= modifier_map.get(mod, 0)

        # Press modifier keys
        for mod in modifiers:
            await self._dispatch_key_event(cdp_session, 'keyDown', mod)

        # Press and release main key with modifiers bitmask
        await self._dispatch_key_event(cdp_session, 'keyDown', main_key, modifier_value)
        await self._dispatch_key_event(cdp_session, 'keyUp', main_key, modifier_value)

        # Release modifier keys
        for mod in reversed(modifiers):
            await self._dispatch_key_event(cdp_session, 'keyUp', mod)
    else:
        # Single key or plain text; text that is not an alias is kept exactly as given
        normalized_keys = key_aliases.get(keys.strip().lower(), keys)

        if normalized_keys in special_keys:
            await self._dispatch_key_event(cdp_session, 'keyDown', normalized_keys)
            # For Enter key, also dispatch a char event to trigger keypress listeners
            if normalized_keys == 'Enter':
                await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
                    params={
                        'type': 'char',
                        'text': '\r',
                        'key': 'Enter',
                    },
                    session_id=cdp_session.session_id,
                )
            await self._dispatch_key_event(cdp_session, 'keyUp', normalized_keys)
        else:
            # It's text (single character or string) - send each character as text input
            # This is crucial for text to appear in focused input fields
            for char in normalized_keys:
                # Special-case newline characters to dispatch as Enter
                if char in ('\n', '\r'):
                    await self._dispatch_key_event(cdp_session, 'keyDown', 'Enter')
                    await self._dispatch_key_event(cdp_session, 'keyUp', 'Enter')
                    continue

                # Get proper modifiers and key info for the character
                modifiers, vk_code, base_key = self._get_char_modifiers_and_vk(char)
                key_code = self._get_key_code_for_char(base_key)

                # Send keyDown
                await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
                    params={
                        'type': 'keyDown',
                        'key': base_key,
                        'code': key_code,
                        'modifiers': modifiers,
                        'windowsVirtualKeyCode': vk_code,
                    },
                    session_id=cdp_session.session_id,
                )

                # Send char event with text - this is what makes text appear in input fields
                await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
                    params={
                        'type': 'char',
                        'text': char,
                        'key': char,
                    },
                    session_id=cdp_session.session_id,
                )

                # Send keyUp
                await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
                    params={
                        'type': 'keyUp',
                        'key': base_key,
                        'code': key_code,
                        'modifiers': modifiers,
                        'windowsVirtualKeyCode': vk_code,
                    },
                    session_id=cdp_session.session_id,
                )

                # Small delay between characters (18ms like _type_to_page)
                await asyncio.sleep(0.018)

    self.logger.info(f'⌨️ Sent keys: {event.keys}')

    # Note: We don't clear cached state on Enter; multi_act will detect DOM changes
    # and rebuild explicitly. We still wait briefly for potential navigation.
    lowered = event.keys.lower()
    if 'enter' in lowered or 'return' in lowered:
        await asyncio.sleep(0.1)
2068
+
2069
async def on_UploadFileEvent(self, event: UploadFileEvent) -> None:
    """
    Handle file upload request with CDP.

    Sets ``event.file_path`` as the selected file of the file input ``event.node``
    via DOM.setFileInputFiles, using the CDP session chosen for the element's frame.

    Raises:
        BrowserError: if the node is not a file input.
    """
    element_node = event.node
    index_for_logging = element_node.backend_node_id or 'unknown'

    # Check if it's a file input
    if not self.browser_session.is_file_input(element_node):
        msg = f'Upload failed - element {index_for_logging} is not a file input.'
        raise BrowserError(message=msg, long_term_memory=msg)

    # Get CDP client and the session matching the element's frame
    cdp_client = self.browser_session.cdp_client
    session_id = await self._get_session_id_for_element(element_node)

    # Set file(s) to upload
    await cdp_client.send.DOM.setFileInputFiles(
        params={
            'files': [event.file_path],
            'backendNodeId': element_node.backend_node_id,
        },
        session_id=session_id,
    )

    self.logger.info(f'📎 Uploaded file {event.file_path} to element {index_for_logging}')
2098
+
2099
async def on_ScrollToTextEvent(self, event: ScrollToTextEvent) -> None:
    """
    Handle scroll to text request with CDP.

    Tries three XPath searches (direct text nodes, any descendant text, attribute
    values) through DOM.performSearch and scrolls the first match into view. If
    none of them matches, falls back to a JavaScript TreeWalker over text nodes.

    ``event.text`` is escaped before being embedded in the XPath queries and in
    the JavaScript snippet, so quotes and backslashes in the text are matched
    literally instead of breaking the query or script.

    Raises:
        BrowserError: if the CDP session is not initialized or the text is not found.
    """

    # TODO: handle looking for text inside cross-origin iframes as well

    def xpath_literal(value: str) -> str:
        """Return value as an XPath 1.0 string literal (XPath has no escape characters)."""
        if '"' not in value:
            return f'"{value}"'
        if "'" not in value:
            return f"'{value}'"
        # Both quote types present: join the double-quote-free pieces with concat()
        pieces = ", '\"', ".join(f'"{piece}"' for piece in value.split('"'))
        return f'concat({pieces})'

    # Get CDP client and session
    cdp_client = self.browser_session.cdp_client
    if self.browser_session.agent_focus is None:
        raise BrowserError('CDP session not initialized - browser may not be connected yet')
    session_id = self.browser_session.agent_focus.session_id

    # Enable DOM
    await cdp_client.send.DOM.enable(session_id=session_id)

    # Request the full document before searching; only the call matters, not its result
    # NOTE(review): presumably needed so search results resolve to node IDs - confirm
    await cdp_client.send.DOM.getDocument(params={'depth': -1}, session_id=session_id)

    # Search for text using XPath
    needle = xpath_literal(event.text)
    search_queries = [
        f'//*[contains(text(), {needle})]',
        f'//*[contains(., {needle})]',
        f'//*[@*[contains(., {needle})]]',
    ]

    found = False
    for query in search_queries:
        try:
            search_result = await cdp_client.send.DOM.performSearch(params={'query': query}, session_id=session_id)
            search_id = search_result['searchId']

            try:
                if search_result['resultCount'] > 0:
                    # Get the first match
                    node_ids = await cdp_client.send.DOM.getSearchResults(
                        params={'searchId': search_id, 'fromIndex': 0, 'toIndex': 1},
                        session_id=session_id,
                    )

                    if node_ids['nodeIds']:
                        # Scroll the element into view
                        await cdp_client.send.DOM.scrollIntoViewIfNeeded(
                            params={'nodeId': node_ids['nodeIds'][0]}, session_id=session_id
                        )
                        found = True
                        self.logger.debug(f'📜 Scrolled to text: "{event.text}"')
            finally:
                # Always release the search, including on the success path
                try:
                    await cdp_client.send.DOM.discardSearchResults(params={'searchId': search_id}, session_id=session_id)
                except Exception as cleanup_error:
                    self.logger.debug(f'Failed to discard search results: {cleanup_error}')
        except Exception as e:
            self.logger.debug(f'Search query failed: {query}, error: {e}')

        if found:
            break

    if found:
        return None

    # Fallback: Try JavaScript search. json.dumps yields a valid, fully escaped JS string literal.
    js_needle = json.dumps(event.text)
    js_result = await cdp_client.send.Runtime.evaluate(
        params={
            'expression': f'''
            (() => {{
                const walker = document.createTreeWalker(
                    document.body,
                    NodeFilter.SHOW_TEXT,
                    null,
                    false
                );
                let node;
                while (node = walker.nextNode()) {{
                    if (node.textContent.includes({js_needle})) {{
                        node.parentElement.scrollIntoView({{behavior: 'smooth', block: 'center'}});
                        return true;
                    }}
                }}
                return false;
            }})()
            '''
        },
        session_id=session_id,
    )

    if js_result.get('result', {}).get('value'):
        self.logger.debug(f'📜 Scrolled to text: "{event.text}" (via JS)')
        return None

    self.logger.warning(f'⚠️ Text not found: "{event.text}"')
    raise BrowserError(f'Text not found: "{event.text}"', details={'text': event.text})
2193
+
2194
async def on_GetDropdownOptionsEvent(self, event: GetDropdownOptionsEvent) -> dict[str, str]:
    """
    Handle get dropdown options request with CDP.

    Resolves ``event.node`` to a remote object and runs a JavaScript function on it
    that recognizes three dropdown shapes: a native ``<select>``, an ARIA
    ``menu``/``listbox``/``combobox``, or an element with class ``dropdown``/``ui``
    containing item-like children. The target element is checked first, then its
    descendants up to depth 4.

    Returns:
        A dict of strings. On success it holds ``type``, ``options`` (JSON-encoded
        list), ``element_info``, ``source``, ``formatted_options``, ``message``,
        ``short_term_memory``, ``long_term_memory`` and ``backend_node_id``. When a
        dropdown is recognized but has no options, it holds ``error``,
        ``short_term_memory``, ``long_term_memory`` and ``backend_node_id`` instead.

    Raises:
        BrowserError: if no dropdown is recognized, on timeout, or on any other failure.
    """
    try:
        # Use the provided node
        element_node = event.node
        index_for_logging = element_node.backend_node_id or 'unknown'

        # Get CDP session for this node
        cdp_session = await self.browser_session.cdp_client_for_node(element_node)

        # Convert node to object ID for CDP operations
        try:
            object_result = await cdp_session.cdp_client.send.DOM.resolveNode(
                params={'backendNodeId': element_node.backend_node_id}, session_id=cdp_session.session_id
            )
            remote_object = object_result.get('object', {})
            object_id = remote_object.get('objectId')
            if not object_id:
                raise ValueError('Could not get object ID from resolved node')
        except Exception as e:
            # Also wraps the ValueError raised just above; the outer handler turns it into BrowserError
            raise ValueError(f'Failed to resolve node to object: {e}') from e

        # Use JavaScript to extract dropdown options
        options_script = """
        function() {
            const startElement = this;

            // Function to check if an element is a dropdown and extract options
            function checkDropdownElement(element) {
                // Check if it's a native select element
                if (element.tagName.toLowerCase() === 'select') {
                    return {
                        type: 'select',
                        options: Array.from(element.options).map((opt, idx) => ({
                            text: opt.text.trim(),
                            value: opt.value,
                            index: idx,
                            selected: opt.selected
                        })),
                        id: element.id || '',
                        name: element.name || '',
                        source: 'target'
                    };
                }

                // Check if it's an ARIA dropdown/menu
                const role = element.getAttribute('role');
                if (role === 'menu' || role === 'listbox' || role === 'combobox') {
                    // Find all menu items/options
                    const menuItems = element.querySelectorAll('[role="menuitem"], [role="option"]');
                    const options = [];

                    menuItems.forEach((item, idx) => {
                        const text = item.textContent ? item.textContent.trim() : '';
                        if (text) {
                            options.push({
                                text: text,
                                value: item.getAttribute('data-value') || text,
                                index: idx,
                                selected: item.getAttribute('aria-selected') === 'true' || item.classList.contains('selected')
                            });
                        }
                    });

                    return {
                        type: 'aria',
                        options: options,
                        id: element.id || '',
                        name: element.getAttribute('aria-label') || '',
                        source: 'target'
                    };
                }

                // Check if it's a Semantic UI dropdown or similar
                if (element.classList.contains('dropdown') || element.classList.contains('ui')) {
                    const menuItems = element.querySelectorAll('.item, .option, [data-value]');
                    const options = [];

                    menuItems.forEach((item, idx) => {
                        const text = item.textContent ? item.textContent.trim() : '';
                        if (text) {
                            options.push({
                                text: text,
                                value: item.getAttribute('data-value') || text,
                                index: idx,
                                selected: item.classList.contains('selected') || item.classList.contains('active')
                            });
                        }
                    });

                    if (options.length > 0) {
                        return {
                            type: 'custom',
                            options: options,
                            id: element.id || '',
                            name: element.getAttribute('aria-label') || '',
                            source: 'target'
                        };
                    }
                }

                return null;
            }

            // Function to recursively search children up to specified depth
            function searchChildrenForDropdowns(element, maxDepth, currentDepth = 0) {
                if (currentDepth >= maxDepth) return null;

                // Check all direct children
                for (let child of element.children) {
                    // Check if this child is a dropdown
                    const result = checkDropdownElement(child);
                    if (result) {
                        result.source = `child-depth-${currentDepth + 1}`;
                        return result;
                    }

                    // Recursively check this child's children
                    const childResult = searchChildrenForDropdowns(child, maxDepth, currentDepth + 1);
                    if (childResult) {
                        return childResult;
                    }
                }

                return null;
            }

            // First check the target element itself
            let dropdownResult = checkDropdownElement(startElement);
            if (dropdownResult) {
                return dropdownResult;
            }

            // If target element is not a dropdown, search children up to depth 4
            dropdownResult = searchChildrenForDropdowns(startElement, 4);
            if (dropdownResult) {
                return dropdownResult;
            }

            return {
                error: `Element and its children (depth 4) are not recognizable dropdown types (tag: ${startElement.tagName}, role: ${startElement.getAttribute('role')}, classes: ${startElement.className})`
            };
        }
        """

        # Run the script with the resolved element bound as `this`; the result comes back by value
        result = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
            params={
                'functionDeclaration': options_script,
                'objectId': object_id,
                'returnByValue': True,
            },
            session_id=cdp_session.session_id,
        )

        dropdown_data = result.get('result', {}).get('value', {})

        # The script reports "not a dropdown" as an object with an `error` key
        if dropdown_data.get('error'):
            raise BrowserError(message=dropdown_data['error'], long_term_memory=dropdown_data['error'])

        # A recognized dropdown without options is returned as an error dict, not raised
        if not dropdown_data.get('options'):
            msg = f'No options found in dropdown at index {index_for_logging}'
            return {
                'error': msg,
                'short_term_memory': msg,
                'long_term_memory': msg,
                'backend_node_id': str(index_for_logging),
            }

        # Format options for display
        formatted_options = []
        for opt in dropdown_data['options']:
            # Use JSON encoding to ensure exact string matching
            encoded_text = json.dumps(opt['text'])
            status = ' (selected)' if opt.get('selected') else ''
            formatted_options.append(f'{opt["index"]}: text={encoded_text}, value={json.dumps(opt["value"])}{status}')

        dropdown_type = dropdown_data.get('type', 'select')
        element_info = f'Index: {index_for_logging}, Type: {dropdown_type}, ID: {dropdown_data.get("id", "none")}, Name: {dropdown_data.get("name", "none")}'
        source_info = dropdown_data.get('source', 'unknown')

        # `source` is 'target' when the element itself matched, otherwise 'child-depth-N'
        if source_info == 'target':
            msg = f'Found {dropdown_type} dropdown ({element_info}):\n' + '\n'.join(formatted_options)
        else:
            msg = f'Found {dropdown_type} dropdown in {source_info} ({element_info}):\n' + '\n'.join(formatted_options)
        msg += (
            f'\n\nUse the exact text or value string (without quotes) in select_dropdown(index={index_for_logging}, text=...)'
        )

        if source_info == 'target':
            self.logger.info(f'📋 Found {len(dropdown_data["options"])} dropdown options for index {index_for_logging}')
        else:
            self.logger.info(
                f'📋 Found {len(dropdown_data["options"])} dropdown options for index {index_for_logging} in {source_info}'
            )

        # Create structured memory for the response
        short_term_memory = msg
        long_term_memory = f'Got dropdown options for index {index_for_logging}'

        # Return the dropdown data as a dict with structured memory
        return {
            'type': dropdown_type,
            'options': json.dumps(dropdown_data['options']),  # Convert list to JSON string for dict[str, str] type
            'element_info': element_info,
            'source': source_info,
            'formatted_options': '\n'.join(formatted_options),
            'message': msg,
            'short_term_memory': short_term_memory,
            'long_term_memory': long_term_memory,
            'backend_node_id': str(index_for_logging),
        }

    except BrowserError:
        # Re-raise BrowserError as-is to preserve structured memory
        raise
    except TimeoutError:
        msg = f'Failed to get dropdown options for index {index_for_logging} due to timeout.'
        self.logger.error(msg)
        raise BrowserError(message=msg, long_term_memory=msg)
    except Exception as e:
        # Any other failure (including the ValueError from node resolution) becomes a BrowserError
        msg = 'Failed to get dropdown options'
        error_msg = f'{msg}: {str(e)}'
        self.logger.error(error_msg)
        raise BrowserError(
            message=error_msg, long_term_memory=f'Failed to get dropdown options for index {index_for_logging}.'
        )
2420
+
2421
async def on_SelectDropdownOptionEvent(self, event: SelectDropdownOptionEvent) -> dict[str, str]:
    """Select a dropdown option on ``event.node`` by its text or value, using CDP.

    The node is resolved to a remote object and a JavaScript function is run
    on it. The script first treats the node itself as a dropdown: a native
    ``<select>``, an element whose ``role`` is ``menu``/``listbox``/``combobox``,
    or an element carrying the ``dropdown`` or ``ui`` class. Only when the node
    is none of those does it search descendants (up to depth 4). Matching is
    case-insensitive against the option text and the option value
    (``value`` for ``<option>``, ``data-value`` otherwise).

    Args:
        event: Carries ``node`` (the element to operate on; its
            ``backend_node_id`` is used for CDP resolution and reporting) and
            ``text`` (the option text or value to select).

    Returns:
        On success: ``success='true'``, ``message``, ``value`` and
        ``backend_node_id``.
        When the option was not found: ``success='false'``, ``error`` and
        ``backend_node_id``; if the script reported the available options,
        ``short_term_memory`` (bulleted option list) and ``long_term_memory``
        are included as well.

    Raises:
        ValueError: If the CDP session, node resolution or script call fails.
            The original exception is attached as ``__cause__``.
    """
    # Bound before the ``try`` so the error handler below can always reference
    # them, even when the very first awaited call fails.
    element_node = event.node
    index_for_logging = element_node.backend_node_id or 'unknown'
    target_text = event.text

    try:
        # Get CDP session for this node
        cdp_session = await self.browser_session.cdp_client_for_node(element_node)

        # Convert node to object ID for CDP operations
        try:
            object_result = await cdp_session.cdp_client.send.DOM.resolveNode(
                params={'backendNodeId': element_node.backend_node_id}, session_id=cdp_session.session_id
            )
            object_id = object_result.get('object', {}).get('objectId')
            if not object_id:
                raise ValueError('Could not get object ID from resolved node')
        except Exception as e:
            raise ValueError(f'Failed to resolve node to object: {e}') from e

        # JavaScript executed with ``this`` bound to the resolved element.
        # NOTE: custom (ARIA / class-based) items are activated with a single
        # ``item.click()``. That call already fires a bubbling click event, so
        # dispatching an additional synthetic click would activate the item twice
        # and undo the selection on toggle-style options.
        selection_script = """
        function(targetText) {
            const startElement = this;

            // Function to attempt selection on a dropdown element
            function attemptSelection(element) {
                // Handle native select elements
                if (element.tagName.toLowerCase() === 'select') {
                    const options = Array.from(element.options);
                    const targetTextLower = targetText.toLowerCase();

                    for (const option of options) {
                        const optionTextLower = option.text.trim().toLowerCase();
                        const optionValueLower = option.value.toLowerCase();

                        // Match against both text and value (case-insensitive)
                        if (optionTextLower === targetTextLower || optionValueLower === targetTextLower) {
                            // Focus the element FIRST (important for Svelte/Vue/React and other reactive frameworks)
                            // This simulates the user focusing on the dropdown before changing it
                            element.focus();

                            // Then set the value
                            element.value = option.value;
                            option.selected = true;

                            // Trigger all necessary events for reactive frameworks
                            // 1. input event - critical for Vue's v-model and Svelte's bind:value
                            const inputEvent = new Event('input', { bubbles: true, cancelable: true });
                            element.dispatchEvent(inputEvent);

                            // 2. change event - traditional form validation and framework reactivity
                            const changeEvent = new Event('change', { bubbles: true, cancelable: true });
                            element.dispatchEvent(changeEvent);

                            // 3. blur event - completes the interaction, triggers validation
                            element.blur();

                            return {
                                success: true,
                                message: `Selected option: ${option.text.trim()} (value: ${option.value})`,
                                value: option.value
                            };
                        }
                    }

                    // Return available options as separate field
                    const availableOptions = options.map(opt => ({
                        text: opt.text.trim(),
                        value: opt.value
                    }));

                    return {
                        success: false,
                        error: `Option with text or value '${targetText}' not found in select element`,
                        availableOptions: availableOptions
                    };
                }

                // Handle ARIA dropdowns/menus
                const role = element.getAttribute('role');
                if (role === 'menu' || role === 'listbox' || role === 'combobox') {
                    const menuItems = element.querySelectorAll('[role="menuitem"], [role="option"]');
                    const targetTextLower = targetText.toLowerCase();

                    for (const item of menuItems) {
                        if (item.textContent) {
                            const itemTextLower = item.textContent.trim().toLowerCase();
                            const itemValueLower = (item.getAttribute('data-value') || '').toLowerCase();

                            // Match against both text and data-value (case-insensitive)
                            if (itemTextLower === targetTextLower || itemValueLower === targetTextLower) {
                                // Clear previous selections
                                menuItems.forEach(mi => {
                                    mi.setAttribute('aria-selected', 'false');
                                    mi.classList.remove('selected');
                                });

                                // Select this item
                                item.setAttribute('aria-selected', 'true');
                                item.classList.add('selected');

                                // Activate the item exactly once; click() already fires a bubbling click event
                                item.click();

                                return {
                                    success: true,
                                    message: `Selected ARIA menu item: ${item.textContent.trim()}`
                                };
                            }
                        }
                    }

                    // Return available options as separate field
                    const availableOptions = Array.from(menuItems).map(item => ({
                        text: item.textContent ? item.textContent.trim() : '',
                        value: item.getAttribute('data-value') || ''
                    })).filter(opt => opt.text || opt.value);

                    return {
                        success: false,
                        error: `Menu item with text or value '${targetText}' not found`,
                        availableOptions: availableOptions
                    };
                }

                // Handle Semantic UI or custom dropdowns
                if (element.classList.contains('dropdown') || element.classList.contains('ui')) {
                    const menuItems = element.querySelectorAll('.item, .option, [data-value]');
                    const targetTextLower = targetText.toLowerCase();

                    for (const item of menuItems) {
                        if (item.textContent) {
                            const itemTextLower = item.textContent.trim().toLowerCase();
                            const itemValueLower = (item.getAttribute('data-value') || '').toLowerCase();

                            // Match against both text and data-value (case-insensitive)
                            if (itemTextLower === targetTextLower || itemValueLower === targetTextLower) {
                                // Clear previous selections
                                menuItems.forEach(mi => {
                                    mi.classList.remove('selected', 'active');
                                });

                                // Select this item
                                item.classList.add('selected', 'active');

                                // Update dropdown text if there's a text element
                                const textElement = element.querySelector('.text');
                                if (textElement) {
                                    textElement.textContent = item.textContent.trim();
                                }

                                // Activate the item exactly once; click() already fires a bubbling click event
                                item.click();

                                // Also dispatch a change event on the main dropdown element
                                const dropdownChangeEvent = new Event('change', { bubbles: true });
                                element.dispatchEvent(dropdownChangeEvent);

                                return {
                                    success: true,
                                    message: `Selected custom dropdown item: ${item.textContent.trim()}`
                                };
                            }
                        }
                    }

                    // Return available options as separate field
                    const availableOptions = Array.from(menuItems).map(item => ({
                        text: item.textContent ? item.textContent.trim() : '',
                        value: item.getAttribute('data-value') || ''
                    })).filter(opt => opt.text || opt.value);

                    return {
                        success: false,
                        error: `Custom dropdown item with text or value '${targetText}' not found`,
                        availableOptions: availableOptions
                    };
                }

                return null; // Not a dropdown element
            }

            // Function to recursively search children for dropdowns
            function searchChildrenForSelection(element, maxDepth, currentDepth = 0) {
                if (currentDepth >= maxDepth) return null;

                // Check all direct children
                for (let child of element.children) {
                    // Try selection on this child
                    const result = attemptSelection(child);
                    if (result && result.success) {
                        return result;
                    }

                    // Recursively check this child's children
                    const childResult = searchChildrenForSelection(child, maxDepth, currentDepth + 1);
                    if (childResult && childResult.success) {
                        return childResult;
                    }
                }

                return null;
            }

            // First try the target element itself
            let selectionResult = attemptSelection(startElement);
            if (selectionResult) {
                // If attemptSelection returned a result (success or failure), use it
                // Don't search children if we found a dropdown element but selection failed
                return selectionResult;
            }

            // Only search children if target element is not a dropdown element
            selectionResult = searchChildrenForSelection(startElement, 4);
            if (selectionResult && selectionResult.success) {
                return selectionResult;
            }

            return {
                success: false,
                error: `Element and its children (depth 4) do not contain a dropdown with option '${targetText}' (tag: ${startElement.tagName}, role: ${startElement.getAttribute('role')}, classes: ${startElement.className})`
            };
        }
        """

        result = await cdp_session.cdp_client.send.Runtime.callFunctionOn(
            params={
                'functionDeclaration': selection_script,
                'arguments': [{'value': target_text}],
                'objectId': object_id,
                'returnByValue': True,
            },
            session_id=cdp_session.session_id,
        )

        # ``or {}`` also covers an explicit null value, not just a missing key.
        selection_result = result.get('result', {}).get('value') or {}

        if selection_result.get('success'):
            msg = selection_result.get('message', f'Selected option: {target_text}')
            self.logger.debug(msg)
            return {
                'success': 'true',
                'message': msg,
                # ARIA/custom branches report no value; fall back to the requested text.
                'value': selection_result.get('value', target_text),
                'backend_node_id': str(index_for_logging),
            }

        # Selection failed: report it as data (not an exception) so the caller can
        # show the agent which options actually exist.
        error_msg = selection_result.get('error', f'Failed to select option: {target_text}')
        available_options = selection_result.get('availableOptions') or []
        self.logger.error(f'❌ {error_msg}')
        self.logger.debug(f'Available options from JavaScript: {available_options}')

        # Format options for short_term_memory (simple bulleted list); prefer the
        # visible text and fall back to the value when the text is empty.
        short_term_options = []
        for opt in available_options:
            if isinstance(opt, dict):
                label = opt.get('text', '').strip() or opt.get('value', '').strip()
                if label:
                    short_term_options.append(f'- {label}')
            elif isinstance(opt, str):
                short_term_options.append(f'- {opt}')

        if short_term_options:
            return {
                'success': 'false',
                'error': error_msg,
                'short_term_memory': 'Available dropdown options are:\n' + '\n'.join(short_term_options),
                'long_term_memory': (
                    f"Couldn't select the dropdown option as '{target_text}' is not one of the available options."
                ),
                'backend_node_id': str(index_for_logging),
            }

        # Fallback to regular error result if no available options
        return {
            'success': 'false',
            'error': error_msg,
            'backend_node_id': str(index_for_logging),
        }

    except Exception as e:
        # Single wrapping point: every failure is logged once and surfaced as a
        # ValueError that names the requested option and the element.
        error_msg = f'Failed to select dropdown option "{target_text}" for element {index_for_logging}: {str(e)}'
        self.logger.error(error_msg)
        raise ValueError(error_msg) from e