optexity-browser-use 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. browser_use/__init__.py +157 -0
  2. browser_use/actor/__init__.py +11 -0
  3. browser_use/actor/element.py +1175 -0
  4. browser_use/actor/mouse.py +134 -0
  5. browser_use/actor/page.py +561 -0
  6. browser_use/actor/playground/flights.py +41 -0
  7. browser_use/actor/playground/mixed_automation.py +54 -0
  8. browser_use/actor/playground/playground.py +236 -0
  9. browser_use/actor/utils.py +176 -0
  10. browser_use/agent/cloud_events.py +282 -0
  11. browser_use/agent/gif.py +424 -0
  12. browser_use/agent/judge.py +170 -0
  13. browser_use/agent/message_manager/service.py +473 -0
  14. browser_use/agent/message_manager/utils.py +52 -0
  15. browser_use/agent/message_manager/views.py +98 -0
  16. browser_use/agent/prompts.py +413 -0
  17. browser_use/agent/service.py +2316 -0
  18. browser_use/agent/system_prompt.md +185 -0
  19. browser_use/agent/system_prompt_flash.md +10 -0
  20. browser_use/agent/system_prompt_no_thinking.md +183 -0
  21. browser_use/agent/views.py +743 -0
  22. browser_use/browser/__init__.py +41 -0
  23. browser_use/browser/cloud/cloud.py +203 -0
  24. browser_use/browser/cloud/views.py +89 -0
  25. browser_use/browser/events.py +578 -0
  26. browser_use/browser/profile.py +1158 -0
  27. browser_use/browser/python_highlights.py +548 -0
  28. browser_use/browser/session.py +3225 -0
  29. browser_use/browser/session_manager.py +399 -0
  30. browser_use/browser/video_recorder.py +162 -0
  31. browser_use/browser/views.py +200 -0
  32. browser_use/browser/watchdog_base.py +260 -0
  33. browser_use/browser/watchdogs/__init__.py +0 -0
  34. browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
  35. browser_use/browser/watchdogs/crash_watchdog.py +335 -0
  36. browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
  37. browser_use/browser/watchdogs/dom_watchdog.py +817 -0
  38. browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
  39. browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
  40. browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
  41. browser_use/browser/watchdogs/popups_watchdog.py +143 -0
  42. browser_use/browser/watchdogs/recording_watchdog.py +126 -0
  43. browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
  44. browser_use/browser/watchdogs/security_watchdog.py +280 -0
  45. browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
  46. browser_use/cli.py +2359 -0
  47. browser_use/code_use/__init__.py +16 -0
  48. browser_use/code_use/formatting.py +192 -0
  49. browser_use/code_use/namespace.py +665 -0
  50. browser_use/code_use/notebook_export.py +276 -0
  51. browser_use/code_use/service.py +1340 -0
  52. browser_use/code_use/system_prompt.md +574 -0
  53. browser_use/code_use/utils.py +150 -0
  54. browser_use/code_use/views.py +171 -0
  55. browser_use/config.py +505 -0
  56. browser_use/controller/__init__.py +3 -0
  57. browser_use/dom/enhanced_snapshot.py +161 -0
  58. browser_use/dom/markdown_extractor.py +169 -0
  59. browser_use/dom/playground/extraction.py +312 -0
  60. browser_use/dom/playground/multi_act.py +32 -0
  61. browser_use/dom/serializer/clickable_elements.py +200 -0
  62. browser_use/dom/serializer/code_use_serializer.py +287 -0
  63. browser_use/dom/serializer/eval_serializer.py +478 -0
  64. browser_use/dom/serializer/html_serializer.py +212 -0
  65. browser_use/dom/serializer/paint_order.py +197 -0
  66. browser_use/dom/serializer/serializer.py +1170 -0
  67. browser_use/dom/service.py +825 -0
  68. browser_use/dom/utils.py +129 -0
  69. browser_use/dom/views.py +906 -0
  70. browser_use/exceptions.py +5 -0
  71. browser_use/filesystem/__init__.py +0 -0
  72. browser_use/filesystem/file_system.py +619 -0
  73. browser_use/init_cmd.py +376 -0
  74. browser_use/integrations/gmail/__init__.py +24 -0
  75. browser_use/integrations/gmail/actions.py +115 -0
  76. browser_use/integrations/gmail/service.py +225 -0
  77. browser_use/llm/__init__.py +155 -0
  78. browser_use/llm/anthropic/chat.py +242 -0
  79. browser_use/llm/anthropic/serializer.py +312 -0
  80. browser_use/llm/aws/__init__.py +36 -0
  81. browser_use/llm/aws/chat_anthropic.py +242 -0
  82. browser_use/llm/aws/chat_bedrock.py +289 -0
  83. browser_use/llm/aws/serializer.py +257 -0
  84. browser_use/llm/azure/chat.py +91 -0
  85. browser_use/llm/base.py +57 -0
  86. browser_use/llm/browser_use/__init__.py +3 -0
  87. browser_use/llm/browser_use/chat.py +201 -0
  88. browser_use/llm/cerebras/chat.py +193 -0
  89. browser_use/llm/cerebras/serializer.py +109 -0
  90. browser_use/llm/deepseek/chat.py +212 -0
  91. browser_use/llm/deepseek/serializer.py +109 -0
  92. browser_use/llm/exceptions.py +29 -0
  93. browser_use/llm/google/__init__.py +3 -0
  94. browser_use/llm/google/chat.py +542 -0
  95. browser_use/llm/google/serializer.py +120 -0
  96. browser_use/llm/groq/chat.py +229 -0
  97. browser_use/llm/groq/parser.py +158 -0
  98. browser_use/llm/groq/serializer.py +159 -0
  99. browser_use/llm/messages.py +238 -0
  100. browser_use/llm/models.py +271 -0
  101. browser_use/llm/oci_raw/__init__.py +10 -0
  102. browser_use/llm/oci_raw/chat.py +443 -0
  103. browser_use/llm/oci_raw/serializer.py +229 -0
  104. browser_use/llm/ollama/chat.py +97 -0
  105. browser_use/llm/ollama/serializer.py +143 -0
  106. browser_use/llm/openai/chat.py +264 -0
  107. browser_use/llm/openai/like.py +15 -0
  108. browser_use/llm/openai/serializer.py +165 -0
  109. browser_use/llm/openrouter/chat.py +211 -0
  110. browser_use/llm/openrouter/serializer.py +26 -0
  111. browser_use/llm/schema.py +176 -0
  112. browser_use/llm/views.py +48 -0
  113. browser_use/logging_config.py +330 -0
  114. browser_use/mcp/__init__.py +18 -0
  115. browser_use/mcp/__main__.py +12 -0
  116. browser_use/mcp/client.py +544 -0
  117. browser_use/mcp/controller.py +264 -0
  118. browser_use/mcp/server.py +1114 -0
  119. browser_use/observability.py +204 -0
  120. browser_use/py.typed +0 -0
  121. browser_use/sandbox/__init__.py +41 -0
  122. browser_use/sandbox/sandbox.py +637 -0
  123. browser_use/sandbox/views.py +132 -0
  124. browser_use/screenshots/__init__.py +1 -0
  125. browser_use/screenshots/service.py +52 -0
  126. browser_use/sync/__init__.py +6 -0
  127. browser_use/sync/auth.py +357 -0
  128. browser_use/sync/service.py +161 -0
  129. browser_use/telemetry/__init__.py +51 -0
  130. browser_use/telemetry/service.py +112 -0
  131. browser_use/telemetry/views.py +101 -0
  132. browser_use/tokens/__init__.py +0 -0
  133. browser_use/tokens/custom_pricing.py +24 -0
  134. browser_use/tokens/mappings.py +4 -0
  135. browser_use/tokens/service.py +580 -0
  136. browser_use/tokens/views.py +108 -0
  137. browser_use/tools/registry/service.py +572 -0
  138. browser_use/tools/registry/views.py +174 -0
  139. browser_use/tools/service.py +1675 -0
  140. browser_use/tools/utils.py +82 -0
  141. browser_use/tools/views.py +100 -0
  142. browser_use/utils.py +670 -0
  143. optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
  144. optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
  145. optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
  146. optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
  147. optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,817 @@
1
+ """DOM watchdog for browser DOM tree management using CDP."""
2
+
3
+ import asyncio
4
+ import time
5
+ from typing import TYPE_CHECKING
6
+
7
+ from browser_use.browser.events import (
8
+ BrowserErrorEvent,
9
+ BrowserStateRequestEvent,
10
+ ScreenshotEvent,
11
+ TabCreatedEvent,
12
+ )
13
+ from browser_use.browser.watchdog_base import BaseWatchdog
14
+ from browser_use.dom.service import DomService
15
+ from browser_use.dom.views import (
16
+ EnhancedDOMTreeNode,
17
+ SerializedDOMState,
18
+ )
19
+ from browser_use.observability import observe_debug
20
+ from browser_use.utils import time_execution_async
21
+
22
+ if TYPE_CHECKING:
23
+ from browser_use.browser.views import BrowserStateSummary, NetworkRequest, PageInfo, PaginationButton
24
+
25
+
26
+ class DOMWatchdog(BaseWatchdog):
27
+ """Handles DOM tree building, serialization, and element access via CDP.
28
+
29
+ This watchdog acts as a bridge between the event-driven browser session
30
+ and the DomService implementation, maintaining cached state and providing
31
+ helper methods for other watchdogs.
32
+ """
33
+
34
+ LISTENS_TO = [TabCreatedEvent, BrowserStateRequestEvent]
35
+ EMITS = [BrowserErrorEvent]
36
+
37
+ # Public properties for other watchdogs
38
+ selector_map: dict[int, EnhancedDOMTreeNode] | None = None
39
+ current_dom_state: SerializedDOMState | None = None
40
+ enhanced_dom_tree: EnhancedDOMTreeNode | None = None
41
+
42
+ # Internal DOM service
43
+ _dom_service: DomService | None = None
44
+
45
+ # Network tracking - maps request_id to (url, start_time, method, resource_type)
46
+ _pending_requests: dict[str, tuple[str, float, str, str | None]] = {}
47
+
48
async def on_TabCreatedEvent(self, event: TabCreatedEvent) -> None:
    """Handle a ``TabCreatedEvent``.

    Currently a no-op: the event type is listed in ``LISTENS_TO`` but no
    work is performed for it and *event* is not inspected.

    Args:
        event: The tab-created event delivered by the event bus (unused).
    """
    return
51
+
52
+ def _get_recent_events_str(self, limit: int = 10) -> str | None:
53
+ """Get the most recent events from the event bus as JSON.
54
+
55
+ Args:
56
+ limit: Maximum number of recent events to include
57
+
58
+ Returns:
59
+ JSON string of recent events or None if not available
60
+ """
61
+ import json
62
+
63
+ try:
64
+ # Get all events from history, sorted by creation time (most recent first)
65
+ all_events = sorted(
66
+ self.browser_session.event_bus.event_history.values(), key=lambda e: e.event_created_at.timestamp(), reverse=True
67
+ )
68
+
69
+ # Take the most recent events and create JSON-serializable data
70
+ recent_events_data = []
71
+ for event in all_events[:limit]:
72
+ event_data = {
73
+ 'event_type': event.event_type,
74
+ 'timestamp': event.event_created_at.isoformat(),
75
+ }
76
+ # Add specific fields for certain event types
77
+ if hasattr(event, 'url'):
78
+ event_data['url'] = getattr(event, 'url')
79
+ if hasattr(event, 'error_message'):
80
+ event_data['error_message'] = getattr(event, 'error_message')
81
+ if hasattr(event, 'target_id'):
82
+ event_data['target_id'] = getattr(event, 'target_id')
83
+ recent_events_data.append(event_data)
84
+
85
+ return json.dumps(recent_events_data) # Return empty array if no events
86
+ except Exception as e:
87
+ self.logger.debug(f'Failed to get recent events: {e}')
88
+
89
+ return json.dumps([]) # Return empty JSON array on error
90
+
91
async def _get_pending_network_requests(self) -> list['NetworkRequest']:
    """Report resources the focused page still appears to be loading.

    A JavaScript snippet is evaluated in the focused CDP session. It walks
    ``performance.getEntriesByType('resource')`` and keeps entries whose
    ``responseEnd`` is 0, then drops ad/tracking URLs, ``data:`` URLs, URLs
    longer than 500 characters, anything loading for more than 10 seconds,
    and image/font-like resources loading for more than 3 seconds.

    Returns:
        Up to 20 ``NetworkRequest`` objects. An empty list is returned when
        there is no agent focus, when the evaluation result is not an
        object, or when any exception occurs (logged at debug level).
    """
    from browser_use.browser.views import NetworkRequest

    try:
        if not self.browser_session.agent_focus:
            return []

        session = await self.browser_session.get_or_create_cdp_session(focus=True)

        # Use performance API to get pending requests
        js_code = """
(function() {
    const now = performance.now();
    const resources = performance.getEntriesByType('resource');
    const pending = [];

    // Check document readyState
    const docLoading = document.readyState !== 'complete';

    // Common ad/tracking domains and patterns to filter out
    const adDomains = [
        // Standard ad/tracking networks
        'doubleclick.net', 'googlesyndication.com', 'googletagmanager.com',
        'facebook.net', 'analytics', 'ads', 'tracking', 'pixel',
        'hotjar.com', 'clarity.ms', 'mixpanel.com', 'segment.com',
        // Analytics platforms
        'demdex.net', 'omtrdc.net', 'adobedtm.com', 'ensighten.com',
        'newrelic.com', 'nr-data.net', 'google-analytics.com',
        // Social media trackers
        'connect.facebook.net', 'platform.twitter.com', 'platform.linkedin.com',
        // CDN/image hosts (usually not critical for functionality)
        '.cloudfront.net/image/', '.akamaized.net/image/',
        // Common tracking paths
        '/tracker/', '/collector/', '/beacon/', '/telemetry/', '/log/',
        '/events/', '/eventBatch', '/track.', '/metrics/'
    ];

    // Get resources that are still loading (responseEnd is 0)
    let totalResourcesChecked = 0;
    let filteredByResponseEnd = 0;
    const allDomains = new Set();

    for (const entry of resources) {
        totalResourcesChecked++;

        // Track all domains from recent resources (for logging)
        try {
            const hostname = new URL(entry.name).hostname;
            if (hostname) allDomains.add(hostname);
        } catch (e) {}

        if (entry.responseEnd === 0) {
            filteredByResponseEnd++;
            const url = entry.name;

            // Filter out ads and tracking
            const isAd = adDomains.some(domain => url.includes(domain));
            if (isAd) continue;

            // Filter out data: URLs and very long URLs (often inline resources)
            if (url.startsWith('data:') || url.length > 500) continue;

            const loadingDuration = now - entry.startTime;

            // Skip requests that have been loading for >10 seconds (likely stuck/polling)
            if (loadingDuration > 10000) continue;

            const resourceType = entry.initiatorType || 'unknown';

            // Filter out non-critical resources (images, fonts, icons) if loading >3 seconds
            const nonCriticalTypes = ['img', 'image', 'icon', 'font'];
            if (nonCriticalTypes.includes(resourceType) && loadingDuration > 3000) continue;

            // Filter out image URLs even if type is unknown
            const isImageUrl = /\\.(jpg|jpeg|png|gif|webp|svg|ico)(\\?|$)/i.test(url);
            if (isImageUrl && loadingDuration > 3000) continue;

            pending.push({
                url: url,
                method: 'GET',
                loading_duration_ms: Math.round(loadingDuration),
                resource_type: resourceType
            });
        }
    }

    return {
        pending_requests: pending,
        document_loading: docLoading,
        document_ready_state: document.readyState,
        debug: {
            total_resources: totalResourcesChecked,
            with_response_end_zero: filteredByResponseEnd,
            after_all_filters: pending.length,
            all_domains: Array.from(allDomains)
        }
    };
})()
"""

        evaluation = await session.cdp_client.send.Runtime.evaluate(
            params={'expression': js_code, 'returnByValue': True}, session_id=session.session_id
        )

        if evaluation.get('result', {}).get('type') == 'object':
            payload = evaluation['result'].get('value', {})
            pending = payload.get('pending_requests', [])
            ready_state = payload.get('document_ready_state', 'unknown')
            still_loading = payload.get('document_loading', False)
            stats = payload.get('debug', {})

            # Summarise the hostnames seen by the script: first five, alphabetically
            domains = stats.get('all_domains', [])
            domains_preview = ', '.join(sorted(domains)[:5]) if domains else 'none'
            if len(domains) > 5:
                domains_preview += f' +{len(domains) - 5} more'

            self.logger.debug(
                f'🔍 Network check: document.readyState={ready_state}, loading={still_loading}, '
                f'total_resources={stats.get("total_resources", 0)}, '
                f'responseEnd=0: {stats.get("with_response_end_zero", 0)}, '
                f'after_filters={len(pending)}, domains=[{domains_preview}]'
            )

            # Cap at 20 entries to avoid overwhelming the context
            return [
                NetworkRequest(
                    url=req['url'],
                    method=req.get('method', 'GET'),
                    loading_duration_ms=req.get('loading_duration_ms', 0.0),
                    resource_type=req.get('resource_type'),
                )
                for req in pending[:20]
            ]

    except Exception as e:
        self.logger.debug(f'Failed to get pending network requests: {e}')

    return []
242
+
243
@observe_debug(ignore_input=True, ignore_output=True, name='browser_state_request_event')
async def on_BrowserStateRequestEvent(self, event: BrowserStateRequestEvent) -> 'BrowserStateSummary':
    """Handle a browser state request by coordinating DOM building and screenshot capture.

    Steps, in order:

    1. Read the current page URL and the tab list (errors here propagate).
    2. For http/https pages, record pending network requests and sleep one
       second if any were found.
    3. For non-http(s) URLs, return a minimal 'Empty Tab' summary without
       building the DOM or taking a screenshot.
    4. Otherwise run the DOM build and screenshot as concurrent tasks (each
       only if requested by *event*), optionally add browser-side
       highlights, collect title / page info / pagination buttons, cache
       the resulting summary on the browser session and return it.

    Any exception raised inside steps 3-4 is logged and converted into a
    minimal 'Error' summary whose ``browser_errors`` holds the message.

    Args:
        event: The request; ``include_dom``, ``include_screenshot`` and
            ``include_recent_events`` select what is collected.

    Returns:
        A ``BrowserStateSummary`` (complete, 'Empty Tab', or 'Error').
    """
    from browser_use.browser.views import BrowserStateSummary, PageInfo

    self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: STARTING browser state request')
    page_url = await self.browser_session.get_current_page_url()
    self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got page URL: {page_url}')
    if self.browser_session.agent_focus:
        self.logger.debug(
            f'Current page URL: {page_url}, target_id: {self.browser_session.agent_focus.target_id}, session_id: {self.browser_session.agent_focus.session_id}'
        )
    else:
        self.logger.debug(f'Current page URL: {page_url}, no cdp_session attached')

    # check if we should skip DOM tree build for pointless pages (anything that is not http/https)
    not_a_meaningful_website = page_url.lower().split(':', 1)[0] not in ('http', 'https')

    # Check for pending network requests BEFORE waiting (so we can see what's loading)
    pending_requests_before_wait = []
    if not not_a_meaningful_website:
        try:
            pending_requests_before_wait = await self._get_pending_network_requests()
            if pending_requests_before_wait:
                self.logger.debug(f'🔍 Found {len(pending_requests_before_wait)} pending requests before stability wait')
        except Exception as e:
            self.logger.debug(f'Failed to get pending requests before wait: {e}')
    # The summary reports the snapshot taken before the wait, not a re-check after it
    pending_requests = pending_requests_before_wait

    # Wait for page stability: a fixed one-second pause, only when something was still loading
    if not not_a_meaningful_website:
        self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ⏳ Waiting for page stability...')
        try:
            if pending_requests_before_wait:
                await asyncio.sleep(1)
            self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ Page stability complete')
        except Exception as e:
            self.logger.warning(
                f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Network waiting failed: {e}, continuing anyway...'
            )

    # Get tabs info once at the beginning for all paths
    self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: Getting tabs info...')
    tabs_info = await self.browser_session.get_tabs()
    self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got {len(tabs_info)} tabs')
    self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Tabs info: {tabs_info}')

    # NOTE: viewport / scroll position is not queried here; remember that changing
    # scroll position should invalidate the selector_map cache because it only
    # includes visible elements.

    try:
        # Fast path for empty pages
        if not_a_meaningful_website:
            self.logger.debug(f'⚡ Skipping BuildDOMTree for empty target: {page_url}')
            self.logger.debug(f'📸 Not taking screenshot for empty page: {page_url} (non-http/https URL)')

            # Try to get page info from CDP, fall back to defaults if unavailable
            try:
                page_info = await self._get_page_info()
            except Exception as e:
                self.logger.debug(f'Failed to get page info from CDP for empty page: {e}, using fallback')
                page_info = self._viewport_fallback_page_info()

            return BrowserStateSummary(
                dom_state=SerializedDOMState(_root=None, selector_map={}),  # minimal DOM state
                url=page_url,
                title='Empty Tab',
                tabs=tabs_info,
                screenshot=None,  # no screenshot for empty pages
                page_info=page_info,
                pixels_above=0,
                pixels_below=0,
                browser_errors=[],
                is_pdf_viewer=False,
                recent_events=self._get_recent_events_str() if event.include_recent_events else None,
                pending_network_requests=[],  # Empty page has no pending requests
                pagination_buttons=[],  # Empty page has no pagination
                closed_popup_messages=self.browser_session._closed_popup_messages.copy(),
            )

        # Execute DOM building and screenshot capture in parallel
        dom_task = None
        screenshot_task = None

        # Start DOM building task if requested
        if event.include_dom:
            self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: 🌳 Starting DOM tree build task...')

            previous_state = (
                self.browser_session._cached_browser_state_summary.dom_state
                if self.browser_session._cached_browser_state_summary
                else None
            )

            dom_task = asyncio.create_task(self._build_dom_tree_without_highlights(previous_state))

        # Start clean screenshot task if requested (without JS highlights)
        if event.include_screenshot:
            self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: 📸 Starting clean screenshot task...')
            screenshot_task = asyncio.create_task(self._capture_clean_screenshot())

        # Wait for both tasks to complete; a failure in either degrades to an empty value
        content = None
        screenshot_b64 = None

        if dom_task:
            try:
                content = await dom_task
                self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ DOM tree build completed')
            except Exception as e:
                self.logger.warning(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: DOM build failed: {e}, using minimal state')
                content = SerializedDOMState(_root=None, selector_map={})
        else:
            content = SerializedDOMState(_root=None, selector_map={})

        if screenshot_task:
            try:
                screenshot_b64 = await screenshot_task
                self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ Clean screenshot captured')
            except Exception as e:
                self.logger.warning(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Clean screenshot failed: {e}')
                screenshot_b64 = None

        # Python-based highlight numbers are intentionally NOT drawn onto the
        # screenshot (disabled for code-use mode); the screenshot stays clean.

        # Add browser-side highlights for user visibility
        if content and content.selector_map and self.browser_session.browser_profile.dom_highlight_elements:
            try:
                self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: 🎨 Adding browser-side highlights...')
                await self.browser_session.add_highlights(content.selector_map)
                self.logger.debug(
                    f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ Added browser highlights for {len(content.selector_map)} elements'
                )
            except Exception as e:
                self.logger.warning(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Browser highlighting failed: {e}')

        # Ensure we have valid content
        if not content:
            content = SerializedDOMState(_root=None, selector_map={})

        # Get target title safely (one-second budget, generic title on failure)
        try:
            self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: Getting page title...')
            title = await asyncio.wait_for(self.browser_session.get_current_page_title(), timeout=1.0)
            self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got title: {title}')
        except Exception as e:
            self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Failed to get title: {e}')
            title = 'Page'

        # Get comprehensive page info from CDP with timeout
        try:
            self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: Getting page info from CDP...')
            page_info = await asyncio.wait_for(self._get_page_info(), timeout=1.0)
            self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got page info from CDP: {page_info}')
        except Exception as e:
            self.logger.debug(
                f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Failed to get page info from CDP: {e}, using fallback'
            )
            page_info = self._viewport_fallback_page_info()

        # Check for PDF viewer (URL-based heuristic)
        is_pdf_viewer = page_url.endswith('.pdf') or '/pdf/' in page_url

        # Detect pagination buttons from the DOM
        pagination_buttons_data = []
        if content and content.selector_map:
            pagination_buttons_data = self._detect_pagination_buttons(content.selector_map)

        # Build and cache the browser state summary
        if screenshot_b64:
            self.logger.debug(
                f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: 📸 Creating BrowserStateSummary with screenshot, length: {len(screenshot_b64)}'
            )
        else:
            self.logger.debug(
                '🔍 DOMWatchdog.on_BrowserStateRequestEvent: 📸 Creating BrowserStateSummary WITHOUT screenshot'
            )

        browser_state = BrowserStateSummary(
            dom_state=content,
            url=page_url,
            title=title,
            tabs=tabs_info,
            screenshot=screenshot_b64,
            page_info=page_info,
            pixels_above=0,
            pixels_below=0,
            browser_errors=[],
            is_pdf_viewer=is_pdf_viewer,
            recent_events=self._get_recent_events_str() if event.include_recent_events else None,
            pending_network_requests=pending_requests,
            pagination_buttons=pagination_buttons_data,
            closed_popup_messages=self.browser_session._closed_popup_messages.copy(),
        )

        # Cache the state
        self.browser_session._cached_browser_state_summary = browser_state

        self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ COMPLETED - Returning browser state')
        return browser_state

    except Exception as e:
        self.logger.error(f'Failed to get browser state: {e}')

        # Return minimal recovery state. page_url is always bound here because it
        # is assigned before the try block is entered.
        return BrowserStateSummary(
            dom_state=SerializedDOMState(_root=None, selector_map={}),
            url=page_url,
            title='Error',
            tabs=[],
            screenshot=None,
            page_info=PageInfo(
                viewport_width=1280,
                viewport_height=720,
                page_width=1280,
                page_height=720,
                scroll_x=0,
                scroll_y=0,
                pixels_above=0,
                pixels_below=0,
                pixels_left=0,
                pixels_right=0,
            ),
            pixels_above=0,
            pixels_below=0,
            browser_errors=[str(e)],
            is_pdf_viewer=False,
            recent_events=None,
            pending_network_requests=[],  # Error state has no pending requests
            pagination_buttons=[],  # Error state has no pagination
            closed_popup_messages=self.browser_session._closed_popup_messages.copy()
            if hasattr(self, 'browser_session') and self.browser_session is not None
            else [],
        )

def _viewport_fallback_page_info(self) -> 'PageInfo':
    """Build a ``PageInfo`` from the profile viewport (1280x720 if unset) with all scroll offsets zero."""
    from browser_use.browser.views import PageInfo

    viewport = self.browser_session.browser_profile.viewport or {'width': 1280, 'height': 720}
    return PageInfo(
        viewport_width=viewport['width'],
        viewport_height=viewport['height'],
        page_width=viewport['width'],
        page_height=viewport['height'],
        scroll_x=0,
        scroll_y=0,
        pixels_above=0,
        pixels_below=0,
        pixels_left=0,
        pixels_right=0,
    )
550
+
551
@time_execution_async('build_dom_tree_without_highlights')
@observe_debug(ignore_input=True, ignore_output=True, name='build_dom_tree_without_highlights')
async def _build_dom_tree_without_highlights(self, previous_state: SerializedDOMState | None = None) -> SerializedDOMState:
    """Build and cache the serialized DOM state; no JavaScript highlights are injected.

    The ``DomService`` is created on first use from the browser profile
    settings and reused afterwards. On success ``current_dom_state``,
    ``enhanced_dom_tree`` and ``selector_map`` are updated on this watchdog
    and the selector map is pushed to the browser session.

    Args:
        previous_state: Earlier serialized DOM state, forwarded to
            ``DomService.get_serialized_dom_tree`` as ``previous_cached_state``.

    Returns:
        The freshly built ``SerializedDOMState``.

    Raises:
        Exception: Any failure is logged, reported on the event bus as a
            ``BrowserErrorEvent`` with ``error_type='DOMBuildFailed'``, and
            then re-raised unchanged.
    """
    try:
        self.logger.debug('🔍 DOMWatchdog._build_dom_tree_without_highlights: STARTING DOM tree build')

        # Lazily construct the DOM service the first time it is needed
        if self._dom_service is None:
            profile = self.browser_session.browser_profile
            self._dom_service = DomService(
                browser_session=self.browser_session,
                logger=self.logger,
                cross_origin_iframes=profile.cross_origin_iframes,
                paint_order_filtering=profile.paint_order_filtering,
                max_iframes=profile.max_iframes,
                max_iframe_depth=profile.max_iframe_depth,
            )

        # Ask the service for the serialized tree and time the call
        self.logger.debug('🔍 DOMWatchdog._build_dom_tree_without_highlights: Calling DomService.get_serialized_dom_tree...')
        started_at = time.time()
        dom_state, dom_tree, timing_info = await self._dom_service.get_serialized_dom_tree(
            previous_cached_state=previous_state,
        )
        self.current_dom_state = dom_state
        self.enhanced_dom_tree = dom_tree
        elapsed = time.time() - started_at
        self.logger.debug(
            '🔍 DOMWatchdog._build_dom_tree_without_highlights: ✅ DomService.get_serialized_dom_tree completed'
        )

        self.logger.debug(f'Time taken to get DOM tree: {elapsed} seconds')
        self.logger.debug(f'Timing breakdown: {timing_info}')

        # Publish the selector map on this watchdog and on the browser session
        self.logger.debug('🔍 DOMWatchdog._build_dom_tree_without_highlights: Updating selector maps...')
        self.selector_map = self.current_dom_state.selector_map
        if self.browser_session:
            self.browser_session.update_cached_selector_map(self.selector_map)
        self.logger.debug(
            f'🔍 DOMWatchdog._build_dom_tree_without_highlights: ✅ Selector maps updated, {len(self.selector_map)} elements'
        )

        # No JavaScript highlighting is injected by this method
        self.logger.debug('🔍 DOMWatchdog._build_dom_tree_without_highlights: ✅ COMPLETED DOM tree build (no JS highlights)')
        return self.current_dom_state

    except Exception as e:
        self.logger.error(f'Failed to build DOM tree without highlights: {e}')
        failure_event = BrowserErrorEvent(
            error_type='DOMBuildFailed',
            message=str(e),
        )
        self.event_bus.dispatch(failure_event)
        raise
606
+
607
@time_execution_async('capture_clean_screenshot')
@observe_debug(ignore_input=True, ignore_output=True, name='capture_clean_screenshot')
async def _capture_clean_screenshot(self) -> str:
    """Capture a viewport screenshot through the event bus, without JavaScript highlights.

    Focuses the CDP session of the agent's current target, dispatches a
    ``ScreenshotEvent(full_page=False)``, waits for it to complete and returns
    the handler's result.

    Returns:
        The screenshot handler's result converted with ``str()``
        (presumably base64-encoded image data - confirm against the handler).

    Raises:
        AssertionError: If the browser session has no agent focus.
        TimeoutError: Logged as a warning and re-raised.
        Exception: Any other failure is logged as a warning and re-raised.
    """
    try:
        self.logger.debug('🔍 DOMWatchdog._capture_clean_screenshot: Capturing clean screenshot...')

        # Ensure we have a focused CDP session.
        # An explicit raise is used instead of an `assert` statement so the guard still runs
        # under `python -O`; AssertionError is kept so callers see the same exception type.
        focus = self.browser_session.agent_focus
        if focus is None:
            raise AssertionError('No current target ID')
        await self.browser_session.get_or_create_cdp_session(target_id=focus.target_id, focus=True)

        # Log which handlers are registered to make "no handler" timeouts easier to diagnose
        handlers = self.event_bus.handlers.get('ScreenshotEvent', [])
        handler_names = [getattr(h, '__name__', str(h)) for h in handlers]
        self.logger.debug(f'📸 ScreenshotEvent handlers registered: {len(handlers)} - {handler_names}')

        screenshot_event = self.event_bus.dispatch(ScreenshotEvent(full_page=False))
        self.logger.debug('📸 Dispatched ScreenshotEvent, waiting for event to complete...')

        # Wait for the event itself to complete (this waits for all handlers)
        await screenshot_event

        # Get the single handler result; the explicit None check is a defensive backstop
        screenshot_b64 = await screenshot_event.event_result(raise_if_any=True, raise_if_none=True)
        if screenshot_b64 is None:
            raise RuntimeError('Screenshot handler returned None')
        self.logger.debug('🔍 DOMWatchdog._capture_clean_screenshot: ✅ Clean screenshot captured successfully')
        return str(screenshot_b64)

    except TimeoutError:
        self.logger.warning('📸 Clean screenshot timed out after 6 seconds - no handler registered or slow page?')
        raise
    except Exception as e:
        self.logger.warning(f'📸 Clean screenshot failed: {type(e).__name__}: {e}')
        raise
642
+
643
async def _wait_for_stable_network(self):
    """Pause for the profile's configured settle and network-idle delays.

    This is a fixed-delay wait: it sleeps for ``minimum_wait_page_load_time``
    and then for ``wait_for_network_idle_page_load_time`` (each only when
    greater than zero) and logs the total time spent. Both values are read
    from the session's browser profile.
    """
    began = time.time()

    # First delay: give the page a minimum amount of time to settle
    settle_delay = self.browser_session.browser_profile.minimum_wait_page_load_time
    if settle_delay > 0:
        self.logger.debug(f'⏳ Minimum wait: {settle_delay}s')
        await asyncio.sleep(settle_delay)

    # Second delay: extra time for dynamic content (e.g. iframes) to finish loading
    idle_delay = self.browser_session.browser_profile.wait_for_network_idle_page_load_time
    if idle_delay > 0:
        self.logger.debug(f'⏳ Network idle wait: {idle_delay}s')
        await asyncio.sleep(idle_delay)

    total = time.time() - began
    self.logger.debug(f'✅ Page stability wait completed in {total:.2f}s')
661
+
662
def _detect_pagination_buttons(self, selector_map: dict[int, EnhancedDOMTreeNode]) -> list['PaginationButton']:
    """Find pagination controls in the selector map and wrap them as ``PaginationButton`` objects.

    Detection is best-effort: if detection or conversion fails, the error is
    logged as a warning and an empty list is returned.

    Args:
        selector_map: Dictionary mapping element indices to DOM tree nodes.

    Returns:
        The ``PaginationButton`` instances that were detected; empty when none
        were found or when detection failed.
    """
    from browser_use.browser.views import PaginationButton

    detected = []
    try:
        self.logger.debug('🔍 DOMWatchdog._detect_pagination_buttons: Detecting pagination buttons...')
        raw_buttons = DomService.detect_pagination_buttons(selector_map)

        # Convert into a scratch list first so a failure part-way through never leaks partial results
        converted = []
        for raw in raw_buttons:
            converted.append(
                PaginationButton(
                    button_type=raw['button_type'],  # type: ignore
                    backend_node_id=raw['backend_node_id'],  # type: ignore
                    text=raw['text'],  # type: ignore
                    selector=raw['selector'],  # type: ignore
                    is_disabled=raw['is_disabled'],  # type: ignore
                )
            )
        detected = converted

        if detected:
            self.logger.debug(f'🔍 DOMWatchdog._detect_pagination_buttons: Found {len(detected)} pagination buttons')
    except Exception as err:
        self.logger.warning(f'🔍 DOMWatchdog._detect_pagination_buttons: Pagination detection failed: {err}')

    return detected
696
+
697
async def _get_page_info(self) -> 'PageInfo':
    """Get comprehensive page information using a single CDP call.

    Issues one ``Page.getLayoutMetrics`` request against the agent's focused
    target and derives viewport size, total page size, scroll offsets and the
    number of pixels lying outside the viewport in each direction.

    TODO: should we make this an event as well?

    Returns:
        PageInfo with all viewport, page dimensions, and scroll information

    Raises:
        RuntimeError: If the browser session has no agent focus.
        TimeoutError: If the layout-metrics call does not complete within 10 seconds.
    """

    from browser_use.browser.views import PageInfo

    # Get CDP session for the current target
    if not self.browser_session.agent_focus:
        raise RuntimeError('No active CDP session - browser may not be connected yet')

    cdp_session = await self.browser_session.get_or_create_cdp_session(
        target_id=self.browser_session.agent_focus.target_id, focus=True
    )

    # Get layout metrics which includes all the information we need
    metrics = await asyncio.wait_for(
        cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=cdp_session.session_id), timeout=10.0
    )

    # Extract different viewport types
    layout_viewport = metrics.get('layoutViewport', {})
    visual_viewport = metrics.get('visualViewport', {})
    css_visual_viewport = metrics.get('cssVisualViewport', {})
    css_layout_viewport = metrics.get('cssLayoutViewport', {})
    content_size = metrics.get('contentSize', {})

    # Calculate device pixel ratio to convert between device pixels and CSS pixels
    # This matches the approach in dom/service.py _get_viewport_ratio method
    css_width = css_visual_viewport.get('clientWidth', css_layout_viewport.get('clientWidth', 1280.0))
    device_width = visual_viewport.get('clientWidth', css_width)
    # Both widths must be positive: a zero device width would yield a ratio of 0 and make the
    # divisions below raise ZeroDivisionError, so fall back to a 1:1 ratio in that case too.
    if css_width > 0 and device_width > 0:
        device_pixel_ratio = device_width / css_width
    else:
        device_pixel_ratio = 1.0

    # For viewport dimensions, use CSS pixels (what JavaScript sees)
    # Prioritize CSS layout viewport, then fall back to layout viewport
    viewport_width = int(css_layout_viewport.get('clientWidth') or layout_viewport.get('clientWidth', 1280))
    viewport_height = int(css_layout_viewport.get('clientHeight') or layout_viewport.get('clientHeight', 720))

    # For total page dimensions, content size is typically in device pixels, so convert to CSS pixels
    # by dividing by device pixel ratio
    raw_page_width = content_size.get('width', viewport_width * device_pixel_ratio)
    raw_page_height = content_size.get('height', viewport_height * device_pixel_ratio)
    page_width = int(raw_page_width / device_pixel_ratio)
    page_height = int(raw_page_height / device_pixel_ratio)

    # For scroll position, use CSS visual viewport if available, otherwise CSS layout viewport
    # These should already be in CSS pixels
    scroll_x = int(css_visual_viewport.get('pageX') or css_layout_viewport.get('pageX', 0))
    scroll_y = int(css_visual_viewport.get('pageY') or css_layout_viewport.get('pageY', 0))

    # Calculate scroll information - pixels that are above/below/left/right of current viewport.
    # The below/right values are clamped at 0 so an over-scrolled position never goes negative.
    pixels_above = scroll_y
    pixels_below = max(0, page_height - viewport_height - scroll_y)
    pixels_left = scroll_x
    pixels_right = max(0, page_width - viewport_width - scroll_x)

    return PageInfo(
        viewport_width=viewport_width,
        viewport_height=viewport_height,
        page_width=page_width,
        page_height=page_height,
        scroll_x=scroll_x,
        scroll_y=scroll_y,
        pixels_above=pixels_above,
        pixels_below=pixels_below,
        pixels_left=pixels_left,
        pixels_right=pixels_right,
    )
771
+
772
+ # ========== Public Helper Methods ==========
773
+
774
async def get_element_by_index(self, index: int) -> EnhancedDOMTreeNode | None:
    """Look up a DOM element by its index in the cached selector map.

    When the cached selector map is missing or empty, the DOM tree is rebuilt
    (without highlights) before the lookup.

    Args:
        index: Element index to look up in the selector map.

    Returns:
        EnhancedDOMTreeNode or None if index not found
    """
    # Nothing cached yet (or the cache is empty): build the DOM first
    if not self.selector_map:
        await self._build_dom_tree_without_highlights()

    cached_map = self.selector_map
    if not cached_map:
        return None
    return cached_map.get(index)
787
+
788
def clear_cache(self) -> None:
    """Drop the cached selector map, DOM state and DOM tree so the next access rebuilds them."""
    # The DOM service instance is deliberately left alone so its CDP client connection can be reused
    for cached_attr in ('selector_map', 'current_dom_state', 'enhanced_dom_tree'):
        setattr(self, cached_attr, None)
794
+
795
def is_file_input(self, element: EnhancedDOMTreeNode) -> bool:
    """Return True when the element is an ``<input>`` whose ``type`` attribute is ``file``.

    Both the tag name and the ``type`` value are compared case-insensitively;
    a missing ``type`` attribute counts as not a file input.
    """
    if element.node_name.upper() != 'INPUT':
        return False
    input_type = element.attributes.get('type', '')
    return input_type.lower() == 'file'
798
+
799
@staticmethod
def is_element_visible_according_to_all_parents(node: EnhancedDOMTreeNode, html_frames: list[EnhancedDOMTreeNode]) -> bool:
    """Report whether ``node`` is visible with respect to every parent HTML frame.

    This is a thin pass-through: the check itself is performed by the
    ``DomService`` static method of the same name.

    Args:
        node: The element whose visibility is being checked.
        html_frames: The parent HTML frames to check against.

    Returns:
        Whatever ``DomService.is_element_visible_according_to_all_parents`` returns.
    """
    visible = DomService.is_element_visible_according_to_all_parents(node, html_frames)
    return visible
806
+
807
async def __aexit__(self, exc_type, exc_value, traceback):
    """Close the DOM service, if one was created, and drop the reference to it.

    The exception details are forwarded unchanged to the DOM service's own
    ``__aexit__``. Nothing happens when no DOM service is set.
    """
    service = self._dom_service
    if not service:
        return
    await service.__aexit__(exc_type, exc_value, traceback)
    self._dom_service = None
812
+
813
def __del__(self):
    """Run the parent class's finalizer, then release the DOM service reference."""
    super().__del__()
    # Only the reference is dropped here; the DOM service will clean up its own CDP client
    self._dom_service = None