optexity-browser-use 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. browser_use/__init__.py +157 -0
  2. browser_use/actor/__init__.py +11 -0
  3. browser_use/actor/element.py +1175 -0
  4. browser_use/actor/mouse.py +134 -0
  5. browser_use/actor/page.py +561 -0
  6. browser_use/actor/playground/flights.py +41 -0
  7. browser_use/actor/playground/mixed_automation.py +54 -0
  8. browser_use/actor/playground/playground.py +236 -0
  9. browser_use/actor/utils.py +176 -0
  10. browser_use/agent/cloud_events.py +282 -0
  11. browser_use/agent/gif.py +424 -0
  12. browser_use/agent/judge.py +170 -0
  13. browser_use/agent/message_manager/service.py +473 -0
  14. browser_use/agent/message_manager/utils.py +52 -0
  15. browser_use/agent/message_manager/views.py +98 -0
  16. browser_use/agent/prompts.py +413 -0
  17. browser_use/agent/service.py +2316 -0
  18. browser_use/agent/system_prompt.md +185 -0
  19. browser_use/agent/system_prompt_flash.md +10 -0
  20. browser_use/agent/system_prompt_no_thinking.md +183 -0
  21. browser_use/agent/views.py +743 -0
  22. browser_use/browser/__init__.py +41 -0
  23. browser_use/browser/cloud/cloud.py +203 -0
  24. browser_use/browser/cloud/views.py +89 -0
  25. browser_use/browser/events.py +578 -0
  26. browser_use/browser/profile.py +1158 -0
  27. browser_use/browser/python_highlights.py +548 -0
  28. browser_use/browser/session.py +3225 -0
  29. browser_use/browser/session_manager.py +399 -0
  30. browser_use/browser/video_recorder.py +162 -0
  31. browser_use/browser/views.py +200 -0
  32. browser_use/browser/watchdog_base.py +260 -0
  33. browser_use/browser/watchdogs/__init__.py +0 -0
  34. browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
  35. browser_use/browser/watchdogs/crash_watchdog.py +335 -0
  36. browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
  37. browser_use/browser/watchdogs/dom_watchdog.py +817 -0
  38. browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
  39. browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
  40. browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
  41. browser_use/browser/watchdogs/popups_watchdog.py +143 -0
  42. browser_use/browser/watchdogs/recording_watchdog.py +126 -0
  43. browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
  44. browser_use/browser/watchdogs/security_watchdog.py +280 -0
  45. browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
  46. browser_use/cli.py +2359 -0
  47. browser_use/code_use/__init__.py +16 -0
  48. browser_use/code_use/formatting.py +192 -0
  49. browser_use/code_use/namespace.py +665 -0
  50. browser_use/code_use/notebook_export.py +276 -0
  51. browser_use/code_use/service.py +1340 -0
  52. browser_use/code_use/system_prompt.md +574 -0
  53. browser_use/code_use/utils.py +150 -0
  54. browser_use/code_use/views.py +171 -0
  55. browser_use/config.py +505 -0
  56. browser_use/controller/__init__.py +3 -0
  57. browser_use/dom/enhanced_snapshot.py +161 -0
  58. browser_use/dom/markdown_extractor.py +169 -0
  59. browser_use/dom/playground/extraction.py +312 -0
  60. browser_use/dom/playground/multi_act.py +32 -0
  61. browser_use/dom/serializer/clickable_elements.py +200 -0
  62. browser_use/dom/serializer/code_use_serializer.py +287 -0
  63. browser_use/dom/serializer/eval_serializer.py +478 -0
  64. browser_use/dom/serializer/html_serializer.py +212 -0
  65. browser_use/dom/serializer/paint_order.py +197 -0
  66. browser_use/dom/serializer/serializer.py +1170 -0
  67. browser_use/dom/service.py +825 -0
  68. browser_use/dom/utils.py +129 -0
  69. browser_use/dom/views.py +906 -0
  70. browser_use/exceptions.py +5 -0
  71. browser_use/filesystem/__init__.py +0 -0
  72. browser_use/filesystem/file_system.py +619 -0
  73. browser_use/init_cmd.py +376 -0
  74. browser_use/integrations/gmail/__init__.py +24 -0
  75. browser_use/integrations/gmail/actions.py +115 -0
  76. browser_use/integrations/gmail/service.py +225 -0
  77. browser_use/llm/__init__.py +155 -0
  78. browser_use/llm/anthropic/chat.py +242 -0
  79. browser_use/llm/anthropic/serializer.py +312 -0
  80. browser_use/llm/aws/__init__.py +36 -0
  81. browser_use/llm/aws/chat_anthropic.py +242 -0
  82. browser_use/llm/aws/chat_bedrock.py +289 -0
  83. browser_use/llm/aws/serializer.py +257 -0
  84. browser_use/llm/azure/chat.py +91 -0
  85. browser_use/llm/base.py +57 -0
  86. browser_use/llm/browser_use/__init__.py +3 -0
  87. browser_use/llm/browser_use/chat.py +201 -0
  88. browser_use/llm/cerebras/chat.py +193 -0
  89. browser_use/llm/cerebras/serializer.py +109 -0
  90. browser_use/llm/deepseek/chat.py +212 -0
  91. browser_use/llm/deepseek/serializer.py +109 -0
  92. browser_use/llm/exceptions.py +29 -0
  93. browser_use/llm/google/__init__.py +3 -0
  94. browser_use/llm/google/chat.py +542 -0
  95. browser_use/llm/google/serializer.py +120 -0
  96. browser_use/llm/groq/chat.py +229 -0
  97. browser_use/llm/groq/parser.py +158 -0
  98. browser_use/llm/groq/serializer.py +159 -0
  99. browser_use/llm/messages.py +238 -0
  100. browser_use/llm/models.py +271 -0
  101. browser_use/llm/oci_raw/__init__.py +10 -0
  102. browser_use/llm/oci_raw/chat.py +443 -0
  103. browser_use/llm/oci_raw/serializer.py +229 -0
  104. browser_use/llm/ollama/chat.py +97 -0
  105. browser_use/llm/ollama/serializer.py +143 -0
  106. browser_use/llm/openai/chat.py +264 -0
  107. browser_use/llm/openai/like.py +15 -0
  108. browser_use/llm/openai/serializer.py +165 -0
  109. browser_use/llm/openrouter/chat.py +211 -0
  110. browser_use/llm/openrouter/serializer.py +26 -0
  111. browser_use/llm/schema.py +176 -0
  112. browser_use/llm/views.py +48 -0
  113. browser_use/logging_config.py +330 -0
  114. browser_use/mcp/__init__.py +18 -0
  115. browser_use/mcp/__main__.py +12 -0
  116. browser_use/mcp/client.py +544 -0
  117. browser_use/mcp/controller.py +264 -0
  118. browser_use/mcp/server.py +1114 -0
  119. browser_use/observability.py +204 -0
  120. browser_use/py.typed +0 -0
  121. browser_use/sandbox/__init__.py +41 -0
  122. browser_use/sandbox/sandbox.py +637 -0
  123. browser_use/sandbox/views.py +132 -0
  124. browser_use/screenshots/__init__.py +1 -0
  125. browser_use/screenshots/service.py +52 -0
  126. browser_use/sync/__init__.py +6 -0
  127. browser_use/sync/auth.py +357 -0
  128. browser_use/sync/service.py +161 -0
  129. browser_use/telemetry/__init__.py +51 -0
  130. browser_use/telemetry/service.py +112 -0
  131. browser_use/telemetry/views.py +101 -0
  132. browser_use/tokens/__init__.py +0 -0
  133. browser_use/tokens/custom_pricing.py +24 -0
  134. browser_use/tokens/mappings.py +4 -0
  135. browser_use/tokens/service.py +580 -0
  136. browser_use/tokens/views.py +108 -0
  137. browser_use/tools/registry/service.py +572 -0
  138. browser_use/tools/registry/views.py +174 -0
  139. browser_use/tools/service.py +1675 -0
  140. browser_use/tools/utils.py +82 -0
  141. browser_use/tools/views.py +100 -0
  142. browser_use/utils.py +670 -0
  143. optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
  144. optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
  145. optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
  146. optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
  147. optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,134 @@
1
+ """Mouse class for mouse operations."""
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ if TYPE_CHECKING:
6
+ from cdp_use.cdp.input.commands import DispatchMouseEventParameters, SynthesizeScrollGestureParameters
7
+ from cdp_use.cdp.input.types import MouseButton
8
+
9
+ from browser_use.browser.session import BrowserSession
10
+
11
+
12
+ class Mouse:
13
+ """Mouse operations for a target."""
14
+
15
+ def __init__(self, browser_session: 'BrowserSession', session_id: str | None = None, target_id: str | None = None):
16
+ self._browser_session = browser_session
17
+ self._client = browser_session.cdp_client
18
+ self._session_id = session_id
19
+ self._target_id = target_id
20
+
21
async def click(self, x: int, y: int, button: 'MouseButton' = 'left', click_count: int = 1) -> None:
    """Click at (x, y) by sending a press event followed by a release event.

    Args:
        x: X coordinate sent with both events.
        y: Y coordinate sent with both events.
        button: Mouse button sent with both events.
        click_count: Value sent as ``clickCount`` with both events.
    """
    # Press first, then release; both events carry identical coordinates/button.
    for event_type in ('mousePressed', 'mouseReleased'):
        event: 'DispatchMouseEventParameters' = {
            'type': event_type,
            'x': x,
            'y': y,
            'button': button,
            'clickCount': click_count,
        }
        await self._client.send.Input.dispatchMouseEvent(
            event,
            session_id=self._session_id,
        )
48
+
49
async def down(self, button: 'MouseButton' = 'left', click_count: int = 1) -> None:
    """Send a single ``mousePressed`` event for ``button``.

    The event is always dispatched with x=0 and y=0.
    NOTE(review): the original inline comment claimed these "will use last
    mouse position"; nothing in this method tracks a previous position, so
    confirm against the CDP Input.dispatchMouseEvent documentation whether
    (0, 0) is really what is wanted here.

    Args:
        button: Mouse button to press.
        click_count: Value sent as ``clickCount``.
    """
    origin_x, origin_y = 0, 0
    press_event: 'DispatchMouseEventParameters' = {
        'type': 'mousePressed',
        'x': origin_x,
        'y': origin_y,
        'button': button,
        'clickCount': click_count,
    }
    await self._client.send.Input.dispatchMouseEvent(press_event, session_id=self._session_id)
62
+
63
async def up(self, button: 'MouseButton' = 'left', click_count: int = 1) -> None:
    """Send a single ``mouseReleased`` event for ``button``.

    The event is always dispatched with x=0 and y=0.
    NOTE(review): the original inline comment claimed these "will use last
    mouse position"; nothing in this method tracks a previous position, so
    confirm against the CDP Input.dispatchMouseEvent documentation whether
    (0, 0) is really what is wanted here.

    Args:
        button: Mouse button to release.
        click_count: Value sent as ``clickCount``.
    """
    origin_x, origin_y = 0, 0
    release_event: 'DispatchMouseEventParameters' = {
        'type': 'mouseReleased',
        'x': origin_x,
        'y': origin_y,
        'button': button,
        'clickCount': click_count,
    }
    await self._client.send.Input.dispatchMouseEvent(release_event, session_id=self._session_id)
76
+
77
async def move(self, x: int, y: int, steps: int = 1) -> None:
    """Send one ``mouseMoved`` event to (x, y).

    Args:
        x: Destination X coordinate.
        y: Destination Y coordinate.
        steps: Accepted for API compatibility but currently ignored; no
            intermediate positions are generated.
    """
    # TODO: Implement smooth movement with multiple steps if needed
    del steps  # parameter is reserved for future use

    move_event: 'DispatchMouseEventParameters' = {'type': 'mouseMoved', 'x': x, 'y': y}
    await self._client.send.Input.dispatchMouseEvent(
        move_event,
        session_id=self._session_id,
    )
84
+
85
+ async def scroll(self, x: int = 0, y: int = 0, delta_x: int | None = None, delta_y: int | None = None) -> None:
86
+ """Scroll the page using robust CDP methods."""
87
+ if not self._session_id:
88
+ raise RuntimeError('Session ID is required for scroll operations')
89
+
90
+ # Method 1: Try mouse wheel event (most reliable)
91
+ try:
92
+ # Get viewport dimensions
93
+ layout_metrics = await self._client.send.Page.getLayoutMetrics(session_id=self._session_id)
94
+ viewport_width = layout_metrics['layoutViewport']['clientWidth']
95
+ viewport_height = layout_metrics['layoutViewport']['clientHeight']
96
+
97
+ # Use provided coordinates or center of viewport
98
+ scroll_x = x if x > 0 else viewport_width / 2
99
+ scroll_y = y if y > 0 else viewport_height / 2
100
+
101
+ # Calculate scroll deltas (positive = down/right)
102
+ scroll_delta_x = delta_x or 0
103
+ scroll_delta_y = delta_y or 0
104
+
105
+ # Dispatch mouse wheel event
106
+ await self._client.send.Input.dispatchMouseEvent(
107
+ params={
108
+ 'type': 'mouseWheel',
109
+ 'x': scroll_x,
110
+ 'y': scroll_y,
111
+ 'deltaX': scroll_delta_x,
112
+ 'deltaY': scroll_delta_y,
113
+ },
114
+ session_id=self._session_id,
115
+ )
116
+ return
117
+
118
+ except Exception:
119
+ pass
120
+
121
+ # Method 2: Fallback to synthesizeScrollGesture
122
+ try:
123
+ params: 'SynthesizeScrollGestureParameters' = {'x': x, 'y': y, 'xDistance': delta_x or 0, 'yDistance': delta_y or 0}
124
+ await self._client.send.Input.synthesizeScrollGesture(
125
+ params,
126
+ session_id=self._session_id,
127
+ )
128
+ except Exception:
129
+ # Method 3: JavaScript fallback
130
+ scroll_js = f'window.scrollBy({delta_x or 0}, {delta_y or 0})'
131
+ await self._client.send.Runtime.evaluate(
132
+ params={'expression': scroll_js, 'returnByValue': True},
133
+ session_id=self._session_id,
134
+ )
@@ -0,0 +1,561 @@
1
+ """Page class for page-level operations."""
2
+
3
+ from typing import TYPE_CHECKING, TypeVar
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from browser_use.actor.utils import get_key_info
8
+ from browser_use.dom.serializer.serializer import DOMTreeSerializer
9
+ from browser_use.dom.service import DomService
10
+ from browser_use.llm.messages import SystemMessage, UserMessage
11
+
12
+ T = TypeVar('T', bound=BaseModel)
13
+
14
+ if TYPE_CHECKING:
15
+ from cdp_use.cdp.dom.commands import (
16
+ DescribeNodeParameters,
17
+ QuerySelectorAllParameters,
18
+ )
19
+ from cdp_use.cdp.emulation.commands import SetDeviceMetricsOverrideParameters
20
+ from cdp_use.cdp.input.commands import (
21
+ DispatchKeyEventParameters,
22
+ )
23
+ from cdp_use.cdp.page.commands import CaptureScreenshotParameters, NavigateParameters, NavigateToHistoryEntryParameters
24
+ from cdp_use.cdp.runtime.commands import EvaluateParameters
25
+ from cdp_use.cdp.target.commands import (
26
+ AttachToTargetParameters,
27
+ GetTargetInfoParameters,
28
+ )
29
+ from cdp_use.cdp.target.types import TargetInfo
30
+
31
+ from browser_use.browser.session import BrowserSession
32
+ from browser_use.llm.base import BaseChatModel
33
+
34
+ from .element import Element
35
+ from .mouse import Mouse
36
+
37
+
38
+ class Page:
39
+ """Page operations (tab or iframe)."""
40
+
41
+ def __init__(
42
+ self, browser_session: 'BrowserSession', target_id: str, session_id: str | None = None, llm: 'BaseChatModel | None' = None
43
+ ):
44
+ self._browser_session = browser_session
45
+ self._client = browser_session.cdp_client
46
+ self._target_id = target_id
47
+ self._session_id: str | None = session_id
48
+ self._mouse: 'Mouse | None' = None
49
+
50
+ self._llm = llm
51
+
52
+ async def _ensure_session(self) -> str:
53
+ """Ensure we have a session ID for this target."""
54
+ if not self._session_id:
55
+ params: 'AttachToTargetParameters' = {'targetId': self._target_id, 'flatten': True}
56
+ result = await self._client.send.Target.attachToTarget(params)
57
+ self._session_id = result['sessionId']
58
+
59
+ # Enable necessary domains
60
+ import asyncio
61
+
62
+ await asyncio.gather(
63
+ self._client.send.Page.enable(session_id=self._session_id),
64
+ self._client.send.DOM.enable(session_id=self._session_id),
65
+ self._client.send.Runtime.enable(session_id=self._session_id),
66
+ self._client.send.Network.enable(session_id=self._session_id),
67
+ )
68
+
69
+ return self._session_id
70
+
71
@property
async def session_id(self) -> str:
    """Awaitable property yielding the CDP session ID for this target.

    The property returns a coroutine, so use it as ``await page.session_id``.

    @dev Pass this to an arbitrary CDP call
    """
    resolved_id = await self._ensure_session()
    return resolved_id
78
+
79
@property
async def mouse(self) -> 'Mouse':
    """Awaitable property yielding the ``Mouse`` helper for this target.

    The helper is created on first access (after making sure a session
    exists) and the same instance is returned afterwards.
    """
    if self._mouse:
        return self._mouse

    active_session = await self._ensure_session()
    from .mouse import Mouse

    self._mouse = Mouse(self._browser_session, active_session, self._target_id)
    return self._mouse
88
+
89
async def reload(self) -> None:
    """Reload the target by issuing CDP ``Page.reload`` on its session."""
    active_session = await self._ensure_session()
    page_domain = self._client.send.Page
    await page_domain.reload(session_id=active_session)
93
+
94
async def get_element(self, backend_node_id: int) -> 'Element':
    """Wrap a backend node ID in an ``Element`` bound to this page's session.

    Args:
        backend_node_id: Backend node ID handed to the ``Element`` constructor.
    """
    active_session = await self._ensure_session()

    # Imported locally, under an alias, as in the rest of this class.
    from .element import Element as Element_

    element = Element_(self._browser_session, backend_node_id, active_session)
    return element
101
+
102
async def evaluate(self, page_function: str, *args) -> str:
    """Execute a JavaScript arrow function in the target and return its result as text.

    Args:
        page_function: JavaScript code that MUST start with (...args) => format
        *args: Arguments to pass to the function; each one is JSON-encoded
            and spliced into the call expression.

    Returns:
        String representation of the JavaScript execution result.
        ``None``/missing values become ``''``, strings are returned as-is,
        dicts and lists are JSON-stringified, everything else goes through
        ``str()``.

    Raises:
        ValueError: If the cleaned code does not start with ``(`` or contains no ``=>``.
        RuntimeError: If the CDP response contains ``exceptionDetails``.
    """
    import json
    import logging

    session_id = await self._ensure_session()

    # Clean and fix common JavaScript string parsing issues
    page_function = self._fix_javascript_string(page_function)

    # Enforce arrow function format
    if not (page_function.startswith('(') and '=>' in page_function):
        raise ValueError(f'JavaScript code must start with (...args) => format. Got: {page_function[:50]}...')

    # Build the expression - call the arrow function with JSON-encoded args
    # (an empty argument list yields "(fn)()").
    call_args = ', '.join(json.dumps(arg) for arg in args)
    expression = f'({page_function})({call_args})'

    # Diagnostics go through logging rather than stdout so library users
    # are not spammed on every evaluation.
    logging.getLogger(__name__).debug('Evaluating JavaScript: %r', expression)

    params: 'EvaluateParameters' = {'expression': expression, 'returnByValue': True, 'awaitPromise': True}
    result = await self._client.send.Runtime.evaluate(
        params,
        session_id=session_id,
    )

    if 'exceptionDetails' in result:
        raise RuntimeError(f'JavaScript evaluation failed: {result["exceptionDetails"]}')

    value = result.get('result', {}).get('value')

    # Always return string representation
    if value is None:
        return ''
    if isinstance(value, str):
        return value
    if isinstance(value, (dict, list)):
        try:
            return json.dumps(value)
        except (TypeError, ValueError):
            return str(value)
    return str(value)
159
+
160
+ def _fix_javascript_string(self, js_code: str) -> str:
161
+ """Fix common JavaScript string parsing issues when written as Python string."""
162
+
163
+ # Just do minimal, safe cleaning
164
+ js_code = js_code.strip()
165
+
166
+ # Only fix the most common and safe issues:
167
+
168
+ # 1. Remove obvious Python string wrapper quotes if they exist
169
+ if (js_code.startswith('"') and js_code.endswith('"')) or (js_code.startswith("'") and js_code.endswith("'")):
170
+ # Check if it's a wrapped string (not part of JS syntax)
171
+ inner = js_code[1:-1]
172
+ if inner.count('"') + inner.count("'") == 0 or '() =>' in inner:
173
+ js_code = inner
174
+
175
+ # 2. Only fix clearly escaped quotes that shouldn't be
176
+ # But be very conservative - only if we're sure it's a Python string artifact
177
+ if '\\"' in js_code and js_code.count('\\"') > js_code.count('"'):
178
+ js_code = js_code.replace('\\"', '"')
179
+ if "\\'" in js_code and js_code.count("\\'") > js_code.count("'"):
180
+ js_code = js_code.replace("\\'", "'")
181
+
182
+ # 3. Basic whitespace normalization only
183
+ js_code = js_code.strip()
184
+
185
+ # Final validation - ensure it's not empty
186
+ if not js_code:
187
+ raise ValueError('JavaScript code is empty after cleaning')
188
+
189
+ return js_code
190
+
191
+ async def screenshot(self, format: str = 'jpeg', quality: int | None = None) -> str:
192
+ """Take a screenshot and return base64 encoded image.
193
+
194
+ Args:
195
+ format: Image format ('jpeg', 'png', 'webp')
196
+ quality: Quality 0-100 for JPEG format
197
+
198
+ Returns:
199
+ Base64-encoded image data
200
+ """
201
+ session_id = await self._ensure_session()
202
+
203
+ params: 'CaptureScreenshotParameters' = {'format': format}
204
+
205
+ if quality is not None and format.lower() == 'jpeg':
206
+ params['quality'] = quality
207
+
208
+ result = await self._client.send.Page.captureScreenshot(params, session_id=session_id)
209
+
210
+ return result['data']
211
+
212
async def press(self, key: str) -> None:
    """Press a key or key combination (sent to the focused element or page).

    A combination is written as modifiers joined by ``+`` with the main key
    last, e.g. ``"Control+A"``. Modifier keys are pressed in order, the main
    key is pressed and released with the modifier bitmask set, then the
    modifiers are released in reverse order.

    The literal plus key is supported: ``"+"`` presses it alone and a
    trailing ``"++"`` (e.g. ``"Control++"``) uses it as the main key. Empty
    modifier names produced by stray ``+`` characters are ignored.

    Args:
        key: Key name or ``+``-separated combination.
    """
    session_id = await self._ensure_session()

    async def dispatch(event_type: str, key_name: str, modifiers: int | None = None) -> None:
        # One keyDown/keyUp event; the virtual key code is attached when known.
        code, vk_code = get_key_info(key_name)
        params: 'DispatchKeyEventParameters' = {'type': event_type, 'key': key_name, 'code': code}
        if modifiers is not None:
            params['modifiers'] = modifiers
        if vk_code is not None:
            params['windowsVirtualKeyCode'] = vk_code
        await self._client.send.Input.dispatchKeyEvent(params, session_id=session_id)

    # Split into modifiers + main key, treating a literal "+" key specially
    # so it is not mistaken for the separator.
    if '+' in key and key != '+':
        if key.endswith('++'):
            modifier_names = key[:-2].split('+')
            main_key = '+'
        else:
            *modifier_names, main_key = key.split('+')
        modifier_names = [name for name in modifier_names if name]
    else:
        modifier_names, main_key = [], key

    if not modifier_names:
        # Simple key press
        await dispatch('keyDown', main_key)
        await dispatch('keyUp', main_key)
        return

    # Calculate modifier bitmask (unknown modifier names contribute nothing)
    modifier_map = {'Alt': 1, 'Control': 2, 'Meta': 4, 'Shift': 8}
    modifier_value = 0
    for name in modifier_names:
        modifier_value |= modifier_map.get(name, 0)

    # Press modifier keys
    for name in modifier_names:
        await dispatch('keyDown', name)

    # Press and release main key with modifiers bitmask
    await dispatch('keyDown', main_key, modifier_value)
    await dispatch('keyUp', main_key, modifier_value)

    # Release modifier keys in reverse order
    for name in reversed(modifier_names):
        await dispatch('keyUp', name)
277
+
278
async def set_viewport_size(self, width: int, height: int) -> None:
    """Override the device metrics so the viewport is ``width`` x ``height``.

    The override is sent with a device scale factor of 1.0 and ``mobile``
    set to False.
    """
    active_session = await self._ensure_session()

    metrics: 'SetDeviceMetricsOverrideParameters' = {
        'width': width,
        'height': height,
        'deviceScaleFactor': 1.0,
        'mobile': False,
    }
    emulation = self._client.send.Emulation
    await emulation.setDeviceMetricsOverride(metrics, session_id=active_session)
292
+
293
+ # Target properties (from CDP getTargetInfo)
294
async def get_target_info(self) -> 'TargetInfo':
    """Fetch the CDP ``targetInfo`` record for this page's target ID.

    The request is sent without a session ID.
    """
    request: 'GetTargetInfoParameters' = {'targetId': self._target_id}
    response = await self._client.send.Target.getTargetInfo(request)
    return response['targetInfo']
299
+
300
async def get_url(self) -> str:
    """Return the ``url`` field of the target info, or ``''`` when it is absent."""
    target_info = await self.get_target_info()
    current_url = target_info.get('url', '')
    return current_url
304
+
305
async def get_title(self) -> str:
    """Return the ``title`` field of the target info, or ``''`` when it is absent."""
    target_info = await self.get_target_info()
    current_title = target_info.get('title', '')
    return current_title
309
+
310
async def goto(self, url: str) -> None:
    """Start a navigation of this target to ``url`` via CDP ``Page.navigate``.

    The CDP response is not inspected.
    """
    active_session = await self._ensure_session()

    navigation: 'NavigateParameters' = {'url': url}
    page_domain = self._client.send.Page
    await page_domain.navigate(navigation, session_id=active_session)
316
+
317
async def navigate(self, url: str) -> None:
    """Navigate to ``url``; a thin alias that delegates to ``goto``."""
    destination = url
    await self.goto(destination)
320
+
321
async def go_back(self) -> None:
    """Navigate to the previous entry in this target's navigation history.

    Raises:
        RuntimeError: If there is no previous entry or any CDP call fails.
            The message is always prefixed with ``Failed to navigate back:``
            and the underlying exception is attached as ``__cause__``.
    """
    session_id = await self._ensure_session()

    try:
        # Get navigation history
        history = await self._client.send.Page.getNavigationHistory(session_id=session_id)
        current_index = history['currentIndex']
        entries = history['entries']

        # Check if we can go back
        if current_index <= 0:
            raise RuntimeError('Cannot go back - no previous entry in history')

        # Navigate to the previous entry
        params: 'NavigateToHistoryEntryParameters' = {'entryId': entries[current_index - 1]['id']}
        await self._client.send.Page.navigateToHistoryEntry(params, session_id=session_id)

    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise RuntimeError(f'Failed to navigate back: {e}') from e
342
+
343
async def go_forward(self) -> None:
    """Navigate to the next entry in this target's navigation history.

    Raises:
        RuntimeError: If there is no next entry or any CDP call fails.
            The message is always prefixed with ``Failed to navigate forward:``
            and the underlying exception is attached as ``__cause__``.
    """
    session_id = await self._ensure_session()

    try:
        # Get navigation history
        history = await self._client.send.Page.getNavigationHistory(session_id=session_id)
        current_index = history['currentIndex']
        entries = history['entries']

        # Check if we can go forward
        if current_index >= len(entries) - 1:
            raise RuntimeError('Cannot go forward - no next entry in history')

        # Navigate to the next entry
        params: 'NavigateToHistoryEntryParameters' = {'entryId': entries[current_index + 1]['id']}
        await self._client.send.Page.navigateToHistoryEntry(params, session_id=session_id)

    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise RuntimeError(f'Failed to navigate forward: {e}') from e
364
+
365
+ # Element finding methods (these would need to be implemented based on DOM queries)
366
async def get_elements_by_css_selector(self, selector: str) -> list['Element']:
    """Return an ``Element`` for every node matching a CSS selector.

    Fetches the document root, runs ``DOM.querySelectorAll`` from it, then
    resolves each returned node ID to its backend node ID with one
    ``DOM.describeNode`` call per node, in result order.

    Args:
        selector: CSS selector evaluated against the document root.
    """
    active_session = await self._ensure_session()
    dom = self._client.send.DOM

    # The query has to start from the document root node.
    document = await dom.getDocument(session_id=active_session)
    root_node_id = document['root']['nodeId']

    query: 'QuerySelectorAllParameters' = {'nodeId': root_node_id, 'selector': selector}
    matches = await dom.querySelectorAll(query, session_id=active_session)

    from .element import Element as Element_

    found = []
    for node_id in matches['nodeIds']:
        # Elements are addressed by backend node ID, not by node ID.
        describe: 'DescribeNodeParameters' = {'nodeId': node_id}
        described = await dom.describeNode(describe, session_id=active_session)
        found.append(Element_(self._browser_session, described['node']['backendNodeId'], active_session))

    return found
390
+
391
+ # AI METHODS
392
+
393
@property
def dom_service(self) -> 'DomService':
    """Build a ``DomService`` for this page's browser session.

    A new instance is constructed on every access; nothing is cached.
    """
    service = DomService(self._browser_session)
    return service
397
+
398
+ async def get_element_by_prompt(self, prompt: str, llm: 'BaseChatModel | None' = None) -> 'Element | None':
399
+ """Ask an LLM to pick the page element that matches a natural-language prompt.
+
+ Serializes the accessible elements of this target's DOM tree (with paint-order
+ filtering), sends that representation plus the prompt to the LLM, and maps the
+ returned index back through the serializer's selector map.
+
+ Args:
+ prompt: Description of the element to find.
+ llm: Model to use; falls back to the LLM given to the constructor.
+
+ Returns:
+ An Element for the chosen index, or None when the LLM returns no index or
+ an index that is not in the selector map.
+
+ Raises:
+ ValueError: If neither `llm` nor the constructor LLM is set.
+ """
400
+ await self._ensure_session()
401
+ llm = llm or self._llm
402
+
403
+ if not llm:
404
+ raise ValueError('LLM not provided')
405
+
406
+ dom_service = self.dom_service
407
+
408
+ enhanced_dom_tree = await dom_service.get_dom_tree(target_id=self._target_id)
409
+
410
+ serialized_dom_state, _ = DOMTreeSerializer(
411
+ enhanced_dom_tree, None, paint_order_filtering=True
412
+ ).serialize_accessible_elements()
413
+
414
+ llm_representation = serialized_dom_state.llm_representation()
415
+
+ # NOTE(review): the prompt below contains a typo ("If non of the elements matches the,");
+ # it is runtime text sent to the model, so it is left untouched here.
416
+ system_message = SystemMessage(
417
+ content="""You are an AI created to find an element on a page by a prompt.
418
+
419
+ <browser_state>
420
+ Interactive Elements: All interactive elements will be provided in format as [index]<type>text</type> where
421
+ - index: Numeric identifier for interaction
422
+ - type: HTML element type (button, input, etc.)
423
+ - text: Element description
424
+
425
+ Examples:
426
+ [33]<div>User form</div>
427
+ [35]<button aria-label='Submit form'>Submit</button>
428
+
429
+ Note that:
430
+ - Only elements with numeric indexes in [] are interactive
431
+ - (stacked) indentation (with \t) is important and means that the element is a (html) child of the element above (with a lower index)
432
+ - Pure text elements without [] are not interactive.
433
+ </browser_state>
434
+
435
+ Your task is to find an element index (if any) that matches the prompt (written in <prompt> tag).
436
+
437
+ If non of the elements matches the, return None.
438
+
439
+ Before you return the element index, reason about the state and elements for a sentence or two."""
440
+ )
441
+
442
+ state_message = UserMessage(
443
+ content=f"""
444
+ <browser_state>
445
+ {llm_representation}
446
+ </browser_state>
447
+
448
+ <prompt>
449
+ {prompt}
450
+ </prompt>
451
+ """
452
+ )
453
+
+ # Structured-output schema for the LLM reply: only the chosen index (or None).
454
+ class ElementResponse(BaseModel):
455
+ # thinking: str
456
+ element_highlight_index: int | None
457
+
458
+ llm_response = await llm.ainvoke(
459
+ [
460
+ system_message,
461
+ state_message,
462
+ ],
463
+ output_format=ElementResponse,
464
+ )
465
+
466
+ element_highlight_index = llm_response.completion.element_highlight_index
467
+
+ # Treat both "no answer" and an index missing from the selector map as "not found".
468
+ if element_highlight_index is None or element_highlight_index not in serialized_dom_state.selector_map:
469
+ return None
470
+
471
+ element = serialized_dom_state.selector_map[element_highlight_index]
472
+
473
+ from .element import Element as Element_
474
+
475
+ return Element_(self._browser_session, element.backend_node_id, self._session_id)
476
+
477
async def must_get_element_by_prompt(self, prompt: str, llm: 'BaseChatModel | None' = None) -> 'Element':
    """Like ``get_element_by_prompt`` but never returns ``None``.

    @dev LLM can still return None, this just raises an error if the element is not found.

    Raises:
        ValueError: If no element was found for the prompt.
    """
    found = await self.get_element_by_prompt(prompt, llm)
    if found is not None:
        return found

    raise ValueError(f'No element found for prompt: {prompt}')
487
+
488
+ async def extract_content(self, prompt: str, structured_output: type[T], llm: 'BaseChatModel | None' = None) -> T:
489
+ """Extract structured content from the current page using LLM.
490
+
491
+ Extracts clean markdown from the page and sends it to LLM for structured data extraction.
492
+
493
+ Args:
494
+ prompt: Description of what content to extract
495
+ structured_output: Pydantic BaseModel class defining the expected output structure
496
+ llm: Language model to use for extraction; falls back to the LLM given to the constructor
497
+
498
+ Returns:
499
+ The structured BaseModel instance with extracted content
+
+ Raises:
+ ValueError: If neither `llm` nor the constructor LLM is set.
+ RuntimeError: If markdown extraction fails (message carries only the exception
+ type name), or if the LLM call fails or exceeds the 120 second timeout
+ (message is str() of the underlying exception).
500
+ """
501
+ llm = llm or self._llm
502
+
503
+ if not llm:
504
+ raise ValueError('LLM not provided')
505
+
506
+ # Extract clean markdown using the same method as in tools/service.py
+ # NOTE(review): content_stats is never used below.
507
+ try:
508
+ content, content_stats = await self._extract_clean_markdown()
509
+ except Exception as e:
+ # NOTE(review): the original message and traceback are dropped here (no `from e`).
510
+ raise RuntimeError(f'Could not extract clean markdown: {type(e).__name__}')
511
+
512
+ # System prompt for structured extraction
513
+ system_prompt = """
514
+ You are an expert at extracting structured data from the markdown of a webpage.
515
+
516
+ <input>
517
+ You will be given a query and the markdown of a webpage that has been filtered to remove noise and advertising content.
518
+ </input>
519
+
520
+ <instructions>
521
+ - You are tasked to extract information from the webpage that is relevant to the query.
522
+ - You should ONLY use the information available in the webpage to answer the query. Do not make up information or provide guess from your own knowledge.
523
+ - If the information relevant to the query is not available in the page, your response should mention that.
524
+ - If the query asks for all items, products, etc., make sure to directly list all of them.
525
+ - Return the extracted content in the exact structured format specified.
526
+ </instructions>
527
+
528
+ <output>
529
+ - Your output should present ALL the information relevant to the query in the specified structured format.
530
+ - Do not answer in conversational format - directly output the relevant information in the structured format.
531
+ </output>
532
+ """.strip()
533
+
534
+ # Build prompt with just query and content
535
+ prompt_content = f'<query>\n{prompt}\n</query>\n\n<webpage_content>\n{content}\n</webpage_content>'
536
+
537
+ # Send to LLM with structured output
538
+ import asyncio
539
+
540
+ try:
541
+ response = await asyncio.wait_for(
542
+ llm.ainvoke(
543
+ [SystemMessage(content=system_prompt), UserMessage(content=prompt_content)], output_format=structured_output
544
+ ),
545
+ timeout=120.0,
546
+ )
547
+
548
+ # Return the structured output BaseModel instance
549
+ return response.completion
550
+ except Exception as e:
+ # NOTE(review): a timeout typically stringifies to '', which would yield an empty
+ # RuntimeError message; the cause is also not chained. Consider `from e`.
551
+ raise RuntimeError(str(e))
552
+
553
async def _extract_clean_markdown(self, extract_links: bool = False) -> tuple[str, dict]:
    """Return clean markdown (plus a stats dict) for the current page.

    Delegates to the shared ``extract_clean_markdown`` helper, passing this
    page's DOM service and target ID, for consistency with tools/service.py.

    Args:
        extract_links: Forwarded unchanged to the shared extractor.
    """
    from browser_use.dom.markdown_extractor import extract_clean_markdown

    return await extract_clean_markdown(
        dom_service=self.dom_service,
        target_id=self._target_id,
        extract_links=extract_links,
    )