optexity-browser-use 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use/__init__.py +157 -0
- browser_use/actor/__init__.py +11 -0
- browser_use/actor/element.py +1175 -0
- browser_use/actor/mouse.py +134 -0
- browser_use/actor/page.py +561 -0
- browser_use/actor/playground/flights.py +41 -0
- browser_use/actor/playground/mixed_automation.py +54 -0
- browser_use/actor/playground/playground.py +236 -0
- browser_use/actor/utils.py +176 -0
- browser_use/agent/cloud_events.py +282 -0
- browser_use/agent/gif.py +424 -0
- browser_use/agent/judge.py +170 -0
- browser_use/agent/message_manager/service.py +473 -0
- browser_use/agent/message_manager/utils.py +52 -0
- browser_use/agent/message_manager/views.py +98 -0
- browser_use/agent/prompts.py +413 -0
- browser_use/agent/service.py +2316 -0
- browser_use/agent/system_prompt.md +185 -0
- browser_use/agent/system_prompt_flash.md +10 -0
- browser_use/agent/system_prompt_no_thinking.md +183 -0
- browser_use/agent/views.py +743 -0
- browser_use/browser/__init__.py +41 -0
- browser_use/browser/cloud/cloud.py +203 -0
- browser_use/browser/cloud/views.py +89 -0
- browser_use/browser/events.py +578 -0
- browser_use/browser/profile.py +1158 -0
- browser_use/browser/python_highlights.py +548 -0
- browser_use/browser/session.py +3225 -0
- browser_use/browser/session_manager.py +399 -0
- browser_use/browser/video_recorder.py +162 -0
- browser_use/browser/views.py +200 -0
- browser_use/browser/watchdog_base.py +260 -0
- browser_use/browser/watchdogs/__init__.py +0 -0
- browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
- browser_use/browser/watchdogs/crash_watchdog.py +335 -0
- browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
- browser_use/browser/watchdogs/dom_watchdog.py +817 -0
- browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
- browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
- browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
- browser_use/browser/watchdogs/popups_watchdog.py +143 -0
- browser_use/browser/watchdogs/recording_watchdog.py +126 -0
- browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
- browser_use/browser/watchdogs/security_watchdog.py +280 -0
- browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
- browser_use/cli.py +2359 -0
- browser_use/code_use/__init__.py +16 -0
- browser_use/code_use/formatting.py +192 -0
- browser_use/code_use/namespace.py +665 -0
- browser_use/code_use/notebook_export.py +276 -0
- browser_use/code_use/service.py +1340 -0
- browser_use/code_use/system_prompt.md +574 -0
- browser_use/code_use/utils.py +150 -0
- browser_use/code_use/views.py +171 -0
- browser_use/config.py +505 -0
- browser_use/controller/__init__.py +3 -0
- browser_use/dom/enhanced_snapshot.py +161 -0
- browser_use/dom/markdown_extractor.py +169 -0
- browser_use/dom/playground/extraction.py +312 -0
- browser_use/dom/playground/multi_act.py +32 -0
- browser_use/dom/serializer/clickable_elements.py +200 -0
- browser_use/dom/serializer/code_use_serializer.py +287 -0
- browser_use/dom/serializer/eval_serializer.py +478 -0
- browser_use/dom/serializer/html_serializer.py +212 -0
- browser_use/dom/serializer/paint_order.py +197 -0
- browser_use/dom/serializer/serializer.py +1170 -0
- browser_use/dom/service.py +825 -0
- browser_use/dom/utils.py +129 -0
- browser_use/dom/views.py +906 -0
- browser_use/exceptions.py +5 -0
- browser_use/filesystem/__init__.py +0 -0
- browser_use/filesystem/file_system.py +619 -0
- browser_use/init_cmd.py +376 -0
- browser_use/integrations/gmail/__init__.py +24 -0
- browser_use/integrations/gmail/actions.py +115 -0
- browser_use/integrations/gmail/service.py +225 -0
- browser_use/llm/__init__.py +155 -0
- browser_use/llm/anthropic/chat.py +242 -0
- browser_use/llm/anthropic/serializer.py +312 -0
- browser_use/llm/aws/__init__.py +36 -0
- browser_use/llm/aws/chat_anthropic.py +242 -0
- browser_use/llm/aws/chat_bedrock.py +289 -0
- browser_use/llm/aws/serializer.py +257 -0
- browser_use/llm/azure/chat.py +91 -0
- browser_use/llm/base.py +57 -0
- browser_use/llm/browser_use/__init__.py +3 -0
- browser_use/llm/browser_use/chat.py +201 -0
- browser_use/llm/cerebras/chat.py +193 -0
- browser_use/llm/cerebras/serializer.py +109 -0
- browser_use/llm/deepseek/chat.py +212 -0
- browser_use/llm/deepseek/serializer.py +109 -0
- browser_use/llm/exceptions.py +29 -0
- browser_use/llm/google/__init__.py +3 -0
- browser_use/llm/google/chat.py +542 -0
- browser_use/llm/google/serializer.py +120 -0
- browser_use/llm/groq/chat.py +229 -0
- browser_use/llm/groq/parser.py +158 -0
- browser_use/llm/groq/serializer.py +159 -0
- browser_use/llm/messages.py +238 -0
- browser_use/llm/models.py +271 -0
- browser_use/llm/oci_raw/__init__.py +10 -0
- browser_use/llm/oci_raw/chat.py +443 -0
- browser_use/llm/oci_raw/serializer.py +229 -0
- browser_use/llm/ollama/chat.py +97 -0
- browser_use/llm/ollama/serializer.py +143 -0
- browser_use/llm/openai/chat.py +264 -0
- browser_use/llm/openai/like.py +15 -0
- browser_use/llm/openai/serializer.py +165 -0
- browser_use/llm/openrouter/chat.py +211 -0
- browser_use/llm/openrouter/serializer.py +26 -0
- browser_use/llm/schema.py +176 -0
- browser_use/llm/views.py +48 -0
- browser_use/logging_config.py +330 -0
- browser_use/mcp/__init__.py +18 -0
- browser_use/mcp/__main__.py +12 -0
- browser_use/mcp/client.py +544 -0
- browser_use/mcp/controller.py +264 -0
- browser_use/mcp/server.py +1114 -0
- browser_use/observability.py +204 -0
- browser_use/py.typed +0 -0
- browser_use/sandbox/__init__.py +41 -0
- browser_use/sandbox/sandbox.py +637 -0
- browser_use/sandbox/views.py +132 -0
- browser_use/screenshots/__init__.py +1 -0
- browser_use/screenshots/service.py +52 -0
- browser_use/sync/__init__.py +6 -0
- browser_use/sync/auth.py +357 -0
- browser_use/sync/service.py +161 -0
- browser_use/telemetry/__init__.py +51 -0
- browser_use/telemetry/service.py +112 -0
- browser_use/telemetry/views.py +101 -0
- browser_use/tokens/__init__.py +0 -0
- browser_use/tokens/custom_pricing.py +24 -0
- browser_use/tokens/mappings.py +4 -0
- browser_use/tokens/service.py +580 -0
- browser_use/tokens/views.py +108 -0
- browser_use/tools/registry/service.py +572 -0
- browser_use/tools/registry/views.py +174 -0
- browser_use/tools/service.py +1675 -0
- browser_use/tools/utils.py +82 -0
- browser_use/tools/views.py +100 -0
- browser_use/utils.py +670 -0
- optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
- optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
- optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
- optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
- optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""Mouse class for mouse operations."""
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from cdp_use.cdp.input.commands import DispatchMouseEventParameters, SynthesizeScrollGestureParameters
|
|
7
|
+
from cdp_use.cdp.input.types import MouseButton
|
|
8
|
+
|
|
9
|
+
from browser_use.browser.session import BrowserSession
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Mouse:
|
|
13
|
+
"""Mouse operations for a target."""
|
|
14
|
+
|
|
15
|
+
def __init__(self, browser_session: 'BrowserSession', session_id: str | None = None, target_id: str | None = None):
|
|
16
|
+
self._browser_session = browser_session
|
|
17
|
+
self._client = browser_session.cdp_client
|
|
18
|
+
self._session_id = session_id
|
|
19
|
+
self._target_id = target_id
|
|
20
|
+
|
|
21
|
+
async def click(self, x: int, y: int, button: 'MouseButton' = 'left', click_count: int = 1) -> None:
|
|
22
|
+
"""Click at the specified coordinates."""
|
|
23
|
+
# Mouse press
|
|
24
|
+
press_params: 'DispatchMouseEventParameters' = {
|
|
25
|
+
'type': 'mousePressed',
|
|
26
|
+
'x': x,
|
|
27
|
+
'y': y,
|
|
28
|
+
'button': button,
|
|
29
|
+
'clickCount': click_count,
|
|
30
|
+
}
|
|
31
|
+
await self._client.send.Input.dispatchMouseEvent(
|
|
32
|
+
press_params,
|
|
33
|
+
session_id=self._session_id,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
# Mouse release
|
|
37
|
+
release_params: 'DispatchMouseEventParameters' = {
|
|
38
|
+
'type': 'mouseReleased',
|
|
39
|
+
'x': x,
|
|
40
|
+
'y': y,
|
|
41
|
+
'button': button,
|
|
42
|
+
'clickCount': click_count,
|
|
43
|
+
}
|
|
44
|
+
await self._client.send.Input.dispatchMouseEvent(
|
|
45
|
+
release_params,
|
|
46
|
+
session_id=self._session_id,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
async def down(self, button: 'MouseButton' = 'left', click_count: int = 1) -> None:
|
|
50
|
+
"""Press mouse button down."""
|
|
51
|
+
params: 'DispatchMouseEventParameters' = {
|
|
52
|
+
'type': 'mousePressed',
|
|
53
|
+
'x': 0, # Will use last mouse position
|
|
54
|
+
'y': 0,
|
|
55
|
+
'button': button,
|
|
56
|
+
'clickCount': click_count,
|
|
57
|
+
}
|
|
58
|
+
await self._client.send.Input.dispatchMouseEvent(
|
|
59
|
+
params,
|
|
60
|
+
session_id=self._session_id,
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
async def up(self, button: 'MouseButton' = 'left', click_count: int = 1) -> None:
|
|
64
|
+
"""Release mouse button."""
|
|
65
|
+
params: 'DispatchMouseEventParameters' = {
|
|
66
|
+
'type': 'mouseReleased',
|
|
67
|
+
'x': 0, # Will use last mouse position
|
|
68
|
+
'y': 0,
|
|
69
|
+
'button': button,
|
|
70
|
+
'clickCount': click_count,
|
|
71
|
+
}
|
|
72
|
+
await self._client.send.Input.dispatchMouseEvent(
|
|
73
|
+
params,
|
|
74
|
+
session_id=self._session_id,
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
async def move(self, x: int, y: int, steps: int = 1) -> None:
|
|
78
|
+
"""Move mouse to the specified coordinates."""
|
|
79
|
+
# TODO: Implement smooth movement with multiple steps if needed
|
|
80
|
+
_ = steps # Acknowledge parameter for future use
|
|
81
|
+
|
|
82
|
+
params: 'DispatchMouseEventParameters' = {'type': 'mouseMoved', 'x': x, 'y': y}
|
|
83
|
+
await self._client.send.Input.dispatchMouseEvent(params, session_id=self._session_id)
|
|
84
|
+
|
|
85
|
+
async def scroll(self, x: int = 0, y: int = 0, delta_x: int | None = None, delta_y: int | None = None) -> None:
|
|
86
|
+
"""Scroll the page using robust CDP methods."""
|
|
87
|
+
if not self._session_id:
|
|
88
|
+
raise RuntimeError('Session ID is required for scroll operations')
|
|
89
|
+
|
|
90
|
+
# Method 1: Try mouse wheel event (most reliable)
|
|
91
|
+
try:
|
|
92
|
+
# Get viewport dimensions
|
|
93
|
+
layout_metrics = await self._client.send.Page.getLayoutMetrics(session_id=self._session_id)
|
|
94
|
+
viewport_width = layout_metrics['layoutViewport']['clientWidth']
|
|
95
|
+
viewport_height = layout_metrics['layoutViewport']['clientHeight']
|
|
96
|
+
|
|
97
|
+
# Use provided coordinates or center of viewport
|
|
98
|
+
scroll_x = x if x > 0 else viewport_width / 2
|
|
99
|
+
scroll_y = y if y > 0 else viewport_height / 2
|
|
100
|
+
|
|
101
|
+
# Calculate scroll deltas (positive = down/right)
|
|
102
|
+
scroll_delta_x = delta_x or 0
|
|
103
|
+
scroll_delta_y = delta_y or 0
|
|
104
|
+
|
|
105
|
+
# Dispatch mouse wheel event
|
|
106
|
+
await self._client.send.Input.dispatchMouseEvent(
|
|
107
|
+
params={
|
|
108
|
+
'type': 'mouseWheel',
|
|
109
|
+
'x': scroll_x,
|
|
110
|
+
'y': scroll_y,
|
|
111
|
+
'deltaX': scroll_delta_x,
|
|
112
|
+
'deltaY': scroll_delta_y,
|
|
113
|
+
},
|
|
114
|
+
session_id=self._session_id,
|
|
115
|
+
)
|
|
116
|
+
return
|
|
117
|
+
|
|
118
|
+
except Exception:
|
|
119
|
+
pass
|
|
120
|
+
|
|
121
|
+
# Method 2: Fallback to synthesizeScrollGesture
|
|
122
|
+
try:
|
|
123
|
+
params: 'SynthesizeScrollGestureParameters' = {'x': x, 'y': y, 'xDistance': delta_x or 0, 'yDistance': delta_y or 0}
|
|
124
|
+
await self._client.send.Input.synthesizeScrollGesture(
|
|
125
|
+
params,
|
|
126
|
+
session_id=self._session_id,
|
|
127
|
+
)
|
|
128
|
+
except Exception:
|
|
129
|
+
# Method 3: JavaScript fallback
|
|
130
|
+
scroll_js = f'window.scrollBy({delta_x or 0}, {delta_y or 0})'
|
|
131
|
+
await self._client.send.Runtime.evaluate(
|
|
132
|
+
params={'expression': scroll_js, 'returnByValue': True},
|
|
133
|
+
session_id=self._session_id,
|
|
134
|
+
)
|
|
@@ -0,0 +1,561 @@
|
|
|
1
|
+
"""Page class for page-level operations."""
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, TypeVar
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
from browser_use.actor.utils import get_key_info
|
|
8
|
+
from browser_use.dom.serializer.serializer import DOMTreeSerializer
|
|
9
|
+
from browser_use.dom.service import DomService
|
|
10
|
+
from browser_use.llm.messages import SystemMessage, UserMessage
|
|
11
|
+
|
|
12
|
+
T = TypeVar('T', bound=BaseModel)
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from cdp_use.cdp.dom.commands import (
|
|
16
|
+
DescribeNodeParameters,
|
|
17
|
+
QuerySelectorAllParameters,
|
|
18
|
+
)
|
|
19
|
+
from cdp_use.cdp.emulation.commands import SetDeviceMetricsOverrideParameters
|
|
20
|
+
from cdp_use.cdp.input.commands import (
|
|
21
|
+
DispatchKeyEventParameters,
|
|
22
|
+
)
|
|
23
|
+
from cdp_use.cdp.page.commands import CaptureScreenshotParameters, NavigateParameters, NavigateToHistoryEntryParameters
|
|
24
|
+
from cdp_use.cdp.runtime.commands import EvaluateParameters
|
|
25
|
+
from cdp_use.cdp.target.commands import (
|
|
26
|
+
AttachToTargetParameters,
|
|
27
|
+
GetTargetInfoParameters,
|
|
28
|
+
)
|
|
29
|
+
from cdp_use.cdp.target.types import TargetInfo
|
|
30
|
+
|
|
31
|
+
from browser_use.browser.session import BrowserSession
|
|
32
|
+
from browser_use.llm.base import BaseChatModel
|
|
33
|
+
|
|
34
|
+
from .element import Element
|
|
35
|
+
from .mouse import Mouse
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Page:
|
|
39
|
+
"""Page operations (tab or iframe)."""
|
|
40
|
+
|
|
41
|
+
def __init__(
|
|
42
|
+
self, browser_session: 'BrowserSession', target_id: str, session_id: str | None = None, llm: 'BaseChatModel | None' = None
|
|
43
|
+
):
|
|
44
|
+
self._browser_session = browser_session
|
|
45
|
+
self._client = browser_session.cdp_client
|
|
46
|
+
self._target_id = target_id
|
|
47
|
+
self._session_id: str | None = session_id
|
|
48
|
+
self._mouse: 'Mouse | None' = None
|
|
49
|
+
|
|
50
|
+
self._llm = llm
|
|
51
|
+
|
|
52
|
+
async def _ensure_session(self) -> str:
    """Return this target's CDP session ID, attaching to the target first if needed.

    When no session ID is stored yet, attaches to the target in flattened
    mode, stores the returned ID, and enables the Page, DOM, Runtime and
    Network domains for that session.
    """
    if self._session_id:
        return self._session_id

    attach_params: 'AttachToTargetParameters' = {'targetId': self._target_id, 'flatten': True}
    attached = await self._client.send.Target.attachToTarget(attach_params)
    self._session_id = attached['sessionId']

    # Enable necessary domains
    import asyncio

    sid = self._session_id
    await asyncio.gather(
        self._client.send.Page.enable(session_id=sid),
        self._client.send.DOM.enable(session_id=sid),
        self._client.send.Runtime.enable(session_id=sid),
        self._client.send.Network.enable(session_id=sid),
    )

    return self._session_id
|
|
70
|
+
|
|
71
|
+
@property
async def session_id(self) -> str:
    """Session ID for this target (awaitable property).

    @dev Pass this to an arbitrary CDP call
    """
    current = await self._ensure_session()
    return current
|
|
78
|
+
|
|
79
|
+
@property
async def mouse(self) -> 'Mouse':
    """Mouse interface for this target (awaitable property).

    The ``Mouse`` object is built on first access, after a session has
    been ensured, and the same instance is returned afterwards.
    """
    if self._mouse:
        return self._mouse

    sid = await self._ensure_session()
    from .mouse import Mouse

    self._mouse = Mouse(self._browser_session, sid, self._target_id)
    return self._mouse
|
|
88
|
+
|
|
89
|
+
async def reload(self) -> None:
    """Reload the target through CDP ``Page.reload`` (a session is ensured first)."""
    sid = await self._ensure_session()
    page_domain = self._client.send.Page
    await page_domain.reload(session_id=sid)
|
|
93
|
+
|
|
94
|
+
async def get_element(self, backend_node_id: int) -> 'Element':
    """Wrap a backend node ID in an ``Element`` bound to this page's session.

    No lookup is performed here: the ID is handed to the ``Element``
    constructor as given.
    """
    sid = await self._ensure_session()

    # Imported after the session is ensured, under an alias.
    from .element import Element as Element_

    wrapped = Element_(self._browser_session, backend_node_id, sid)
    return wrapped
|
|
101
|
+
|
|
102
|
+
async def evaluate(self, page_function: str, *args) -> str:
    """Execute JavaScript in the target.

    Args:
        page_function: JavaScript code that MUST start with (...args) => format
        *args: Arguments to pass to the function; each one is serialised with
            ``json.dumps`` and spliced into the call expression.

    Returns:
        String representation of the JavaScript execution result.
        ``None``/missing values become ``''``, strings are returned as-is,
        objects and arrays are JSON-stringified, and any other value is
        passed through ``str()`` (so a JS boolean comes back as
        ``'True'``/``'False'``).

    Raises:
        ValueError: If the cleaned code does not start with ``(`` or contains no ``=>``.
        RuntimeError: If the CDP response carries ``exceptionDetails``.
    """
    import json
    import logging

    session_id = await self._ensure_session()

    # Clean and fix common JavaScript string parsing issues
    page_function = self._fix_javascript_string(page_function)

    # Enforce arrow function format
    if not (page_function.startswith('(') and '=>' in page_function):
        raise ValueError(f'JavaScript code must start with (...args) => format. Got: {page_function[:50]}...')

    # Build the expression - call the arrow function with JSON-encoded args
    # (an empty args tuple yields an empty argument list).
    call_args = ', '.join(json.dumps(arg) for arg in args)
    expression = f'({page_function})({call_args})'

    # Log via the logging framework rather than printing, so library users'
    # stdout stays clean.
    logging.getLogger(__name__).debug('Evaluating JavaScript: %r', expression)

    params: 'EvaluateParameters' = {'expression': expression, 'returnByValue': True, 'awaitPromise': True}
    result = await self._client.send.Runtime.evaluate(
        params,
        session_id=session_id,
    )

    if 'exceptionDetails' in result:
        raise RuntimeError(f'JavaScript evaluation failed: {result["exceptionDetails"]}')

    value = result.get('result', {}).get('value')

    # Always return string representation
    if value is None:
        return ''
    if isinstance(value, str):
        return value
    if isinstance(value, (dict, list)):
        try:
            return json.dumps(value)
        except (TypeError, ValueError):
            return str(value)
    return str(value)
|
|
159
|
+
|
|
160
|
+
def _fix_javascript_string(self, js_code: str) -> str:
    """Fix common JavaScript string parsing issues when written as Python string.

    Only minimal, safe cleaning is done:

    1. Surrounding whitespace is stripped.
    2. One pair of matching wrapper quotes (``"..."`` or ``'...'``) is
       removed, but only when the inner text contains no quotes at all or
       contains ``() =>``.

    Backslash-escaped quotes are left untouched. An earlier heuristic tried
    to unescape them when ``\\"`` occurrences outnumbered ``"`` occurrences,
    but every ``\\"`` contains a ``"``, so that condition could never hold;
    the dead branches were removed without changing behaviour.

    Raises:
        ValueError: If nothing is left after cleaning.
    """
    js_code = js_code.strip()

    # Remove obvious Python string wrapper quotes if they exist
    opening = js_code[:1]
    if opening in ('"', "'") and js_code[-1:] == opening:
        inner = js_code[1:-1]
        # Treat the quotes as a wrapper (not JS syntax) only in the two clear-cut cases.
        has_inner_quotes = '"' in inner or "'" in inner
        if not has_inner_quotes or '() =>' in inner:
            js_code = inner.strip()

    # Final validation - ensure it's not empty
    if not js_code:
        raise ValueError('JavaScript code is empty after cleaning')

    return js_code
|
|
190
|
+
|
|
191
|
+
async def screenshot(self, format: str = 'jpeg', quality: int | None = None) -> str:
|
|
192
|
+
"""Take a screenshot and return base64 encoded image.
|
|
193
|
+
|
|
194
|
+
Args:
|
|
195
|
+
format: Image format ('jpeg', 'png', 'webp')
|
|
196
|
+
quality: Quality 0-100 for JPEG format
|
|
197
|
+
|
|
198
|
+
Returns:
|
|
199
|
+
Base64-encoded image data
|
|
200
|
+
"""
|
|
201
|
+
session_id = await self._ensure_session()
|
|
202
|
+
|
|
203
|
+
params: 'CaptureScreenshotParameters' = {'format': format}
|
|
204
|
+
|
|
205
|
+
if quality is not None and format.lower() == 'jpeg':
|
|
206
|
+
params['quality'] = quality
|
|
207
|
+
|
|
208
|
+
result = await self._client.send.Page.captureScreenshot(params, session_id=session_id)
|
|
209
|
+
|
|
210
|
+
return result['data']
|
|
211
|
+
|
|
212
|
+
async def press(self, key: str) -> None:
    """Press a key on the page (sends keyboard input to the focused element or page).

    ``key`` is either a single key name (``'Enter'``, ``'a'``, ``'+'``) or a
    ``+``-separated combination such as ``'Control+A'``. A combination that
    ends in ``'++'`` (e.g. ``'Control++'``) means "modifiers plus the plus
    key".

    For a combination, the modifier keys are pressed in order, the main key
    is pressed and released with the modifier bitmask (Alt=1, Control=2,
    Meta=4, Shift=8; unknown names contribute 0), and the modifiers are
    released in reverse order.
    """
    session_id = await self._ensure_session()

    async def _send(event_type: str, key_name: str, key_info, modifiers: int | None = None) -> None:
        """Dispatch one keyDown/keyUp event for ``key_name``."""
        code, vk_code = key_info
        params: 'DispatchKeyEventParameters' = {'type': event_type, 'key': key_name, 'code': code}
        if modifiers is not None:
            params['modifiers'] = modifiers
        if vk_code is not None:
            params['windowsVirtualKeyCode'] = vk_code
        await self._client.send.Input.dispatchKeyEvent(params, session_id=session_id)

    # Simple key press. A bare '+' is the plus key itself, not a separator.
    if key == '+' or '+' not in key:
        info = get_key_info(key)
        await _send('keyDown', key, info)
        await _send('keyUp', key, info)
        return

    # Handle key combinations like "Control+A"
    if key.endswith('++'):
        # Trailing '++' = separator followed by the plus key as the main key.
        modifiers = [mod for mod in key[:-2].split('+') if mod]
        main_key = '+'
    else:
        *modifiers, main_key = key.split('+')

    # Calculate modifier bitmask
    modifier_map = {'Alt': 1, 'Control': 2, 'Meta': 4, 'Shift': 8}
    modifier_value = 0
    for mod in modifiers:
        modifier_value |= modifier_map.get(mod, 0)

    # Press modifier keys
    for mod in modifiers:
        await _send('keyDown', mod, get_key_info(mod))

    # Press and release the main key with the modifiers bitmask
    main_info = get_key_info(main_key)
    await _send('keyDown', main_key, main_info, modifier_value)
    await _send('keyUp', main_key, main_info, modifier_value)

    # Release modifier keys
    for mod in reversed(modifiers):
        await _send('keyUp', mod, get_key_info(mod))
|
|
277
|
+
|
|
278
|
+
async def set_viewport_size(self, width: int, height: int) -> None:
    """Override the device metrics so the viewport is ``width`` x ``height``.

    The override is sent with a device scale factor of 1.0 and mobile
    emulation off.
    """
    sid = await self._ensure_session()

    metrics: 'SetDeviceMetricsOverrideParameters' = dict(
        width=width,
        height=height,
        deviceScaleFactor=1.0,
        mobile=False,
    )
    await self._client.send.Emulation.setDeviceMetricsOverride(metrics, session_id=sid)
|
|
292
|
+
|
|
293
|
+
# Target properties (from CDP getTargetInfo)
|
|
294
|
+
async def get_target_info(self) -> 'TargetInfo':
    """Fetch the CDP ``targetInfo`` record for this page's target.

    The request is sent without a session ID.
    """
    request: 'GetTargetInfoParameters' = {'targetId': self._target_id}
    response = await self._client.send.Target.getTargetInfo(request)
    return response['targetInfo']
|
|
299
|
+
|
|
300
|
+
async def get_url(self) -> str:
    """Return the target's current URL, or ``''`` when the target info has none."""
    target_info = await self.get_target_info()
    url = target_info.get('url', '')
    return url
|
|
304
|
+
|
|
305
|
+
async def get_title(self) -> str:
    """Return the target's current title, or ``''`` when the target info has none."""
    target_info = await self.get_target_info()
    title = target_info.get('title', '')
    return title
|
|
309
|
+
|
|
310
|
+
async def goto(self, url: str) -> None:
    """Send CDP ``Page.navigate`` for ``url`` on this target's session.

    The navigate response is not inspected.
    """
    sid = await self._ensure_session()

    navigate_params: 'NavigateParameters' = dict(url=url)
    await self._client.send.Page.navigate(navigate_params, session_id=sid)
|
|
316
|
+
|
|
317
|
+
async def navigate(self, url: str) -> None:
    """Alias for ``goto``: delegates to it with the same URL."""
    pending = self.goto(url)
    await pending
|
|
320
|
+
|
|
321
|
+
async def go_back(self) -> None:
    """Navigate back in history.

    Reads the navigation history and navigates to the entry just before
    the current one.

    Raises:
        RuntimeError: If there is no previous entry or any CDP call fails.
            The message is prefixed with ``'Failed to navigate back: '`` and
            the original error is attached as ``__cause__``.
    """
    session_id = await self._ensure_session()

    try:
        # Get navigation history
        history = await self._client.send.Page.getNavigationHistory(session_id=session_id)
        current_index = history['currentIndex']
        entries = history['entries']

        # Check if we can go back
        if current_index <= 0:
            raise RuntimeError('Cannot go back - no previous entry in history')

        # Navigate to the previous entry
        previous_entry_id = entries[current_index - 1]['id']
        params: 'NavigateToHistoryEntryParameters' = {'entryId': previous_entry_id}
        await self._client.send.Page.navigateToHistoryEntry(params, session_id=session_id)

    except Exception as e:
        # Chain explicitly so the root cause stays attached to the wrapper.
        raise RuntimeError(f'Failed to navigate back: {e}') from e
|
|
342
|
+
|
|
343
|
+
async def go_forward(self) -> None:
    """Navigate forward in history.

    Reads the navigation history and navigates to the entry just after
    the current one.

    Raises:
        RuntimeError: If there is no next entry or any CDP call fails. The
            message is prefixed with ``'Failed to navigate forward: '`` and
            the original error is attached as ``__cause__``.
    """
    session_id = await self._ensure_session()

    try:
        # Get navigation history
        history = await self._client.send.Page.getNavigationHistory(session_id=session_id)
        current_index = history['currentIndex']
        entries = history['entries']

        # Check if we can go forward
        if current_index >= len(entries) - 1:
            raise RuntimeError('Cannot go forward - no next entry in history')

        # Navigate to the next entry
        next_entry_id = entries[current_index + 1]['id']
        params: 'NavigateToHistoryEntryParameters' = {'entryId': next_entry_id}
        await self._client.send.Page.navigateToHistoryEntry(params, session_id=session_id)

    except Exception as e:
        # Chain explicitly so the root cause stays attached to the wrapper.
        raise RuntimeError(f'Failed to navigate forward: {e}') from e
|
|
364
|
+
|
|
365
|
+
# Element finding methods (these would need to be implemented based on DOM queries)
|
|
366
|
+
async def get_elements_by_css_selector(self, selector: str) -> list['Element']:
    """Return an ``Element`` for every node matching ``selector``.

    The selector is run against the document root with
    ``DOM.querySelectorAll``; each resulting node ID is then resolved to a
    backend node ID with one ``DOM.describeNode`` call per node, in order.
    """
    sid = await self._ensure_session()
    dom = self._client.send.DOM

    # Get document first
    document = await dom.getDocument(session_id=sid)
    root_node_id = document['root']['nodeId']

    # Query selector all
    query: 'QuerySelectorAllParameters' = {'nodeId': root_node_id, 'selector': selector}
    matches = await dom.querySelectorAll(query, session_id=sid)

    from .element import Element as Element_

    async def _wrap(node_id) -> 'Element':
        # Convert a node ID to its backend node ID and wrap it.
        describe: 'DescribeNodeParameters' = {'nodeId': node_id}
        described = await dom.describeNode(describe, session_id=sid)
        return Element_(self._browser_session, described['node']['backendNodeId'], sid)

    return [await _wrap(node_id) for node_id in matches['nodeIds']]
|
|
390
|
+
|
|
391
|
+
# AI METHODS
|
|
392
|
+
|
|
393
|
+
@property
def dom_service(self) -> 'DomService':
    """DOM service for this target.

    A new ``DomService`` is constructed from the browser session on every
    access.
    """
    session = self._browser_session
    return DomService(session)
|
|
397
|
+
|
|
398
|
+
async def get_element_by_prompt(self, prompt: str, llm: 'BaseChatModel | None' = None) -> 'Element | None':
    """Get an element by a prompt.

    Serializes the target's DOM into the LLM-facing element listing, asks
    the language model which element index matches ``prompt``, and wraps
    the selected node in an ``Element``.

    Args:
        prompt: Natural-language description of the element to find.
        llm: Model to use; falls back to the one given to the constructor.

    Returns:
        The matching ``Element``, or ``None`` when the model returns no
        index or an index that is not in the serialized selector map.

    Raises:
        ValueError: If neither ``llm`` nor a constructor-level model is available.
    """
    # Called for its side effect of populating self._session_id (used at the end).
    await self._ensure_session()
    llm = llm or self._llm

    if not llm:
        raise ValueError('LLM not provided')

    dom_service = self.dom_service

    enhanced_dom_tree = await dom_service.get_dom_tree(target_id=self._target_id)

    # Second return value of the serializer is not used here.
    serialized_dom_state, _ = DOMTreeSerializer(
        enhanced_dom_tree, None, paint_order_filtering=True
    ).serialize_accessible_elements()

    llm_representation = serialized_dom_state.llm_representation()

    # NOTE(review): the prompt literals below are runtime text sent to the model;
    # they are kept exactly as written, including their wording.
    system_message = SystemMessage(
        content="""You are an AI created to find an element on a page by a prompt.

<browser_state>
Interactive Elements: All interactive elements will be provided in format as [index]<type>text</type> where
- index: Numeric identifier for interaction
- type: HTML element type (button, input, etc.)
- text: Element description

Examples:
[33]<div>User form</div>
[35]<button aria-label='Submit form'>Submit</button>

Note that:
- Only elements with numeric indexes in [] are interactive
- (stacked) indentation (with \t) is important and means that the element is a (html) child of the element above (with a lower index)
- Pure text elements without [] are not interactive.
</browser_state>

Your task is to find an element index (if any) that matches the prompt (written in <prompt> tag).

If non of the elements matches the, return None.

Before you return the element index, reason about the state and elements for a sentence or two."""
    )

    state_message = UserMessage(
        content=f"""
<browser_state>
{llm_representation}
</browser_state>

<prompt>
{prompt}
</prompt>
"""
    )

    # Structured-output schema for the model's answer: a single optional index.
    class ElementResponse(BaseModel):
        # thinking: str
        element_highlight_index: int | None

    llm_response = await llm.ainvoke(
        [
            system_message,
            state_message,
        ],
        output_format=ElementResponse,
    )

    element_highlight_index = llm_response.completion.element_highlight_index

    # Reject both "no match" and indexes the serializer never produced.
    if element_highlight_index is None or element_highlight_index not in serialized_dom_state.selector_map:
        return None

    element = serialized_dom_state.selector_map[element_highlight_index]

    from .element import Element as Element_

    return Element_(self._browser_session, element.backend_node_id, self._session_id)
|
|
476
|
+
|
|
477
|
+
async def must_get_element_by_prompt(self, prompt: str, llm: 'BaseChatModel | None' = None) -> 'Element':
    """Resolve a prompt to an element, raising when nothing matches.

    Thin strict wrapper around ``get_element_by_prompt``: the lookup itself
    may still come back empty, and this method converts that empty result
    into an error instead of handing ``None`` to the caller.

    Args:
        prompt: Description of the element to look for.
        llm: Optional model, forwarded unchanged to ``get_element_by_prompt``.

    Returns:
        The element produced by ``get_element_by_prompt``.

    Raises:
        ValueError: If ``get_element_by_prompt`` returns ``None``.
    """
    match = await self.get_element_by_prompt(prompt, llm)
    if match is not None:
        return match

    raise ValueError(f'No element found for prompt: {prompt}')
|
|
487
|
+
|
|
488
|
+
async def extract_content(self, prompt: str, structured_output: type[T], llm: 'BaseChatModel | None' = None) -> T:
    """Extract structured content from the current page using an LLM.

    Converts the page to clean markdown via ``self._extract_clean_markdown()``
    and sends it, together with the query, to the LLM with ``structured_output``
    as the requested output format.

    Args:
        prompt: Description of what content to extract.
        structured_output: Pydantic BaseModel class defining the expected output structure.
        llm: Language model to use for extraction; falls back to ``self._llm``.

    Returns:
        The ``completion`` of the LLM response (the structured output instance).

    Raises:
        ValueError: If neither ``llm`` nor ``self._llm`` is set.
        RuntimeError: If markdown extraction fails, or the LLM call fails or
            exceeds the timeout. The original exception is chained as ``__cause__``.
    """
    import asyncio

    # Upper bound, in seconds, for the single LLM round trip below.
    llm_timeout_seconds = 120.0

    llm = llm or self._llm
    if not llm:
        raise ValueError('LLM not provided')

    # Extract clean markdown using the same method as in tools/service.py.
    # The second tuple item (content statistics) is not needed here.
    try:
        content, _ = await self._extract_clean_markdown()
    except Exception as e:
        raise RuntimeError(f'Could not extract clean markdown: {type(e).__name__}') from e

    # System prompt for structured extraction
    system_prompt = """
You are an expert at extracting structured data from the markdown of a webpage.

<input>
You will be given a query and the markdown of a webpage that has been filtered to remove noise and advertising content.
</input>

<instructions>
- You are tasked to extract information from the webpage that is relevant to the query.
- You should ONLY use the information available in the webpage to answer the query. Do not make up information or provide guess from your own knowledge.
- If the information relevant to the query is not available in the page, your response should mention that.
- If the query asks for all items, products, etc., make sure to directly list all of them.
- Return the extracted content in the exact structured format specified.
</instructions>

<output>
- Your output should present ALL the information relevant to the query in the specified structured format.
- Do not answer in conversational format - directly output the relevant information in the structured format.
</output>
""".strip()

    # Build prompt with just query and content
    prompt_content = f'<query>\n{prompt}\n</query>\n\n<webpage_content>\n{content}\n</webpage_content>'
    messages = [SystemMessage(content=system_prompt), UserMessage(content=prompt_content)]

    # Send to LLM with structured output
    try:
        response = await asyncio.wait_for(
            llm.ainvoke(messages, output_format=structured_output),
            timeout=llm_timeout_seconds,
        )
    except asyncio.TimeoutError as e:
        # str() of a timeout error is empty, so spell the failure out explicitly
        # instead of surfacing a RuntimeError with a blank message.
        raise RuntimeError(f'LLM extraction timed out after {llm_timeout_seconds:g} seconds') from e
    except Exception as e:
        # Keep the original message; fall back to the type name when it is empty.
        raise RuntimeError(str(e) or type(e).__name__) from e

    # Return the structured output BaseModel instance
    return response.completion
|
|
552
|
+
|
|
553
|
+
async def _extract_clean_markdown(self, extract_links: bool = False) -> tuple[str, dict]:
    """Produce clean markdown for the current page.

    Delegates to the shared ``extract_clean_markdown`` helper (the same one
    referenced by tools/service.py) so both code paths stay consistent.

    Args:
        extract_links: Forwarded unchanged to the shared extractor.

    Returns:
        The shared extractor's result, annotated as a ``(str, dict)`` pair;
        ``extract_content`` unpacks it as the content and its stats.
    """
    # Imported lazily inside the method, as in the original implementation.
    from browser_use.dom.markdown_extractor import extract_clean_markdown as _shared_extractor

    return await _shared_extractor(
        dom_service=self.dom_service,
        target_id=self._target_id,
        extract_links=extract_links,
    )
|