optexity-browser-use 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use/__init__.py +157 -0
- browser_use/actor/__init__.py +11 -0
- browser_use/actor/element.py +1175 -0
- browser_use/actor/mouse.py +134 -0
- browser_use/actor/page.py +561 -0
- browser_use/actor/playground/flights.py +41 -0
- browser_use/actor/playground/mixed_automation.py +54 -0
- browser_use/actor/playground/playground.py +236 -0
- browser_use/actor/utils.py +176 -0
- browser_use/agent/cloud_events.py +282 -0
- browser_use/agent/gif.py +424 -0
- browser_use/agent/judge.py +170 -0
- browser_use/agent/message_manager/service.py +473 -0
- browser_use/agent/message_manager/utils.py +52 -0
- browser_use/agent/message_manager/views.py +98 -0
- browser_use/agent/prompts.py +413 -0
- browser_use/agent/service.py +2316 -0
- browser_use/agent/system_prompt.md +185 -0
- browser_use/agent/system_prompt_flash.md +10 -0
- browser_use/agent/system_prompt_no_thinking.md +183 -0
- browser_use/agent/views.py +743 -0
- browser_use/browser/__init__.py +41 -0
- browser_use/browser/cloud/cloud.py +203 -0
- browser_use/browser/cloud/views.py +89 -0
- browser_use/browser/events.py +578 -0
- browser_use/browser/profile.py +1158 -0
- browser_use/browser/python_highlights.py +548 -0
- browser_use/browser/session.py +3225 -0
- browser_use/browser/session_manager.py +399 -0
- browser_use/browser/video_recorder.py +162 -0
- browser_use/browser/views.py +200 -0
- browser_use/browser/watchdog_base.py +260 -0
- browser_use/browser/watchdogs/__init__.py +0 -0
- browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
- browser_use/browser/watchdogs/crash_watchdog.py +335 -0
- browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
- browser_use/browser/watchdogs/dom_watchdog.py +817 -0
- browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
- browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
- browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
- browser_use/browser/watchdogs/popups_watchdog.py +143 -0
- browser_use/browser/watchdogs/recording_watchdog.py +126 -0
- browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
- browser_use/browser/watchdogs/security_watchdog.py +280 -0
- browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
- browser_use/cli.py +2359 -0
- browser_use/code_use/__init__.py +16 -0
- browser_use/code_use/formatting.py +192 -0
- browser_use/code_use/namespace.py +665 -0
- browser_use/code_use/notebook_export.py +276 -0
- browser_use/code_use/service.py +1340 -0
- browser_use/code_use/system_prompt.md +574 -0
- browser_use/code_use/utils.py +150 -0
- browser_use/code_use/views.py +171 -0
- browser_use/config.py +505 -0
- browser_use/controller/__init__.py +3 -0
- browser_use/dom/enhanced_snapshot.py +161 -0
- browser_use/dom/markdown_extractor.py +169 -0
- browser_use/dom/playground/extraction.py +312 -0
- browser_use/dom/playground/multi_act.py +32 -0
- browser_use/dom/serializer/clickable_elements.py +200 -0
- browser_use/dom/serializer/code_use_serializer.py +287 -0
- browser_use/dom/serializer/eval_serializer.py +478 -0
- browser_use/dom/serializer/html_serializer.py +212 -0
- browser_use/dom/serializer/paint_order.py +197 -0
- browser_use/dom/serializer/serializer.py +1170 -0
- browser_use/dom/service.py +825 -0
- browser_use/dom/utils.py +129 -0
- browser_use/dom/views.py +906 -0
- browser_use/exceptions.py +5 -0
- browser_use/filesystem/__init__.py +0 -0
- browser_use/filesystem/file_system.py +619 -0
- browser_use/init_cmd.py +376 -0
- browser_use/integrations/gmail/__init__.py +24 -0
- browser_use/integrations/gmail/actions.py +115 -0
- browser_use/integrations/gmail/service.py +225 -0
- browser_use/llm/__init__.py +155 -0
- browser_use/llm/anthropic/chat.py +242 -0
- browser_use/llm/anthropic/serializer.py +312 -0
- browser_use/llm/aws/__init__.py +36 -0
- browser_use/llm/aws/chat_anthropic.py +242 -0
- browser_use/llm/aws/chat_bedrock.py +289 -0
- browser_use/llm/aws/serializer.py +257 -0
- browser_use/llm/azure/chat.py +91 -0
- browser_use/llm/base.py +57 -0
- browser_use/llm/browser_use/__init__.py +3 -0
- browser_use/llm/browser_use/chat.py +201 -0
- browser_use/llm/cerebras/chat.py +193 -0
- browser_use/llm/cerebras/serializer.py +109 -0
- browser_use/llm/deepseek/chat.py +212 -0
- browser_use/llm/deepseek/serializer.py +109 -0
- browser_use/llm/exceptions.py +29 -0
- browser_use/llm/google/__init__.py +3 -0
- browser_use/llm/google/chat.py +542 -0
- browser_use/llm/google/serializer.py +120 -0
- browser_use/llm/groq/chat.py +229 -0
- browser_use/llm/groq/parser.py +158 -0
- browser_use/llm/groq/serializer.py +159 -0
- browser_use/llm/messages.py +238 -0
- browser_use/llm/models.py +271 -0
- browser_use/llm/oci_raw/__init__.py +10 -0
- browser_use/llm/oci_raw/chat.py +443 -0
- browser_use/llm/oci_raw/serializer.py +229 -0
- browser_use/llm/ollama/chat.py +97 -0
- browser_use/llm/ollama/serializer.py +143 -0
- browser_use/llm/openai/chat.py +264 -0
- browser_use/llm/openai/like.py +15 -0
- browser_use/llm/openai/serializer.py +165 -0
- browser_use/llm/openrouter/chat.py +211 -0
- browser_use/llm/openrouter/serializer.py +26 -0
- browser_use/llm/schema.py +176 -0
- browser_use/llm/views.py +48 -0
- browser_use/logging_config.py +330 -0
- browser_use/mcp/__init__.py +18 -0
- browser_use/mcp/__main__.py +12 -0
- browser_use/mcp/client.py +544 -0
- browser_use/mcp/controller.py +264 -0
- browser_use/mcp/server.py +1114 -0
- browser_use/observability.py +204 -0
- browser_use/py.typed +0 -0
- browser_use/sandbox/__init__.py +41 -0
- browser_use/sandbox/sandbox.py +637 -0
- browser_use/sandbox/views.py +132 -0
- browser_use/screenshots/__init__.py +1 -0
- browser_use/screenshots/service.py +52 -0
- browser_use/sync/__init__.py +6 -0
- browser_use/sync/auth.py +357 -0
- browser_use/sync/service.py +161 -0
- browser_use/telemetry/__init__.py +51 -0
- browser_use/telemetry/service.py +112 -0
- browser_use/telemetry/views.py +101 -0
- browser_use/tokens/__init__.py +0 -0
- browser_use/tokens/custom_pricing.py +24 -0
- browser_use/tokens/mappings.py +4 -0
- browser_use/tokens/service.py +580 -0
- browser_use/tokens/views.py +108 -0
- browser_use/tools/registry/service.py +572 -0
- browser_use/tools/registry/views.py +174 -0
- browser_use/tools/service.py +1675 -0
- browser_use/tools/utils.py +82 -0
- browser_use/tools/views.py +100 -0
- browser_use/utils.py +670 -0
- optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
- optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
- optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
- optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
- optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""Recording Watchdog for Browser Use Sessions."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import ClassVar
|
|
6
|
+
|
|
7
|
+
from bubus import BaseEvent
|
|
8
|
+
from cdp_use.cdp.page.events import ScreencastFrameEvent
|
|
9
|
+
from uuid_extensions import uuid7str
|
|
10
|
+
|
|
11
|
+
from browser_use.browser.events import BrowserConnectedEvent, BrowserStopEvent
|
|
12
|
+
from browser_use.browser.profile import ViewportSize
|
|
13
|
+
from browser_use.browser.video_recorder import VideoRecorderService
|
|
14
|
+
from browser_use.browser.watchdog_base import BaseWatchdog
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class RecordingWatchdog(BaseWatchdog):
|
|
18
|
+
"""
|
|
19
|
+
Manages video recording of a browser session using CDP screencasting.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [BrowserConnectedEvent, BrowserStopEvent]
|
|
23
|
+
EMITS: ClassVar[list[type[BaseEvent]]] = []
|
|
24
|
+
|
|
25
|
+
_recorder: VideoRecorderService | None = None
|
|
26
|
+
|
|
27
|
+
async def on_BrowserConnectedEvent(self, event: BrowserConnectedEvent) -> None:
    """Begin CDP screencast recording when the profile sets ``record_video_dir``.

    Returns without recording when no directory is configured, when no frame
    size can be determined, or when the recorder does not report itself
    active after ``start()``.
    """
    browser_profile = self.browser_session.browser_profile
    if not browser_profile.record_video_dir:
        return

    # Prefer the configured frame size; otherwise ask the browser for the viewport.
    frame_size = browser_profile.record_video_size
    if not frame_size:
        self.logger.debug('record_video_size not specified, detecting viewport size...')
        frame_size = await self._get_current_viewport_size()

    if not frame_size:
        self.logger.warning('Cannot start video recording: viewport size could not be determined.')
        return

    # The format may be configured with or without a leading dot; default is mp4.
    video_format = getattr(browser_profile, 'record_video_format', 'mp4').strip('.')
    output_path = Path(browser_profile.record_video_dir) / f'{uuid7str()}.{video_format}'

    self.logger.debug(f'Initializing video recorder for format: {video_format}')
    recorder = VideoRecorderService(
        output_path=output_path,
        size=frame_size,
        framerate=browser_profile.record_video_framerate,
    )
    self._recorder = recorder
    recorder.start()

    if not recorder._is_active:
        # The recorder did not become active; drop it so frame callbacks are ignored.
        self._recorder = None
        return

    # Register the frame callback before asking the browser to start sending frames.
    self.browser_session.cdp_client.register.Page.screencastFrame(self.on_screencastFrame)

    try:
        cdp_session = await self.browser_session.get_or_create_cdp_session()
        screencast_params = {
            'format': 'png',
            'quality': 90,
            'maxWidth': frame_size['width'],
            'maxHeight': frame_size['height'],
            'everyNthFrame': 1,
        }
        await cdp_session.cdp_client.send.Page.startScreencast(
            params=screencast_params,
            session_id=cdp_session.session_id,
        )
        self.logger.info(f'📹 Started video recording to {output_path}')
    except Exception as e:
        self.logger.error(f'Failed to start screencast via CDP: {e}')
        # Finalize whatever the recorder holds and detach it.
        if self._recorder:
            self._recorder.stop_and_save()
            self._recorder = None
|
|
76
|
+
|
|
77
|
+
async def _get_current_viewport_size(self) -> ViewportSize | None:
    """Query the browser over CDP for the size of the visible viewport.

    Returns:
        A ``ViewportSize`` built from ``cssVisualViewport.clientWidth`` and
        ``clientHeight`` of ``Page.getLayoutMetrics`` (truncated to int), or
        ``None`` when either value is missing/falsy or anything raises.
    """
    try:
        session = await self.browser_session.get_or_create_cdp_session()
        layout = await session.cdp_client.send.Page.getLayoutMetrics(session_id=session.session_id)

        # cssVisualViewport describes the visible area in CSS pixels.
        visual_viewport = layout.get('cssVisualViewport', {})
        width = visual_viewport.get('clientWidth')
        height = visual_viewport.get('clientHeight')

        if not (width and height):
            return None

        self.logger.debug(f'Detected viewport size: {width}x{height}')
        return ViewportSize(width=int(width), height=int(height))
    except Exception as e:
        self.logger.warning(f'Failed to get viewport size from browser: {e}')
        return None
|
|
95
|
+
|
|
96
|
+
def on_screencastFrame(self, event: ScreencastFrameEvent, session_id: str | None) -> None:
    """Synchronous CDP callback: store one screencast frame and schedule its ack.

    Frames are ignored (and not acknowledged) while no recorder is attached.
    """
    recorder = self._recorder
    if recorder:
        recorder.add_frame(event['data'])
        # The ack is sent from a separate task so this synchronous callback never awaits.
        # NOTE(review): the task object is not retained; the asyncio docs recommend
        # keeping a reference to tasks created with create_task — confirm this is safe here.
        asyncio.create_task(self._ack_screencast_frame(event, session_id))
|
|
104
|
+
|
|
105
|
+
async def _ack_screencast_frame(self, event: ScreencastFrameEvent, session_id: str | None) -> None:
    """Send ``Page.screencastFrameAck`` for the given frame.

    Best effort: any failure is logged at debug level and otherwise ignored.
    """
    try:
        send_ack = self.browser_session.cdp_client.send.Page.screencastFrameAck
        ack_params = {'sessionId': event['sessionId']}
        await send_ack(params=ack_params, session_id=session_id)
    except Exception as e:
        self.logger.debug(f'Failed to acknowledge screencast frame: {e}')
|
|
115
|
+
|
|
116
|
+
async def on_BrowserStopEvent(self, event: BrowserStopEvent) -> None:
    """Stop the active recording, if any, and finalize the video file.

    ``stop_and_save`` is executed in the default executor so the event loop
    is not blocked while the file is written.
    """
    recorder = self._recorder
    if not recorder:
        return

    # Detach first: on_screencastFrame ignores frames once self._recorder is None.
    self._recorder = None

    self.logger.debug('Stopping video recording and saving file...')
    # Inside a coroutine the running loop is the one to use; get_running_loop()
    # is the documented preference over get_event_loop() here.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, recorder.stop_and_save)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Screenshot watchdog for handling screenshot requests using CDP."""
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any, ClassVar
|
|
4
|
+
|
|
5
|
+
from bubus import BaseEvent
|
|
6
|
+
from cdp_use.cdp.page import CaptureScreenshotParameters
|
|
7
|
+
|
|
8
|
+
from browser_use.browser.events import ScreenshotEvent
|
|
9
|
+
from browser_use.browser.views import BrowserError
|
|
10
|
+
from browser_use.browser.watchdog_base import BaseWatchdog
|
|
11
|
+
from browser_use.observability import observe_debug
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
pass
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ScreenshotWatchdog(BaseWatchdog):
|
|
18
|
+
"""Handles screenshot requests using CDP."""
|
|
19
|
+
|
|
20
|
+
# Events this watchdog listens to
|
|
21
|
+
LISTENS_TO: ClassVar[list[type[BaseEvent[Any]]]] = [ScreenshotEvent]
|
|
22
|
+
|
|
23
|
+
# Events this watchdog emits
|
|
24
|
+
EMITS: ClassVar[list[type[BaseEvent[Any]]]] = []
|
|
25
|
+
|
|
26
|
+
@observe_debug(ignore_input=True, ignore_output=True, name='screenshot_event_handler')
async def on_ScreenshotEvent(self, event: ScreenshotEvent) -> str:
    """Capture the current target with CDP ``Page.captureScreenshot``.

    The capture is always a viewport-only JPEG at quality 60; no field of
    ``event`` is read by this handler.

    Args:
        event: The screenshot request that triggered this handler.

    Returns:
        The ``data`` field of the CDP response (the encoded screenshot string).

    Raises:
        BrowserError: If the CDP response is empty or has no ``data`` key.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    self.logger.debug('[ScreenshotWatchdog] Handler START - on_ScreenshotEvent called')
    try:
        # Session for whichever target is currently focused.
        cdp_session = await self.browser_session.get_or_create_cdp_session()

        capture_params = CaptureScreenshotParameters(format='jpeg', quality=60, captureBeyondViewport=False)

        self.logger.debug(f'[ScreenshotWatchdog] Taking screenshot with params: {capture_params}')
        response = await cdp_session.cdp_client.send.Page.captureScreenshot(
            params=capture_params, session_id=cdp_session.session_id
        )

        if not response or 'data' not in response:
            raise BrowserError('[ScreenshotWatchdog] Screenshot result missing data')

        self.logger.debug('[ScreenshotWatchdog] Screenshot captured successfully')
        return response['data']
    except Exception as e:
        self.logger.error(f'[ScreenshotWatchdog] Screenshot failed: {e}')
        raise
    finally:
        # Highlight removal is best effort and runs on success and failure alike.
        try:
            await self.browser_session.remove_highlights()
        except Exception:
            pass
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
"""Security watchdog for enforcing URL access policies."""
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, ClassVar
|
|
4
|
+
|
|
5
|
+
from bubus import BaseEvent
|
|
6
|
+
|
|
7
|
+
from browser_use.browser.events import (
|
|
8
|
+
BrowserErrorEvent,
|
|
9
|
+
NavigateToUrlEvent,
|
|
10
|
+
NavigationCompleteEvent,
|
|
11
|
+
TabCreatedEvent,
|
|
12
|
+
)
|
|
13
|
+
from browser_use.browser.watchdog_base import BaseWatchdog
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
pass
|
|
17
|
+
|
|
18
|
+
# Track if we've shown the glob warning
|
|
19
|
+
_GLOB_WARNING_SHOWN = False
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SecurityWatchdog(BaseWatchdog):
|
|
23
|
+
"""Monitors and enforces security policies for URL access."""
|
|
24
|
+
|
|
25
|
+
# Event contracts
|
|
26
|
+
LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [
|
|
27
|
+
NavigateToUrlEvent,
|
|
28
|
+
NavigationCompleteEvent,
|
|
29
|
+
TabCreatedEvent,
|
|
30
|
+
]
|
|
31
|
+
EMITS: ClassVar[list[type[BaseEvent]]] = [
|
|
32
|
+
BrowserErrorEvent,
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
async def on_NavigateToUrlEvent(self, event: NavigateToUrlEvent) -> None:
    """Reject a navigation request whose URL fails the access policy.

    Raises:
        ValueError: When the URL is not allowed; a ``BrowserErrorEvent`` is
            dispatched first.
    """
    target_url = event.url
    if self._is_url_allowed(target_url):
        return

    self.logger.warning(f'⛔️ Blocking navigation to disallowed URL: {target_url}')
    blocked = BrowserErrorEvent(
        error_type='NavigationBlocked',
        message=f'Navigation blocked to disallowed URL: {target_url}',
        details={'url': target_url, 'reason': 'not_in_allowed_domains'},
    )
    self.event_bus.dispatch(blocked)

    # Raising is what prevents the navigation from proceeding.
    raise ValueError(f'Navigation to {target_url} blocked by security policy')
|
|
49
|
+
|
|
50
|
+
async def on_NavigationCompleteEvent(self, event: NavigationCompleteEvent) -> None:
    """Re-check the URL a navigation actually landed on (e.g. after redirects).

    When the final URL is not allowed, a ``BrowserErrorEvent`` is dispatched
    and the target is sent to ``about:blank``; a failure of that fallback
    navigation is logged, not raised.
    """
    landed_url = event.url
    if self._is_url_allowed(landed_url):
        return

    self.logger.warning(f'⛔️ Navigation to non-allowed URL detected: {landed_url}')

    blocked = BrowserErrorEvent(
        error_type='NavigationBlocked',
        message=f'Navigation blocked to non-allowed URL: {landed_url} - redirecting to about:blank',
        details={'url': landed_url, 'target_id': event.target_id},
    )
    self.event_bus.dispatch(blocked)

    # Move the tab to about:blank instead of closing it, so the session stays usable.
    try:
        session = await self.browser_session.get_or_create_cdp_session(target_id=event.target_id)
        await session.cdp_client.send.Page.navigate(params={'url': 'about:blank'}, session_id=session.session_id)
        self.logger.info(f'⛔️ Navigated to about:blank after blocked URL: {landed_url}')
    except Exception as e:
        self.logger.error(f'⛔️ Failed to navigate to about:blank: {type(e).__name__} {e}')
|
|
73
|
+
|
|
74
|
+
async def on_TabCreatedEvent(self, event: TabCreatedEvent) -> None:
    """Close a newly created tab whose URL fails the access policy.

    A ``BrowserErrorEvent`` is dispatched before the close attempt; a failure
    to close the tab is logged, not raised.
    """
    tab_url = event.url
    if self._is_url_allowed(tab_url):
        return

    self.logger.warning(f'⛔️ New tab created with disallowed URL: {tab_url}')

    blocked = BrowserErrorEvent(
        error_type='TabCreationBlocked',
        message=f'Tab created with non-allowed URL: {tab_url}',
        details={'url': tab_url, 'target_id': event.target_id},
    )
    self.event_bus.dispatch(blocked)

    try:
        await self.browser_session._cdp_close_page(event.target_id)
        self.logger.info(f'⛔️ Closed new tab with non-allowed URL: {tab_url}')
    except Exception as e:
        self.logger.error(f'⛔️ Failed to close new tab with non-allowed URL: {type(e).__name__} {e}')
|
|
94
|
+
|
|
95
|
+
def _is_root_domain(self, domain: str) -> bool:
|
|
96
|
+
"""Check if a domain is a root domain (no subdomain present).
|
|
97
|
+
|
|
98
|
+
Simple heuristic: only add www for domains with exactly 1 dot (domain.tld).
|
|
99
|
+
For complex cases like country TLDs or subdomains, users should configure explicitly.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
domain: The domain to check
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
True if it's a simple root domain, False otherwise
|
|
106
|
+
"""
|
|
107
|
+
# Skip if it contains wildcards or protocol
|
|
108
|
+
if '*' in domain or '://' in domain:
|
|
109
|
+
return False
|
|
110
|
+
|
|
111
|
+
return domain.count('.') == 1
|
|
112
|
+
|
|
113
|
+
def _log_glob_warning(self) -> None:
    """Emit the glob-pattern warning the first time it is needed.

    The module-level ``_GLOB_WARNING_SHOWN`` flag is set on first use, so
    later calls (from any instance) log nothing.
    """
    global _GLOB_WARNING_SHOWN
    if _GLOB_WARNING_SHOWN:
        return

    _GLOB_WARNING_SHOWN = True
    self.logger.warning(
        '⚠️ Using glob patterns in allowed_domains. '
        'Note: Patterns like "*.example.com" will match both subdomains AND the main domain.'
    )
|
|
122
|
+
|
|
123
|
+
def _get_domain_variants(self, host: str) -> tuple[str, str]:
|
|
124
|
+
"""Get both variants of a domain (with and without www prefix).
|
|
125
|
+
|
|
126
|
+
Args:
|
|
127
|
+
host: The hostname to process
|
|
128
|
+
|
|
129
|
+
Returns:
|
|
130
|
+
Tuple of (original_host, variant_host)
|
|
131
|
+
- If host starts with www., variant is without www.
|
|
132
|
+
- Otherwise, variant is with www. prefix
|
|
133
|
+
"""
|
|
134
|
+
if host.startswith('www.'):
|
|
135
|
+
return (host, host[4:]) # ('www.example.com', 'example.com')
|
|
136
|
+
else:
|
|
137
|
+
return (host, f'www.{host}') # ('example.com', 'www.example.com')
|
|
138
|
+
|
|
139
|
+
def _is_ip_address(self, host: str) -> bool:
|
|
140
|
+
"""Check if a hostname is an IP address (IPv4 or IPv6).
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
host: The hostname to check
|
|
144
|
+
|
|
145
|
+
Returns:
|
|
146
|
+
True if the host is an IP address, False otherwise
|
|
147
|
+
"""
|
|
148
|
+
import ipaddress
|
|
149
|
+
|
|
150
|
+
try:
|
|
151
|
+
# Try to parse as IP address (handles both IPv4 and IPv6)
|
|
152
|
+
ipaddress.ip_address(host)
|
|
153
|
+
return True
|
|
154
|
+
except ValueError:
|
|
155
|
+
return False
|
|
156
|
+
except Exception:
|
|
157
|
+
return False
|
|
158
|
+
|
|
159
|
+
def _is_url_allowed(self, url: str) -> bool:
|
|
160
|
+
"""Check if a URL is allowed based on the allowed_domains configuration.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
url: The URL to check
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
True if the URL is allowed, False otherwise
|
|
167
|
+
"""
|
|
168
|
+
|
|
169
|
+
# Always allow internal browser targets (before any other checks)
|
|
170
|
+
if url in ['about:blank', 'chrome://new-tab-page/', 'chrome://new-tab-page', 'chrome://newtab/']:
|
|
171
|
+
return True
|
|
172
|
+
|
|
173
|
+
# Parse the URL to extract components
|
|
174
|
+
from urllib.parse import urlparse
|
|
175
|
+
|
|
176
|
+
try:
|
|
177
|
+
parsed = urlparse(url)
|
|
178
|
+
except Exception:
|
|
179
|
+
# Invalid URL
|
|
180
|
+
return False
|
|
181
|
+
|
|
182
|
+
# Allow data: and blob: URLs (they don't have hostnames)
|
|
183
|
+
if parsed.scheme in ['data', 'blob']:
|
|
184
|
+
return True
|
|
185
|
+
|
|
186
|
+
# Get the actual host (domain)
|
|
187
|
+
host = parsed.hostname
|
|
188
|
+
if not host:
|
|
189
|
+
return False
|
|
190
|
+
|
|
191
|
+
# Check if IP addresses should be blocked (before domain checks)
|
|
192
|
+
if self.browser_session.browser_profile.block_ip_addresses:
|
|
193
|
+
if self._is_ip_address(host):
|
|
194
|
+
return False
|
|
195
|
+
|
|
196
|
+
# If no allowed_domains specified, allow all URLs
|
|
197
|
+
if (
|
|
198
|
+
not self.browser_session.browser_profile.allowed_domains
|
|
199
|
+
and not self.browser_session.browser_profile.prohibited_domains
|
|
200
|
+
):
|
|
201
|
+
return True
|
|
202
|
+
|
|
203
|
+
# Check allowed domains (fast path for sets, slow path for lists with patterns)
|
|
204
|
+
if self.browser_session.browser_profile.allowed_domains:
|
|
205
|
+
allowed_domains = self.browser_session.browser_profile.allowed_domains
|
|
206
|
+
|
|
207
|
+
if isinstance(allowed_domains, set):
|
|
208
|
+
# Fast path: O(1) exact hostname match - check both www and non-www variants
|
|
209
|
+
host_variant, host_alt = self._get_domain_variants(host)
|
|
210
|
+
return host_variant in allowed_domains or host_alt in allowed_domains
|
|
211
|
+
else:
|
|
212
|
+
# Slow path: O(n) pattern matching for lists
|
|
213
|
+
for pattern in allowed_domains:
|
|
214
|
+
if self._is_url_match(url, host, parsed.scheme, pattern):
|
|
215
|
+
return True
|
|
216
|
+
return False
|
|
217
|
+
|
|
218
|
+
# Check prohibited domains (fast path for sets, slow path for lists with patterns)
|
|
219
|
+
if self.browser_session.browser_profile.prohibited_domains:
|
|
220
|
+
prohibited_domains = self.browser_session.browser_profile.prohibited_domains
|
|
221
|
+
|
|
222
|
+
if isinstance(prohibited_domains, set):
|
|
223
|
+
# Fast path: O(1) exact hostname match - check both www and non-www variants
|
|
224
|
+
host_variant, host_alt = self._get_domain_variants(host)
|
|
225
|
+
return host_variant not in prohibited_domains and host_alt not in prohibited_domains
|
|
226
|
+
else:
|
|
227
|
+
# Slow path: O(n) pattern matching for lists
|
|
228
|
+
for pattern in prohibited_domains:
|
|
229
|
+
if self._is_url_match(url, host, parsed.scheme, pattern):
|
|
230
|
+
return False
|
|
231
|
+
return True
|
|
232
|
+
|
|
233
|
+
return True
|
|
234
|
+
|
|
235
|
+
def _is_url_match(self, url: str, host: str, scheme: str, pattern: str) -> bool:
|
|
236
|
+
"""Check if a URL matches a pattern."""
|
|
237
|
+
|
|
238
|
+
# Full URL for matching (scheme + host)
|
|
239
|
+
full_url_pattern = f'{scheme}://{host}'
|
|
240
|
+
|
|
241
|
+
# Handle glob patterns
|
|
242
|
+
if '*' in pattern:
|
|
243
|
+
self._log_glob_warning()
|
|
244
|
+
import fnmatch
|
|
245
|
+
|
|
246
|
+
# Check if pattern matches the host
|
|
247
|
+
if pattern.startswith('*.'):
|
|
248
|
+
# Pattern like *.example.com should match subdomains and main domain
|
|
249
|
+
domain_part = pattern[2:] # Remove *.
|
|
250
|
+
if host == domain_part or host.endswith('.' + domain_part):
|
|
251
|
+
# Only match http/https URLs for domain-only patterns
|
|
252
|
+
if scheme in ['http', 'https']:
|
|
253
|
+
return True
|
|
254
|
+
elif pattern.endswith('/*'):
|
|
255
|
+
# Pattern like brave://* should match any brave:// URL
|
|
256
|
+
prefix = pattern[:-1] # Remove the * at the end
|
|
257
|
+
if url.startswith(prefix):
|
|
258
|
+
return True
|
|
259
|
+
else:
|
|
260
|
+
# Use fnmatch for other glob patterns
|
|
261
|
+
if fnmatch.fnmatch(
|
|
262
|
+
full_url_pattern if '://' in pattern else host,
|
|
263
|
+
pattern,
|
|
264
|
+
):
|
|
265
|
+
return True
|
|
266
|
+
else:
|
|
267
|
+
# Exact match
|
|
268
|
+
if '://' in pattern:
|
|
269
|
+
# Full URL pattern
|
|
270
|
+
if url.startswith(pattern):
|
|
271
|
+
return True
|
|
272
|
+
else:
|
|
273
|
+
# Domain-only pattern (case-insensitive comparison)
|
|
274
|
+
if host.lower() == pattern.lower():
|
|
275
|
+
return True
|
|
276
|
+
# If pattern is a root domain, also check www subdomain
|
|
277
|
+
if self._is_root_domain(pattern) and host.lower() == f'www.{pattern.lower()}':
|
|
278
|
+
return True
|
|
279
|
+
|
|
280
|
+
return False
|