optexity-browser-use 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use/__init__.py +157 -0
- browser_use/actor/__init__.py +11 -0
- browser_use/actor/element.py +1175 -0
- browser_use/actor/mouse.py +134 -0
- browser_use/actor/page.py +561 -0
- browser_use/actor/playground/flights.py +41 -0
- browser_use/actor/playground/mixed_automation.py +54 -0
- browser_use/actor/playground/playground.py +236 -0
- browser_use/actor/utils.py +176 -0
- browser_use/agent/cloud_events.py +282 -0
- browser_use/agent/gif.py +424 -0
- browser_use/agent/judge.py +170 -0
- browser_use/agent/message_manager/service.py +473 -0
- browser_use/agent/message_manager/utils.py +52 -0
- browser_use/agent/message_manager/views.py +98 -0
- browser_use/agent/prompts.py +413 -0
- browser_use/agent/service.py +2316 -0
- browser_use/agent/system_prompt.md +185 -0
- browser_use/agent/system_prompt_flash.md +10 -0
- browser_use/agent/system_prompt_no_thinking.md +183 -0
- browser_use/agent/views.py +743 -0
- browser_use/browser/__init__.py +41 -0
- browser_use/browser/cloud/cloud.py +203 -0
- browser_use/browser/cloud/views.py +89 -0
- browser_use/browser/events.py +578 -0
- browser_use/browser/profile.py +1158 -0
- browser_use/browser/python_highlights.py +548 -0
- browser_use/browser/session.py +3225 -0
- browser_use/browser/session_manager.py +399 -0
- browser_use/browser/video_recorder.py +162 -0
- browser_use/browser/views.py +200 -0
- browser_use/browser/watchdog_base.py +260 -0
- browser_use/browser/watchdogs/__init__.py +0 -0
- browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
- browser_use/browser/watchdogs/crash_watchdog.py +335 -0
- browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
- browser_use/browser/watchdogs/dom_watchdog.py +817 -0
- browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
- browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
- browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
- browser_use/browser/watchdogs/popups_watchdog.py +143 -0
- browser_use/browser/watchdogs/recording_watchdog.py +126 -0
- browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
- browser_use/browser/watchdogs/security_watchdog.py +280 -0
- browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
- browser_use/cli.py +2359 -0
- browser_use/code_use/__init__.py +16 -0
- browser_use/code_use/formatting.py +192 -0
- browser_use/code_use/namespace.py +665 -0
- browser_use/code_use/notebook_export.py +276 -0
- browser_use/code_use/service.py +1340 -0
- browser_use/code_use/system_prompt.md +574 -0
- browser_use/code_use/utils.py +150 -0
- browser_use/code_use/views.py +171 -0
- browser_use/config.py +505 -0
- browser_use/controller/__init__.py +3 -0
- browser_use/dom/enhanced_snapshot.py +161 -0
- browser_use/dom/markdown_extractor.py +169 -0
- browser_use/dom/playground/extraction.py +312 -0
- browser_use/dom/playground/multi_act.py +32 -0
- browser_use/dom/serializer/clickable_elements.py +200 -0
- browser_use/dom/serializer/code_use_serializer.py +287 -0
- browser_use/dom/serializer/eval_serializer.py +478 -0
- browser_use/dom/serializer/html_serializer.py +212 -0
- browser_use/dom/serializer/paint_order.py +197 -0
- browser_use/dom/serializer/serializer.py +1170 -0
- browser_use/dom/service.py +825 -0
- browser_use/dom/utils.py +129 -0
- browser_use/dom/views.py +906 -0
- browser_use/exceptions.py +5 -0
- browser_use/filesystem/__init__.py +0 -0
- browser_use/filesystem/file_system.py +619 -0
- browser_use/init_cmd.py +376 -0
- browser_use/integrations/gmail/__init__.py +24 -0
- browser_use/integrations/gmail/actions.py +115 -0
- browser_use/integrations/gmail/service.py +225 -0
- browser_use/llm/__init__.py +155 -0
- browser_use/llm/anthropic/chat.py +242 -0
- browser_use/llm/anthropic/serializer.py +312 -0
- browser_use/llm/aws/__init__.py +36 -0
- browser_use/llm/aws/chat_anthropic.py +242 -0
- browser_use/llm/aws/chat_bedrock.py +289 -0
- browser_use/llm/aws/serializer.py +257 -0
- browser_use/llm/azure/chat.py +91 -0
- browser_use/llm/base.py +57 -0
- browser_use/llm/browser_use/__init__.py +3 -0
- browser_use/llm/browser_use/chat.py +201 -0
- browser_use/llm/cerebras/chat.py +193 -0
- browser_use/llm/cerebras/serializer.py +109 -0
- browser_use/llm/deepseek/chat.py +212 -0
- browser_use/llm/deepseek/serializer.py +109 -0
- browser_use/llm/exceptions.py +29 -0
- browser_use/llm/google/__init__.py +3 -0
- browser_use/llm/google/chat.py +542 -0
- browser_use/llm/google/serializer.py +120 -0
- browser_use/llm/groq/chat.py +229 -0
- browser_use/llm/groq/parser.py +158 -0
- browser_use/llm/groq/serializer.py +159 -0
- browser_use/llm/messages.py +238 -0
- browser_use/llm/models.py +271 -0
- browser_use/llm/oci_raw/__init__.py +10 -0
- browser_use/llm/oci_raw/chat.py +443 -0
- browser_use/llm/oci_raw/serializer.py +229 -0
- browser_use/llm/ollama/chat.py +97 -0
- browser_use/llm/ollama/serializer.py +143 -0
- browser_use/llm/openai/chat.py +264 -0
- browser_use/llm/openai/like.py +15 -0
- browser_use/llm/openai/serializer.py +165 -0
- browser_use/llm/openrouter/chat.py +211 -0
- browser_use/llm/openrouter/serializer.py +26 -0
- browser_use/llm/schema.py +176 -0
- browser_use/llm/views.py +48 -0
- browser_use/logging_config.py +330 -0
- browser_use/mcp/__init__.py +18 -0
- browser_use/mcp/__main__.py +12 -0
- browser_use/mcp/client.py +544 -0
- browser_use/mcp/controller.py +264 -0
- browser_use/mcp/server.py +1114 -0
- browser_use/observability.py +204 -0
- browser_use/py.typed +0 -0
- browser_use/sandbox/__init__.py +41 -0
- browser_use/sandbox/sandbox.py +637 -0
- browser_use/sandbox/views.py +132 -0
- browser_use/screenshots/__init__.py +1 -0
- browser_use/screenshots/service.py +52 -0
- browser_use/sync/__init__.py +6 -0
- browser_use/sync/auth.py +357 -0
- browser_use/sync/service.py +161 -0
- browser_use/telemetry/__init__.py +51 -0
- browser_use/telemetry/service.py +112 -0
- browser_use/telemetry/views.py +101 -0
- browser_use/tokens/__init__.py +0 -0
- browser_use/tokens/custom_pricing.py +24 -0
- browser_use/tokens/mappings.py +4 -0
- browser_use/tokens/service.py +580 -0
- browser_use/tokens/views.py +108 -0
- browser_use/tools/registry/service.py +572 -0
- browser_use/tools/registry/views.py +174 -0
- browser_use/tools/service.py +1675 -0
- browser_use/tools/utils.py +82 -0
- browser_use/tools/views.py +100 -0
- browser_use/utils.py +670 -0
- optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
- optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
- optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
- optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
- optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
browser_use/utils.py
ADDED
|
@@ -0,0 +1,670 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import platform
|
|
5
|
+
import re
|
|
6
|
+
import signal
|
|
7
|
+
import time
|
|
8
|
+
from collections.abc import Callable, Coroutine
|
|
9
|
+
from fnmatch import fnmatch
|
|
10
|
+
from functools import cache, wraps
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from sys import stderr
|
|
13
|
+
from typing import Any, ParamSpec, TypeVar
|
|
14
|
+
from urllib.parse import urlparse
|
|
15
|
+
|
|
16
|
+
import httpx
|
|
17
|
+
from dotenv import load_dotenv
|
|
18
|
+
|
|
19
|
+
load_dotenv()
|
|
20
|
+
|
|
21
|
+
# Pre-compiled regex for URL detection - used in URL shortening
|
|
22
|
+
URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+|[^\s<>"\']+\.[a-z]{2,}(?:/[^\s<>"\']*)?', re.IGNORECASE)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
# Import error types - these may need to be adjusted based on actual import paths
|
|
28
|
+
try:
|
|
29
|
+
from openai import BadRequestError as OpenAIBadRequestError
|
|
30
|
+
except ImportError:
|
|
31
|
+
OpenAIBadRequestError = None
|
|
32
|
+
|
|
33
|
+
try:
|
|
34
|
+
from groq import BadRequestError as GroqBadRequestError # type: ignore[import-not-found]
|
|
35
|
+
except ImportError:
|
|
36
|
+
GroqBadRequestError = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Global flag to prevent duplicate exit messages
|
|
40
|
+
_exiting = False
|
|
41
|
+
|
|
42
|
+
# Define generic type variables for return type and parameters
|
|
43
|
+
R = TypeVar('R')
|
|
44
|
+
T = TypeVar('T')
|
|
45
|
+
P = ParamSpec('P')
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class SignalHandler:
|
|
49
|
+
"""
|
|
50
|
+
A modular and reusable signal handling system for managing SIGINT (Ctrl+C), SIGTERM,
|
|
51
|
+
and other signals in asyncio applications.
|
|
52
|
+
|
|
53
|
+
This class provides:
|
|
54
|
+
- Configurable signal handling for SIGINT and SIGTERM
|
|
55
|
+
- Support for custom pause/resume callbacks
|
|
56
|
+
- Management of event loop state across signals
|
|
57
|
+
- Standardized handling of first and second Ctrl+C presses
|
|
58
|
+
- Cross-platform compatibility (with simplified behavior on Windows)
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
def __init__(
|
|
62
|
+
self,
|
|
63
|
+
loop: asyncio.AbstractEventLoop | None = None,
|
|
64
|
+
pause_callback: Callable[[], None] | None = None,
|
|
65
|
+
resume_callback: Callable[[], None] | None = None,
|
|
66
|
+
custom_exit_callback: Callable[[], None] | None = None,
|
|
67
|
+
exit_on_second_int: bool = True,
|
|
68
|
+
interruptible_task_patterns: list[str] | None = None,
|
|
69
|
+
):
|
|
70
|
+
"""
|
|
71
|
+
Initialize the signal handler.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
loop: The asyncio event loop to use. Defaults to current event loop.
|
|
75
|
+
pause_callback: Function to call when system is paused (first Ctrl+C)
|
|
76
|
+
resume_callback: Function to call when system is resumed
|
|
77
|
+
custom_exit_callback: Function to call on exit (second Ctrl+C or SIGTERM)
|
|
78
|
+
exit_on_second_int: Whether to exit on second SIGINT (Ctrl+C)
|
|
79
|
+
interruptible_task_patterns: List of patterns to match task names that should be
|
|
80
|
+
canceled on first Ctrl+C (default: ['step', 'multi_act', 'get_next_action'])
|
|
81
|
+
"""
|
|
82
|
+
self.loop = loop or asyncio.get_event_loop()
|
|
83
|
+
self.pause_callback = pause_callback
|
|
84
|
+
self.resume_callback = resume_callback
|
|
85
|
+
self.custom_exit_callback = custom_exit_callback
|
|
86
|
+
self.exit_on_second_int = exit_on_second_int
|
|
87
|
+
self.interruptible_task_patterns = interruptible_task_patterns or ['step', 'multi_act', 'get_next_action']
|
|
88
|
+
self.is_windows = platform.system() == 'Windows'
|
|
89
|
+
|
|
90
|
+
# Initialize loop state attributes
|
|
91
|
+
self._initialize_loop_state()
|
|
92
|
+
|
|
93
|
+
# Store original signal handlers to restore them later if needed
|
|
94
|
+
self.original_sigint_handler = None
|
|
95
|
+
self.original_sigterm_handler = None
|
|
96
|
+
|
|
97
|
+
def _initialize_loop_state(self) -> None:
|
|
98
|
+
"""Initialize loop state attributes used for signal handling."""
|
|
99
|
+
setattr(self.loop, 'ctrl_c_pressed', False)
|
|
100
|
+
setattr(self.loop, 'waiting_for_input', False)
|
|
101
|
+
|
|
102
|
+
def register(self) -> None:
|
|
103
|
+
"""Register signal handlers for SIGINT and SIGTERM."""
|
|
104
|
+
try:
|
|
105
|
+
if self.is_windows:
|
|
106
|
+
# On Windows, use simple signal handling with immediate exit on Ctrl+C
|
|
107
|
+
def windows_handler(sig, frame):
|
|
108
|
+
print('\n\n🛑 Got Ctrl+C. Exiting immediately on Windows...\n', file=stderr)
|
|
109
|
+
# Run the custom exit callback if provided
|
|
110
|
+
if self.custom_exit_callback:
|
|
111
|
+
self.custom_exit_callback()
|
|
112
|
+
os._exit(0)
|
|
113
|
+
|
|
114
|
+
self.original_sigint_handler = signal.signal(signal.SIGINT, windows_handler)
|
|
115
|
+
else:
|
|
116
|
+
# On Unix-like systems, use asyncio's signal handling for smoother experience
|
|
117
|
+
self.original_sigint_handler = self.loop.add_signal_handler(signal.SIGINT, lambda: self.sigint_handler())
|
|
118
|
+
self.original_sigterm_handler = self.loop.add_signal_handler(signal.SIGTERM, lambda: self.sigterm_handler())
|
|
119
|
+
|
|
120
|
+
except Exception:
|
|
121
|
+
# there are situations where signal handlers are not supported, e.g.
|
|
122
|
+
# - when running in a thread other than the main thread
|
|
123
|
+
# - some operating systems
|
|
124
|
+
# - inside jupyter notebooks
|
|
125
|
+
pass
|
|
126
|
+
|
|
127
|
+
def unregister(self) -> None:
|
|
128
|
+
"""Unregister signal handlers and restore original handlers if possible."""
|
|
129
|
+
try:
|
|
130
|
+
if self.is_windows:
|
|
131
|
+
# On Windows, just restore the original SIGINT handler
|
|
132
|
+
if self.original_sigint_handler:
|
|
133
|
+
signal.signal(signal.SIGINT, self.original_sigint_handler)
|
|
134
|
+
else:
|
|
135
|
+
# On Unix-like systems, use asyncio's signal handler removal
|
|
136
|
+
self.loop.remove_signal_handler(signal.SIGINT)
|
|
137
|
+
self.loop.remove_signal_handler(signal.SIGTERM)
|
|
138
|
+
|
|
139
|
+
# Restore original handlers if available
|
|
140
|
+
if self.original_sigint_handler:
|
|
141
|
+
signal.signal(signal.SIGINT, self.original_sigint_handler)
|
|
142
|
+
if self.original_sigterm_handler:
|
|
143
|
+
signal.signal(signal.SIGTERM, self.original_sigterm_handler)
|
|
144
|
+
except Exception as e:
|
|
145
|
+
logger.warning(f'Error while unregistering signal handlers: {e}')
|
|
146
|
+
|
|
147
|
+
def _handle_second_ctrl_c(self) -> None:
|
|
148
|
+
"""
|
|
149
|
+
Handle a second Ctrl+C press by performing cleanup and exiting.
|
|
150
|
+
This is shared logic used by both sigint_handler and wait_for_resume.
|
|
151
|
+
"""
|
|
152
|
+
global _exiting
|
|
153
|
+
|
|
154
|
+
if not _exiting:
|
|
155
|
+
_exiting = True
|
|
156
|
+
|
|
157
|
+
# Call custom exit callback if provided
|
|
158
|
+
if self.custom_exit_callback:
|
|
159
|
+
try:
|
|
160
|
+
self.custom_exit_callback()
|
|
161
|
+
except Exception as e:
|
|
162
|
+
logger.error(f'Error in exit callback: {e}')
|
|
163
|
+
|
|
164
|
+
# Force immediate exit - more reliable than sys.exit()
|
|
165
|
+
print('\n\n🛑 Got second Ctrl+C. Exiting immediately...\n', file=stderr)
|
|
166
|
+
|
|
167
|
+
# Reset terminal to a clean state by sending multiple escape sequences
|
|
168
|
+
# Order matters for terminal resets - we try different approaches
|
|
169
|
+
|
|
170
|
+
# Reset terminal modes for both stdout and stderr
|
|
171
|
+
print('\033[?25h', end='', flush=True, file=stderr) # Show cursor
|
|
172
|
+
print('\033[?25h', end='', flush=True) # Show cursor
|
|
173
|
+
|
|
174
|
+
# Reset text attributes and terminal modes
|
|
175
|
+
print('\033[0m', end='', flush=True, file=stderr) # Reset text attributes
|
|
176
|
+
print('\033[0m', end='', flush=True) # Reset text attributes
|
|
177
|
+
|
|
178
|
+
# Disable special input modes that may cause arrow keys to output control chars
|
|
179
|
+
print('\033[?1l', end='', flush=True, file=stderr) # Reset cursor keys to normal mode
|
|
180
|
+
print('\033[?1l', end='', flush=True) # Reset cursor keys to normal mode
|
|
181
|
+
|
|
182
|
+
# Disable bracketed paste mode
|
|
183
|
+
print('\033[?2004l', end='', flush=True, file=stderr)
|
|
184
|
+
print('\033[?2004l', end='', flush=True)
|
|
185
|
+
|
|
186
|
+
# Carriage return helps ensure a clean line
|
|
187
|
+
print('\r', end='', flush=True, file=stderr)
|
|
188
|
+
print('\r', end='', flush=True)
|
|
189
|
+
|
|
190
|
+
# these ^^ attempts dont work as far as we can tell
|
|
191
|
+
# we still dont know what causes the broken input, if you know how to fix it, please let us know
|
|
192
|
+
print('(tip: press [Enter] once to fix escape codes appearing after chrome exit)', file=stderr)
|
|
193
|
+
|
|
194
|
+
os._exit(0)
|
|
195
|
+
|
|
196
|
+
def sigint_handler(self) -> None:
|
|
197
|
+
"""
|
|
198
|
+
SIGINT (Ctrl+C) handler.
|
|
199
|
+
|
|
200
|
+
First Ctrl+C: Cancel current step and pause.
|
|
201
|
+
Second Ctrl+C: Exit immediately if exit_on_second_int is True.
|
|
202
|
+
"""
|
|
203
|
+
global _exiting
|
|
204
|
+
|
|
205
|
+
if _exiting:
|
|
206
|
+
# Already exiting, force exit immediately
|
|
207
|
+
os._exit(0)
|
|
208
|
+
|
|
209
|
+
if getattr(self.loop, 'ctrl_c_pressed', False):
|
|
210
|
+
# If we're in the waiting for input state, let the pause method handle it
|
|
211
|
+
if getattr(self.loop, 'waiting_for_input', False):
|
|
212
|
+
return
|
|
213
|
+
|
|
214
|
+
# Second Ctrl+C - exit immediately if configured to do so
|
|
215
|
+
if self.exit_on_second_int:
|
|
216
|
+
self._handle_second_ctrl_c()
|
|
217
|
+
|
|
218
|
+
# Mark that Ctrl+C was pressed
|
|
219
|
+
setattr(self.loop, 'ctrl_c_pressed', True)
|
|
220
|
+
|
|
221
|
+
# Cancel current tasks that should be interruptible - this is crucial for immediate pausing
|
|
222
|
+
self._cancel_interruptible_tasks()
|
|
223
|
+
|
|
224
|
+
# Call pause callback if provided - this sets the paused flag
|
|
225
|
+
if self.pause_callback:
|
|
226
|
+
try:
|
|
227
|
+
self.pause_callback()
|
|
228
|
+
except Exception as e:
|
|
229
|
+
logger.error(f'Error in pause callback: {e}')
|
|
230
|
+
|
|
231
|
+
# Log pause message after pause_callback is called (not before)
|
|
232
|
+
print('----------------------------------------------------------------------', file=stderr)
|
|
233
|
+
|
|
234
|
+
def sigterm_handler(self) -> None:
|
|
235
|
+
"""
|
|
236
|
+
SIGTERM handler.
|
|
237
|
+
|
|
238
|
+
Always exits the program completely.
|
|
239
|
+
"""
|
|
240
|
+
global _exiting
|
|
241
|
+
if not _exiting:
|
|
242
|
+
_exiting = True
|
|
243
|
+
print('\n\n🛑 SIGTERM received. Exiting immediately...\n\n', file=stderr)
|
|
244
|
+
|
|
245
|
+
# Call custom exit callback if provided
|
|
246
|
+
if self.custom_exit_callback:
|
|
247
|
+
self.custom_exit_callback()
|
|
248
|
+
|
|
249
|
+
os._exit(0)
|
|
250
|
+
|
|
251
|
+
def _cancel_interruptible_tasks(self) -> None:
|
|
252
|
+
"""Cancel current tasks that should be interruptible."""
|
|
253
|
+
current_task = asyncio.current_task(self.loop)
|
|
254
|
+
for task in asyncio.all_tasks(self.loop):
|
|
255
|
+
if task != current_task and not task.done():
|
|
256
|
+
task_name = task.get_name() if hasattr(task, 'get_name') else str(task)
|
|
257
|
+
# Cancel tasks that match certain patterns
|
|
258
|
+
if any(pattern in task_name for pattern in self.interruptible_task_patterns):
|
|
259
|
+
logger.debug(f'Cancelling task: {task_name}')
|
|
260
|
+
task.cancel()
|
|
261
|
+
# Add exception handler to silence "Task exception was never retrieved" warnings
|
|
262
|
+
task.add_done_callback(lambda t: t.exception() if t.cancelled() else None)
|
|
263
|
+
|
|
264
|
+
# Also cancel the current task if it's interruptible
|
|
265
|
+
if current_task and not current_task.done():
|
|
266
|
+
task_name = current_task.get_name() if hasattr(current_task, 'get_name') else str(current_task)
|
|
267
|
+
if any(pattern in task_name for pattern in self.interruptible_task_patterns):
|
|
268
|
+
logger.debug(f'Cancelling current task: {task_name}')
|
|
269
|
+
current_task.cancel()
|
|
270
|
+
|
|
271
|
+
def wait_for_resume(self) -> None:
|
|
272
|
+
"""
|
|
273
|
+
Wait for user input to resume or exit.
|
|
274
|
+
|
|
275
|
+
This method should be called after handling the first Ctrl+C.
|
|
276
|
+
It temporarily restores default signal handling to allow catching
|
|
277
|
+
a second Ctrl+C directly.
|
|
278
|
+
"""
|
|
279
|
+
# Set flag to indicate we're waiting for input
|
|
280
|
+
setattr(self.loop, 'waiting_for_input', True)
|
|
281
|
+
|
|
282
|
+
# Temporarily restore default signal handling for SIGINT
|
|
283
|
+
# This ensures KeyboardInterrupt will be raised during input()
|
|
284
|
+
original_handler = signal.getsignal(signal.SIGINT)
|
|
285
|
+
try:
|
|
286
|
+
signal.signal(signal.SIGINT, signal.default_int_handler)
|
|
287
|
+
except ValueError:
|
|
288
|
+
# we are running in a thread other than the main thread
|
|
289
|
+
# or signal handlers are not supported for some other reason
|
|
290
|
+
pass
|
|
291
|
+
|
|
292
|
+
green = '\x1b[32;1m'
|
|
293
|
+
red = '\x1b[31m'
|
|
294
|
+
blink = '\033[33;5m'
|
|
295
|
+
unblink = '\033[0m'
|
|
296
|
+
reset = '\x1b[0m'
|
|
297
|
+
|
|
298
|
+
try: # escape code is to blink the ...
|
|
299
|
+
print(
|
|
300
|
+
f'➡️ Press {green}[Enter]{reset} to resume or {red}[Ctrl+C]{reset} again to exit{blink}...{unblink} ',
|
|
301
|
+
end='',
|
|
302
|
+
flush=True,
|
|
303
|
+
file=stderr,
|
|
304
|
+
)
|
|
305
|
+
input() # This will raise KeyboardInterrupt on Ctrl+C
|
|
306
|
+
|
|
307
|
+
# Call resume callback if provided
|
|
308
|
+
if self.resume_callback:
|
|
309
|
+
self.resume_callback()
|
|
310
|
+
except KeyboardInterrupt:
|
|
311
|
+
# Use the shared method to handle second Ctrl+C
|
|
312
|
+
self._handle_second_ctrl_c()
|
|
313
|
+
finally:
|
|
314
|
+
try:
|
|
315
|
+
# Restore our signal handler
|
|
316
|
+
signal.signal(signal.SIGINT, original_handler)
|
|
317
|
+
setattr(self.loop, 'waiting_for_input', False)
|
|
318
|
+
except Exception:
|
|
319
|
+
pass
|
|
320
|
+
|
|
321
|
+
def reset(self) -> None:
|
|
322
|
+
"""Reset state after resuming."""
|
|
323
|
+
# Clear the flags
|
|
324
|
+
if hasattr(self.loop, 'ctrl_c_pressed'):
|
|
325
|
+
setattr(self.loop, 'ctrl_c_pressed', False)
|
|
326
|
+
if hasattr(self.loop, 'waiting_for_input'):
|
|
327
|
+
setattr(self.loop, 'waiting_for_input', False)
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def time_execution_sync(additional_text: str = '') -> Callable[[Callable[P, R]], Callable[P, R]]:
    """Build a decorator that logs how long a synchronous function took.

    A debug message is emitted only when the call took more than 0.25 seconds.
    The logger is chosen in this order: the ``logger`` attribute of the first
    positional argument (if truthy), then of the ``agent`` keyword argument,
    then of the ``browser_session`` keyword argument, otherwise this module's
    logger. If the wrapped function raises, nothing is logged.

    Args:
        additional_text: Label used in the log message; leading and trailing
            ``-`` characters are stripped from it.

    Returns:
        A decorator that preserves the wrapped function's signature and return value.
    """

    def decorator(func: Callable[P, R]) -> Callable[P, R]:
        @wraps(func)
        def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
            # perf_counter() is monotonic, so a system clock adjustment during the
            # call cannot distort the measured duration (time.time() can jump)
            started = time.perf_counter()
            result = func(*args, **kwargs)
            execution_time = time.perf_counter() - started
            # Only log if execution takes more than 0.25 seconds
            if execution_time > 0.25:
                if args and getattr(args[0], 'logger', None):
                    logger = getattr(args[0], 'logger')
                elif 'agent' in kwargs:
                    logger = getattr(kwargs['agent'], 'logger')
                elif 'browser_session' in kwargs:
                    logger = getattr(kwargs['browser_session'], 'logger')
                else:
                    logger = logging.getLogger(__name__)
                logger.debug(f'⏳ {additional_text.strip("-")}() took {execution_time:.2f}s')
            return result

        return wrapper

    return decorator
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def time_execution_async(
    additional_text: str = '',
) -> Callable[[Callable[P, Coroutine[Any, Any, R]]], Callable[P, Coroutine[Any, Any, R]]]:
    """Build a decorator that logs how long an awaited coroutine function took.

    A debug message is emitted only when the call took more than 0.25 seconds.
    The logger is chosen in this order: the ``logger`` attribute of the first
    positional argument (if truthy), then of the ``agent`` keyword argument,
    then of the ``browser_session`` keyword argument, otherwise this module's
    logger. If the wrapped coroutine raises, nothing is logged.

    Args:
        additional_text: Label used in the log message; leading and trailing
            ``-`` characters are stripped from it.

    Returns:
        A decorator that preserves the wrapped coroutine function's signature and result.
    """

    def decorator(func: Callable[P, Coroutine[Any, Any, R]]) -> Callable[P, Coroutine[Any, Any, R]]:
        @wraps(func)
        async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
            # perf_counter() is monotonic, so a system clock adjustment during the
            # await cannot distort the measured duration (time.time() can jump)
            started = time.perf_counter()
            result = await func(*args, **kwargs)
            execution_time = time.perf_counter() - started
            # Only log if execution takes more than 0.25 seconds to avoid spamming the logs
            # you can lower this threshold locally when you're doing dev work to performance optimize stuff
            if execution_time > 0.25:
                if args and getattr(args[0], 'logger', None):
                    logger = getattr(args[0], 'logger')
                elif 'agent' in kwargs:
                    logger = getattr(kwargs['agent'], 'logger')
                elif 'browser_session' in kwargs:
                    logger = getattr(kwargs['browser_session'], 'logger')
                else:
                    logger = logging.getLogger(__name__)
                logger.debug(f'⏳ {additional_text.strip("-")}() took {execution_time:.2f}s')
            return result

        return wrapper

    return decorator
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def singleton(cls):
    """Class decorator that makes every call return one shared instance.

    The first call constructs ``cls`` with the arguments given; later calls
    ignore their arguments and return that same object. The decorated name
    becomes a plain function, not the class itself.
    """
    cached = None

    def wrapper(*args, **kwargs):
        nonlocal cached
        # Construct lazily, on first use only
        if cached is None:
            cached = cls(*args, **kwargs)
        return cached

    return wrapper
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def check_env_variables(keys: list[str], any_or_all=all) -> bool:
    """Check whether the given environment variables are set to non-blank values.

    Args:
        keys: Names of the environment variables to look up.
        any_or_all: Reducer applied to the stripped values; ``all`` (default)
            requires every variable to be non-blank, ``any`` requires at least one.

    Returns:
        The reducer's verdict over the whitespace-stripped values.
    """
    stripped_values = (os.environ.get(name, '').strip() for name in keys)
    return any_or_all(stripped_values)
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def is_unsafe_pattern(pattern: str) -> bool:
    """
    Tell whether a domain pattern contains wildcards beyond the simple
    ``*.domain`` / ``domain.*`` forms.

    Args:
        pattern: The domain pattern to check; an optional ``scheme://`` prefix is ignored

    Returns:
        bool: True if a ``*`` is left once every ``.*`` and ``*.`` has been removed, False otherwise
    """
    scheme_separator = '://'
    if scheme_separator in pattern:
        # Only the part after the first scheme separator is inspected
        pattern = pattern.split(scheme_separator, 1)[1]

    # Strip the wildcard forms considered safe, '.*' first and then '*.'
    remainder = pattern
    for safe_fragment in ('.*', '*.'):
        remainder = remainder.replace(safe_fragment, '')

    # Any wildcard that survived is potentially unsafe
    return remainder.find('*') != -1
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def is_new_tab_page(url: str) -> bool:
    """
    Tell whether a URL is one of the known new-tab pages.

    Args:
        url: The URL to check

    Returns:
        bool: True only for an exact match with about:blank, chrome://new-tab-page
            or chrome://newtab (the chrome:// forms with or without a trailing slash)
    """
    new_tab_urls = (
        'about:blank',
        'chrome://new-tab-page/',
        'chrome://new-tab-page',
        'chrome://newtab/',
        'chrome://newtab',
    )
    return any(url == candidate for candidate in new_tab_urls)


def match_url_with_domain_pattern(url: str, domain_pattern: str, log_warnings: bool = False) -> bool:
    """
    Check if a URL matches a domain pattern. SECURITY CRITICAL.

    Only the URL's scheme and hostname take part in the match; both sides are
    compared lower-cased, and a ``:port`` suffix in the pattern is dropped.

    Supported patterns:
    - example.com matches https://example.com only. When no scheme is given in the
      pattern, https is assumed, so http://example.com does not match.
    - *.example.com matches sub.example.com and also the bare example.com
    - http*://example.com matches http://example.com and https://example.com
      (the scheme part is a glob)
    - chrome-extension://* matches any host under that scheme

    Rejected patterns (never match, an error is logged when log_warnings is True):
    - more than one '*.' or more than one '.*' (e.g. *.*.example.com)
    - wildcard TLDs (e.g. example.*)
    - a '*' that is not directly followed by '.' (e.g. *google.com)

    Note: New tab pages (about:blank, chrome://new-tab-page) must be handled at the
    callsite; this function returns False for them.

    Args:
        url: The URL to check
        domain_pattern: Domain pattern to match against
        log_warnings: Whether to log an error when the pattern is rejected as unsafe

    Returns:
        bool: True if the URL matches the pattern, False otherwise (including when
            any exception occurs while matching, which is logged)
    """
    try:
        # New tab pages are the caller's responsibility
        if is_new_tab_page(url):
            return False

        # Reduce the URL to scheme + hostname
        parts = urlparse(url)
        scheme = (parts.scheme or '').lower()
        domain = (parts.hostname or '').lower()
        if not (scheme and domain):
            return False

        # Normalize the pattern (rebinding the parameter keeps log messages lower-cased)
        domain_pattern = domain_pattern.lower()

        # Split off an explicit scheme; without one only https is accepted
        head, separator, tail = domain_pattern.partition('://')
        if separator:
            pattern_scheme, pattern_domain = head, tail
        else:
            pattern_scheme, pattern_domain = 'https', head

        # Drop a port from the pattern: the URL side is hostname-only already.
        # A pattern that begins with ':' is left untouched.
        colon_at = pattern_domain.find(':')
        if colon_at > 0:
            pattern_domain = pattern_domain[:colon_at]

        # The scheme has to match before the host is even considered
        if not fnmatch(scheme, pattern_scheme):
            return False

        # Catch-all host or exact host
        if pattern_domain == '*' or domain == pattern_domain:
            return True

        # Without a wildcard there is nothing left that could match
        if '*' not in pattern_domain:
            return False

        # Refuse unsafe glob shapes, checked in this order
        rejection = None
        if pattern_domain.count('*.') > 1 or pattern_domain.count('.*') > 1:
            rejection = f'⛔️ Multiple wildcards in pattern=[{domain_pattern}] are not supported'
        elif pattern_domain.endswith('.*'):
            rejection = f'⛔️ Wildcard TLDs like in pattern=[{domain_pattern}] are not supported for security'
        elif '*' in pattern_domain.replace('*.', ''):
            rejection = f'⛔️ Only *.domain style patterns are supported, ignoring pattern=[{domain_pattern}]'
        if rejection is not None:
            if log_warnings:
                logging.getLogger(__name__).error(rejection)
            return False  # Don't match unsafe patterns

        # *.google.com should also accept the bare google.com
        if pattern_domain.startswith('*.'):
            parent_domain = pattern_domain[2:]
            if domain == parent_domain or fnmatch(domain, parent_domain):
                return True

        # Normal case: glob the hostname against the pattern
        return fnmatch(domain, pattern_domain)
    except Exception as e:
        logger = logging.getLogger(__name__)
        logger.error(f'⛔️ Error matching URL {url} with pattern {domain_pattern}: {type(e).__name__}: {e}')
        return False
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
def merge_dicts(a: dict, b: dict, path: tuple[str, ...] = ()):
    """Recursively merge ``b`` into ``a`` in place and return ``a``.

    For a key present in both: nested dicts are merged recursively, lists are
    concatenated (``a``'s items first), equal values are left alone, and any
    other differing pair raises. Keys only present in ``b`` are copied over.

    Args:
        a: Destination dict; mutated.
        b: Source dict.
        path: Keys already descended through, used to build the conflict message.

    Raises:
        Exception: 'Conflict at <dotted.key.path>' when two values cannot be merged.
    """
    for key, incoming in b.items():
        if key not in a:
            a[key] = incoming
            continue

        existing = a[key]
        key_path = path + (str(key),)
        if isinstance(existing, dict) and isinstance(incoming, dict):
            merge_dicts(existing, incoming, key_path)
        elif isinstance(existing, list) and isinstance(incoming, list):
            a[key] = existing + incoming
        elif existing != incoming:
            raise Exception('Conflict at ' + '.'.join(key_path))
    return a
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
@cache
def get_browser_use_version() -> str:
    """Return the browser-use package version, or 'unknown' if it cannot be determined.

    Lookup order:
    1. the first ``version = "..."`` assignment in ``pyproject.toml`` two
       directories above this file, if that file exists and has one;
    2. the installed distribution metadata for ``browser-use``.

    A detected version is also written to the ``LIBRARY_VERSION`` environment
    variable. The result is cached for the lifetime of the process.
    """
    try:
        pyproject_path = Path(__file__).parent.parent / 'pyproject.toml'

        # Preferred source: pyproject.toml
        if pyproject_path.exists():
            content = pyproject_path.read_text(encoding='utf-8')
            found = re.search(r'version\s*=\s*["\']([^"\']+)["\']', content)
            if found:
                detected = found.group(1)
                # used by bubus event_schema so all Event schemas include versioning
                os.environ['LIBRARY_VERSION'] = detected
                return detected

        # Fallback: ask the installed package metadata
        from importlib.metadata import version as installed_version

        detected = str(installed_version('browser-use'))
        os.environ['LIBRARY_VERSION'] = detected
        return detected

    except Exception as e:
        logger.debug(f'Error detecting browser-use version: {type(e).__name__}: {e}')
        return 'unknown'
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
async def check_latest_browser_use_version() -> str | None:
    """Ask PyPI for the newest published browser-use release.

    The request uses a 3 second client timeout and is strictly best-effort.

    Returns:
        The version reported under ``info.version`` in PyPI's JSON response, or
        None when the request raises or the response status is not 200.
    """
    latest: str | None = None
    try:
        async with httpx.AsyncClient(timeout=3.0) as http:
            reply = await http.get('https://pypi.org/pypi/browser-use/json')
            if reply.status_code == 200:
                latest = reply.json()['info']['version']
    except Exception:
        # Deliberately swallowed: a network hiccup must never break agent startup
        latest = None
    return latest
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
@cache
def get_git_info() -> dict[str, str] | None:
    """Collect git metadata when the package lives inside a git checkout.

    Looks for a ``.git`` entry two directories above this file and, if present,
    shells out to ``git`` for the HEAD commit hash, current branch, origin URL
    and commit timestamp.

    Returns:
        A dict with keys ``commit_hash``, ``branch``, ``remote_url`` and
        ``commit_timestamp``, or None when there is no ``.git`` entry or any
        step raises (the error is logged at debug level).
    """
    try:
        import subprocess

        repo_root = Path(__file__).parent.parent
        if not (repo_root / '.git').exists():
            return None

        def run_git(*args: str) -> str:
            """Run one git command in the package root and return its trimmed stdout."""
            raw = subprocess.check_output(['git', *args], cwd=repo_root, stderr=subprocess.DEVNULL)
            return raw.decode().strip()

        # Same command order as before: hash, branch, remote, timestamp
        head_hash = run_git('rev-parse', 'HEAD')
        head_branch = run_git('rev-parse', '--abbrev-ref', 'HEAD')
        origin_url = run_git('config', '--get', 'remote.origin.url')
        head_timestamp = run_git('show', '-s', '--format=%ci', 'HEAD')

        return {
            'commit_hash': head_hash,
            'branch': head_branch,
            'remote_url': origin_url,
            'commit_timestamp': head_timestamp,
        }
    except Exception as e:
        logger.debug(f'Error getting git info: {type(e).__name__}: {e}')
        return None
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
def _log_pretty_path(path: str | Path | None) -> str:
|
|
644
|
+
"""Pretty-print a path, shorten home dir to ~ and cwd to ."""
|
|
645
|
+
|
|
646
|
+
if not path or not str(path).strip():
|
|
647
|
+
return '' # always falsy in -> falsy out so it can be used in ternaries
|
|
648
|
+
|
|
649
|
+
# dont print anything thats not a path
|
|
650
|
+
if not isinstance(path, (str, Path)):
|
|
651
|
+
# no other types are safe to just str(path) and log to terminal unless we know what they are
|
|
652
|
+
# e.g. what if we get storage_date=dict | Path and the dict version could contain real cookies
|
|
653
|
+
return f'<{type(path).__name__}>'
|
|
654
|
+
|
|
655
|
+
# replace home dir and cwd with ~ and .
|
|
656
|
+
pretty_path = str(path).replace(str(Path.home()), '~').replace(str(Path.cwd().resolve()), '.')
|
|
657
|
+
|
|
658
|
+
# wrap in quotes if it contains spaces
|
|
659
|
+
if pretty_path.strip() and ' ' in pretty_path:
|
|
660
|
+
pretty_path = f'"{pretty_path}"'
|
|
661
|
+
|
|
662
|
+
return pretty_path
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def _log_pretty_url(s: str, max_len: int | None = 22) -> str:
|
|
666
|
+
"""Truncate/pretty-print a URL with a maximum length, removing the protocol and www. prefix"""
|
|
667
|
+
s = s.replace('https://', '').replace('http://', '').replace('www.', '')
|
|
668
|
+
if max_len is not None and len(s) > max_len:
|
|
669
|
+
return s[:max_len] + '…'
|
|
670
|
+
return s
|