seleniumbase 4.32.1__py3-none-any.whl → 4.32.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,328 @@
1
+ """CDP-Driver is based on NoDriver"""
2
+ from __future__ import annotations
3
+ import asyncio
4
+ import logging
5
+ import time
6
+ import types
7
+ import typing
8
+ from seleniumbase.fixtures import shared_utils
9
+ from typing import Optional, List, Union, Callable
10
+ from .element import Element
11
+ from .browser import Browser
12
+ from .browser import PathLike
13
+ from .config import Config
14
+ from .tab import Tab
15
+ import mycdp as cdp
16
+
17
+ logger = logging.getLogger(__name__)
18
+ T = typing.TypeVar("T")
19
+
20
+
21
async def start(
    config: Optional[Config] = None,
    *,
    user_data_dir: Optional[PathLike] = None,
    headless: Optional[bool] = False,
    incognito: Optional[bool] = False,
    guest: Optional[bool] = False,
    browser_executable_path: Optional[PathLike] = None,
    browser_args: Optional[List[str]] = None,
    sandbox: Optional[bool] = True,
    lang: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[int] = None,
    expert: Optional[bool] = None,
    **kwargs: Optional[dict],
) -> Browser:
    """
    Launch a browser and return the resulting Browser object.
    When ``config`` is given (and truthy) it is used as-is and every other
    argument is ignored. Otherwise a Config is built from the keyword
    arguments below and handed to ``Browser.create``.
    Note: Due to a Chrome-130 bug, use start_async or start_sync instead.
    (Calling this method directly could lead to an unresponsive browser)
    Note: Apart from ``config``, all parameters are keyword-only.
    Note: This is a coroutine, so call it as ``await start()``
    :param config: A ready-made Config; skips building one from the kwargs.
    :type config: Config
    :param user_data_dir: Profile directory to use.
    :type user_data_dir: PathLike
    :param headless: Set to True for headless mode.
    :type headless: bool
    :param incognito: Forwarded to Config as its ``incognito`` setting.
    :type incognito: bool
    :param guest: Forwarded to Config as its ``guest`` setting.
    :type guest: bool
    :param browser_executable_path: Browser binary to launch.
    :type browser_executable_path: PathLike
    :param browser_args: Extra command-line arguments, such as
        ["--some-chromeparam=somevalue", "some-other-param=someval"]
    :type browser_args: List[str]
    :param sandbox: Default True. When False, --no-sandbox is added to
        the browser arguments. Config also turns the sandbox off by itself
        when running as root on a posix system.
    :type sandbox: bool
    :param lang: Language string.
    :type lang: str
    :param host: Host of an existing debuggable session to connect to.
        Per the upstream notes, when both host and port are provided,
        a local Chrome browser will not be started.
    :type host: str
    :param port: Port of an existing debuggable session to connect to.
        Per the upstream notes, when both host and port are provided,
        a local Chrome browser will not be started.
    :type port: int
    :param expert: When True, "expert" mode is enabled, which adds
        --disable-web-security --disable-site-isolation-trials
        (plus, per the upstream notes, debugging-oriented patches).
    :type expert: bool
    :param kwargs: Any other keyword arguments are forwarded to Config.
    """
    if config:
        # A caller-supplied config wins; nothing else is consulted.
        return await Browser.create(config)
    # The first eight values are passed positionally, in Config's own order.
    launch_config = Config(
        user_data_dir,
        headless,
        incognito,
        guest,
        browser_executable_path,
        browser_args,
        sandbox,
        lang,
        host=host,
        port=port,
        expert=expert,
        **kwargs,
    )
    return await Browser.create(launch_config)
92
+
93
+
94
async def start_async(*args, **kwargs) -> Browser:
    """
    Coroutine wrapper around ``start`` with a Chrome-130 workaround.
    When ``shared_utils.is_chrome_130_or_newer`` reports True for the
    requested binary, a headless "decoy" browser is started first and
    stopped again, and the real launch reuses the decoy's user data dir.
    :param args: Positional arguments forwarded to ``start``.
    :param kwargs: Keyword arguments forwarded to ``start``. On the
        workaround path, "headless" and "user_data_dir" are (re)written.
    :return: The Browser returned by the final ``start`` call.
    """
    binary_location = kwargs.get("browser_executable_path")
    if shared_utils.is_chrome_130_or_newer(binary_location):
        # Remember what the caller asked for before forcing headless.
        requested_headless = kwargs.get("headless", False)
        kwargs["headless"] = True
        decoy = await start(**kwargs)
        kwargs["headless"] = requested_headless
        kwargs["user_data_dir"] = decoy.config.user_data_dir
        time.sleep(0.2)
        decoy.stop()  # Due to Chrome-130, must stop & start
        time.sleep(0.1)
    return await start(*args, **kwargs)
111
+
112
+
113
def start_sync(*args, **kwargs) -> Browser:
    """
    Blocking wrapper around ``start`` with a Chrome-130 workaround.
    Runs ``start`` to completion on the current event loop. If no event
    loop is available, a new one is created and installed as current.
    When ``shared_utils.is_chrome_130_or_newer`` reports True for the
    requested binary, a headless "decoy" browser is started first and
    stopped again, and the real launch reuses the decoy's user data dir.
    Note: ``run_until_complete`` is used, so this is meant to be called
    from synchronous code, not from inside a running event loop.
    :param args: Positional arguments forwarded to ``start``.
    :param kwargs: Keyword arguments forwarded to ``start``. On the
        workaround path, "user_data_dir" is set to the decoy's directory.
    :return: The Browser returned by the final ``start`` call.
    """
    try:
        event_loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current loop (for example in a non-main thread, or on newer
        # Python versions where one is no longer created implicitly).
        event_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(event_loop)
    binary_location = kwargs.get("browser_executable_path")
    if shared_utils.is_chrome_130_or_newer(binary_location):
        # Launch the decoy from a copy so the caller's "headless" choice
        # is never overwritten.
        decoy_args = dict(kwargs, headless=True)
        driver = event_loop.run_until_complete(start(**decoy_args))
        kwargs["user_data_dir"] = driver.config.user_data_dir
        time.sleep(0.2)
        driver.stop()  # Due to Chrome-130, must stop & start
        time.sleep(0.1)
    return event_loop.run_until_complete(start(*args, **kwargs))
131
+
132
+
133
async def create_from_driver(driver) -> Browser:
    """
    Create a Browser instance from a running driver instance.
    The driver's ``options.debugger_address`` ("host:port") is copied
    into a fresh Config, ``start`` is awaited with it, and the driver's
    service is then stopped. The driver's ``browser_pid`` is handed over
    to the new Browser before being reset on the driver.
    :param driver: Object exposing ``options.debugger_address``,
        ``browser_pid``, ``service`` and ``user_data_dir``.
    :return: The Browser created for the driver's debugger address.
    """
    from .config import Config

    attach_config = Config()
    address_host, address_port = driver.options.debugger_address.split(":")
    port_number = int(address_port)
    attach_config.host = address_host
    attach_config.port = port_number
    # Create Browser instance
    attached_browser = await start(attach_config)
    attached_browser._process_pid = driver.browser_pid
    # Stop chromedriver binary
    driver.service.stop()
    driver.browser_pid = -1
    driver.user_data_dir = None
    return attached_browser
148
+
149
+
150
def free_port() -> int:
    """
    Determines a free port using sockets.
    A TCP socket is bound to 127.0.0.1 with port 0, and the port number
    assigned to it is read back. The socket is closed before returning
    (also when an error occurs), so the port is no longer held by this
    function once it returns.
    :return: The port number that was assigned to the temporary socket.
    """
    import socket

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as free_socket:
        free_socket.bind(("127.0.0.1", 0))
        free_socket.listen(5)
        port: int = free_socket.getsockname()[1]
    return port
160
+
161
+
162
def filter_recurse_all(
    doc: T, predicate: Callable[[cdp.dom.Node, Element], bool]
) -> List[T]:
    """
    Test each descendant using predicate(child),
    and return all of them for which predicate(child) is truthy.
    For every child, the children of its first shadow root (if any) are
    searched before the child's own children.
    :param doc: The cdp.dom.Node object or :py:class:`cdp_driver.Element`
    :param predicate: A function which takes a node as first parameter
        and returns a boolean, where True means include.
    :return: List of matching nodes, in traversal order.
    :raises TypeError: If ``doc`` has no ``children`` attribute.
    """
    if not hasattr(doc, "children"):
        raise TypeError("Object should have a .children attribute!")
    out = []
    if doc and doc.children:
        for child in doc.children:
            if predicate(child):
                out.append(child)
            # Truthiness (not "is not None") so that an empty list of
            # shadow roots is skipped instead of raising IndexError.
            if child.shadow_roots:
                out.extend(
                    filter_recurse_all(child.shadow_roots[0], predicate)
                )
            out.extend(filter_recurse_all(child, predicate))
    return out
185
+
186
+
187
def filter_recurse(
    doc: T, predicate: Callable[[cdp.dom.Node, Element], bool]
) -> T:
    """
    Depth-first search for the first descendant accepted by the predicate.
    For every child, in order: the child itself is tested, then the
    children of its first shadow root (if any), then its own children.
    :param doc: the cdp.dom.Node object or :py:class:`cdp_driver.Element`
    :param predicate: a function which takes a node as first parameter
        and returns a boolean, where True means "this is the one".
    :return: The first matching node, or None when nothing matches.
    :raises TypeError: If ``doc`` has no ``children`` attribute.
    """
    if not hasattr(doc, "children"):
        raise TypeError("Object should have a .children attribute!")
    if not (doc and doc.children):
        return None
    for node in doc.children:
        if predicate(node):
            return node
        if node.shadow_roots:
            # Only the first shadow root is searched.
            from_shadow = filter_recurse(node.shadow_roots[0], predicate)
            if from_shadow:
                return from_shadow
        from_subtree = filter_recurse(node, predicate)
        if from_subtree:
            return from_subtree
    return None
212
+
213
+
214
def circle(
    x, y=None, radius=10, num=10, dir=0
) -> typing.Generator[typing.Tuple[float, float], None, None]:
    """
    A generator that calculates coordinates around a circle.
    The yielded points lie on a circle of the given radius whose centre
    is ``(int(x - 2 * radius), int(y - 2 * radius))``.
    :param x: start x position
    :type x: int
    :param y: start y position; when omitted (None), ``x`` is used.
        A value of 0 is a valid position and is used as given.
    :type y: int
    :param radius: size of the circle
    :type radius: int
    :param num: the amount of points calculated
        (higher => slower, more cpu, but more detailed)
    :type num: int
    :param dir: 0 steps through indices 0..num; any other value steps
        through indices num+1..1 (the opposite direction).
    :type dir: int
    :return: Generator of ``(x, y)`` float tuples, ``num + 1`` in total.
    """
    import math

    # Only a missing y falls back to x; "not y" would also swallow y == 0.
    if y is None:
        y = x
    centre_x = int(x - radius * 2)
    centre_y = int(y - radius * 2)
    step = (2 * math.pi) / num
    if dir == 0:
        # Regular direction
        indices = range(0, num + 1, 1)
    else:
        # Opposite direction
        indices = range(num + 1, 0, -1)
    for i in indices:
        yield (
            centre_x + radius * math.sin(step * i),
            centre_y + radius * math.cos(step * i),
        )
248
+
249
+
250
def remove_from_tree(tree: cdp.dom.Node, node: cdp.dom.Node) -> cdp.dom.Node:
    """
    Remove every descendant whose ``backend_node_id`` matches ``node``.
    The tree is modified in place and also returned.
    :param tree: Root object to prune; must have a ``children`` attribute.
    :param node: Object whose ``backend_node_id`` identifies what to remove.
    :return: The same ``tree`` object that was passed in.
    :raises TypeError: If ``tree`` has no ``children`` attribute.
    """
    if not hasattr(tree, "children"):
        raise TypeError("Object should have a .children attribute!")
    if tree and tree.children:
        # Iterate over a snapshot: removing from the list being iterated
        # would make the loop skip the sibling after each removed child.
        for child in list(tree.children):
            if child.backend_node_id == node.backend_node_id:
                tree.children.remove(child)
            remove_from_tree(child, node)
    return tree
259
+
260
+
261
async def html_from_tree(
    tree: Union[cdp.dom.Node, Element], target: Tab
):
    """
    Concatenate the HTML of every descendant of ``tree``.
    For each child, its own HTML comes first (``get_html()`` for an
    Element, otherwise a ``cdp.dom.get_outer_html`` request sent through
    ``target``), followed by the result of recursing into that child.
    :param tree: Object with a ``children`` attribute.
    :param target: Object whose ``send`` is awaited for non-Element nodes.
    :return: The concatenated HTML; "" when there are no children.
    :raises TypeError: If ``tree`` has no ``children`` attribute.
    """
    if not hasattr(tree, "children"):
        raise TypeError("Object should have a .children attribute!")
    if not (tree and tree.children):
        return ""
    html = ""
    for node in tree.children:
        if isinstance(node, Element):
            fragment = await node.get_html()
        else:
            request = cdp.dom.get_outer_html(
                backend_node_id=node.backend_node_id
            )
            fragment = await target.send(request)
        html += fragment
        html += await html_from_tree(node, target)
    return html
279
+
280
+
281
def compare_target_info(
    info1: cdp.target.TargetInfo, info2: cdp.target.TargetInfo
) -> List[typing.Tuple[str, typing.Any, typing.Any]]:
    """
    Report which attributes differ between two target-info objects.
    The comparison is a plain walk over ``info1.__dict__``, looking up
    each key in ``info2.__dict__``. Per the upstream notes, this is used
    to produce more meaningful debug log messages when target info
    changes.
    :param info1: The old state.
    :param info2: The new state; must have every attribute of ``info1``.
    :return: A list of tuples
        [ ... ( key_which_has_changed, old_value, new_value) ]
    """
    before = info1.__dict__
    after = info2.__dict__
    changes = []
    for key, old_value in before.items():
        new_value = after[key]
        if new_value != old_value:
            changes.append((key, old_value, new_value))
    return changes
297
+
298
+
299
def loop():
    """
    Create a new asyncio event loop and install it as the current one.
    :return: The newly created event loop.
    """
    fresh_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(fresh_loop)
    return fresh_loop
303
+
304
+
305
def cdp_get_module(domain: Union[str, types.ModuleType]):
    """
    Resolve a cdp domain to its module.
    A module object is returned unchanged. A string is first looked up
    as an attribute of the ``cdp`` package ("input" is looked up as
    "input_"); if that fails, it is imported as a regular module name.
    :param domain: Module object, or the name of the domain/module.
    :return: The resolved module.
    :raises ModuleNotFoundError: If neither lookup finds a module.
    """
    import importlib

    if isinstance(domain, types.ModuleType):
        return domain
    try:
        if domain in ("input",):
            domain = "input_"
        resolved = getattr(cdp, domain)
        if not resolved:
            # A falsy attribute is treated the same as a missing one.
            raise AttributeError
    except AttributeError:
        try:
            resolved = importlib.import_module(domain)
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "Could not find cdp module from input '%s'" % domain
            )
    return resolved
@@ -0,0 +1,328 @@
1
+ import logging
2
+ import os
3
+ import pathlib
4
+ import secrets
5
+ import sys
6
+ import tempfile
7
+ import zipfile
8
+ from typing import Union, List, Optional
9
+
10
+ __all__ = [
11
+ "Config",
12
+ "find_chrome_executable",
13
+ "temp_profile_dir",
14
+ "is_root",
15
+ "is_posix",
16
+ "PathLike",
17
+ ]
18
+
19
+ logger = logging.getLogger(__name__)
20
+ is_posix = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
21
+
22
+ PathLike = Union[str, pathlib.Path]
23
+ AUTO = None
24
+
25
+
26
class Config:
    """
    Launch settings for the browser.
    Calling an instance returns the list of command-line arguments
    that is built from its attributes.
    """

    def __init__(
        self,
        user_data_dir: Optional[PathLike] = AUTO,
        headless: Optional[bool] = False,
        incognito: Optional[bool] = False,
        guest: Optional[bool] = False,
        browser_executable_path: Optional[PathLike] = AUTO,
        browser_args: Optional[List[str]] = AUTO,
        sandbox: Optional[bool] = True,
        lang: Optional[str] = "en-US",
        host: str = AUTO,
        port: int = AUTO,
        expert: bool = AUTO,
        **kwargs: dict,
    ):
        """
        Build a config; every argument is optional.
        Calling the resulting object, eg: myconfig(), returns the list of
        arguments which are provided to the browser.
        Additional args can be added using :py:obj:`~add_argument`.
        :param user_data_dir: The data directory to use. When omitted,
            a temporary profile directory is created.
        :param headless: Set to True for headless mode.
        :param incognito: When True, --incognito is added.
        :param guest: When True, --guest is added.
        :param browser_executable_path:
            Browser executable to use. When omitted, it is autodetected
            with ``find_chrome_executable()``.
        :param browser_args: Forwarded to the browser executable.
            Eg: ["--some-chromeparam=somevalue", "some-other-param=someval"]
        :param sandbox: When False, --no-sandbox is added. Forced to
            False when running as root on a posix system.
        :param lang: Language string; stored on the instance as ``lang``.
        :param host: When set, --remote-debugging-host is added.
        :param port: When set, --remote-debugging-port is added.
        :param expert: When set to True, "expert" mode is enabled.
            This adds: --disable-web-security --disable-site-isolation-trials
            (plus, per the upstream notes, debugging-oriented patches).
        :param kwargs: Stored as instance attributes.
        :type user_data_dir: PathLike
        :type headless: bool
        :type browser_executable_path: PathLike
        :type browser_args: list[str]
        :type sandbox: bool
        :type lang: str
        :type kwargs: dict
        """
        browser_args = browser_args or []
        if user_data_dir:
            # Goes through the property setter, which marks it as custom.
            self.user_data_dir = user_data_dir
        else:
            self._user_data_dir = temp_profile_dir()
            self._custom_data_dir = False
        browser_executable_path = (
            browser_executable_path or find_chrome_executable()
        )
        self._browser_args = browser_args
        self.browser_executable_path = browser_executable_path
        self.headless = headless
        self.incognito = incognito
        self.guest = guest
        self.sandbox = sandbox
        self.host = host
        self.port = port
        self.expert = expert
        self._extensions = []
        # When using posix-ish operating system and running as root,
        # you must use no_sandbox=True
        if is_posix and is_root() and sandbox:
            logger.info("Detected root usage, auto-disabling sandbox mode.")
            self.sandbox = False
        self.autodiscover_targets = True
        self.lang = lang
        # Other keyword args will be accessible by attribute
        self.__dict__.update(kwargs)
        super().__init__()
        self._default_browser_args = [
            "--remote-allow-origins=*",
            "--no-first-run",
            "--no-service-autorun",
            "--disable-auto-reload",
            "--no-default-browser-check",
            "--homepage=about:blank",
            "--no-pings",
            "--wm-window-animations-disabled",
            "--animation-duration-scale=0",
            "--enable-privacy-sandbox-ads-apis",
            "--safebrowsing-disable-download-protection",
            '--simulate-outdated-no-au="Tue, 31 Dec 2099 23:59:59 GMT"',
            "--password-store=basic",
            "--deny-permission-prompts",
            "--disable-infobars",
            "--disable-breakpad",
            "--disable-component-update",
            "--disable-prompt-on-repost",
            "--disable-password-generation",
            "--disable-ipc-flooding-protection",
            "--disable-background-timer-throttling",
            "--disable-search-engine-choice-screen",
            "--disable-backgrounding-occluded-windows",
            "--disable-client-side-phishing-detection",
            "--disable-top-sites",
            "--disable-translate",
            "--disable-renderer-backgrounding",
            "--disable-background-networking",
            "--disable-dev-shm-usage",
            "--disable-features=IsolateOrigins,site-per-process,Translate,"
            "InsecureDownloadWarnings,DownloadBubble,DownloadBubbleV2,"
            "OptimizationTargetPrediction,OptimizationGuideModelDownloading,"
            "SidePanelPinning,UserAgentClientHint,PrivacySandboxSettings4",
        ]

    @property
    def browser_args(self):
        """Default plus user-supplied arguments, as a new sorted list."""
        combined = self._default_browser_args + self._browser_args
        combined.sort()
        return combined

    @property
    def user_data_dir(self):
        """The profile directory path currently configured."""
        return self._user_data_dir

    @user_data_dir.setter
    def user_data_dir(self, path: PathLike):
        # Setting a directory explicitly flags it as a custom one.
        self._user_data_dir = str(path)
        self._custom_data_dir = True

    @property
    def uses_custom_data_dir(self) -> bool:
        """True once a data directory has been set explicitly."""
        return self._custom_data_dir

    def add_extension(self, extension_path: PathLike):
        """
        Register an extension to load.
        A file is opened as a zip archive (eg: a .crx file) and extracted
        into a new temporary folder, which is what gets registered.
        A folder is searched recursively for "manifest.*" files: the
        parent of the last one found is registered, or the folder itself
        when none is found.
        :param extension_path: Folder or archive file of the extension.
        :raises FileNotFoundError: If ``extension_path`` does not exist.
        """
        source = pathlib.Path(extension_path)
        if not source.exists():
            raise FileNotFoundError(
                "Could not find anything here: %s" % str(source)
            )
        if source.is_file():
            unpack_dir = tempfile.mkdtemp(
                prefix="extension_", suffix=secrets.token_hex(4)
            )
            with zipfile.ZipFile(source, "r") as archive:
                archive.extractall(unpack_dir)
            self._extensions.append(unpack_dir)
        elif source.is_dir():
            load_dir = source
            for manifest in source.rglob("manifest.*"):
                load_dir = manifest.parent
            self._extensions.append(load_dir)

    def __call__(self):
        """
        Build the browser's command-line argument list.
        :return: A new list of arguments; the defaults come first, then
            the fixed extras, the user arguments not already present,
            and finally the flags derived from the instance attributes.
        """
        # The host and port will be added when starting the browser.
        # By the time it starts, the port is probably already taken.
        launch_args = list(self._default_browser_args)
        launch_args.append("--user-data-dir=%s" % self.user_data_dir)
        launch_args.append(
            "--disable-features=IsolateOrigins,site-per-process"
        )
        launch_args.append("--disable-session-crashed-bubble")
        if self.expert:
            launch_args.extend(
                (
                    "--disable-web-security",
                    "--disable-site-isolation-trials",
                )
            )
        if self._browser_args:
            # Filter against the list as it stands before extending it.
            extras = [
                arg for arg in self._browser_args if arg not in launch_args
            ]
            launch_args.extend(extras)
        switches = (
            (self.headless, "--headless=new"),
            (self.incognito, "--incognito"),
            (self.guest, "--guest"),
            (not self.sandbox, "--no-sandbox"),
        )
        for enabled, flag in switches:
            if enabled:
                launch_args.append(flag)
        if self.host:
            launch_args.append("--remote-debugging-host=%s" % self.host)
        if self.port:
            launch_args.append("--remote-debugging-port=%s" % self.port)
        return launch_args

    def add_argument(self, arg: str):
        """
        Append a command-line argument for the browser.
        :param arg: The argument to add.
        :raises ValueError: If the lower-cased argument contains one of
            the reserved words, which have dedicated Config attributes.
        """
        lowered = arg.lower()
        reserved_words = (
            "headless",
            "data-dir",
            "data_dir",
            "no-sandbox",
            "no_sandbox",
            "lang",
        )
        for word in reserved_words:
            if word in lowered:
                raise ValueError(
                    '"%s" is not allowed. Please use one of the '
                    'attributes of the Config object to set it.'
                    % arg
                )
        self._browser_args.append(arg)

    def __repr__(self):
        """
        Class name followed by one "name = value" line per public,
        truthy, non-callable attribute (properties are resolved first).
        """
        pieces = [f"{self.__class__.__name__}"]
        merged = {**self.__dict__, **self.__class__.__dict__}
        for key, value in merged.items():
            if key[0] == "_" or not value:
                continue
            if isinstance(value, property):
                value = getattr(self, key)
            if callable(value):
                continue
            pieces.append(f"\n\t{key} = {value}")
        return "".join(pieces)
240
+
241
+
242
def is_root():
    """
    Tell whether the current process runs with elevated privileges.
    Where ``os.getuid`` exists, this is True when the uid is 0.
    Otherwise the ``IsUserAnAdmin`` check of ``ctypes.windll.shell32``
    is used. Callers use this to decide on alternative handling
    (the Chrome sandbox) when launching as root.
    :return: True when running as root / admin, else False.
    """
    import ctypes
    import os

    try:
        uid = os.getuid()
    except AttributeError:
        # No getuid on this platform.
        return ctypes.windll.shell32.IsUserAnAdmin() != 0
    return uid == 0
254
+
255
+
256
def temp_profile_dir():
    """
    Create a new temporary directory and return its normalized path.
    The directory name starts with "uc_".
    """
    created_dir = tempfile.mkdtemp(prefix="uc_")
    return os.path.normpath(created_dir)
260
+
261
+
262
def find_chrome_executable(return_all=False):
    """
    Finds the chrome, beta, canary, chromium executable
    and returns the disk path.
    On posix systems, every PATH directory is checked for the known
    binary names (plus the standard application bundles on macOS).
    Elsewhere, the Chrome install folders below the program-files and
    local-appdata environment variables are checked.
    :param return_all: When True and at least one browser was found,
        return the list of all valid candidates instead of one path.
    :return: Normalized path of the valid candidate with the shortest
        path, or the list of all valid candidates (see ``return_all``).
    :raises FileNotFoundError: If no valid browser binary was found.
    """
    candidates = []
    if is_posix:
        # PATH can be unset or contain empty entries; skip those rather
        # than crash or probe names directly under the filesystem root.
        path_dirs = (os.environ.get("PATH") or "").split(os.pathsep)
        for item in path_dirs:
            if not item:
                continue
            for subitem in (
                "google-chrome",
                "chromium",
                "chromium-browser",
                "chrome",
                "google-chrome-stable",
            ):
                candidates.append(os.sep.join((item, subitem)))
        if "darwin" in sys.platform:
            candidates += [
                "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
                "/Applications/Chromium.app/Contents/MacOS/Chromium",
            ]
    else:
        for item in map(
            os.environ.get,
            (
                "PROGRAMFILES",
                "PROGRAMFILES(X86)",
                "LOCALAPPDATA",
                "PROGRAMW6432",
            ),
        ):
            if item is not None:
                for subitem in (
                    "Google/Chrome/Application",
                    "Google/Chrome Beta/Application",
                    "Google/Chrome Canary/Application",
                ):
                    candidates.append(
                        os.sep.join((item, subitem, "chrome.exe"))
                    )
    rv = []
    for candidate in candidates:
        # isfile (not exists): a directory with the execute bit set
        # is not a browser binary.
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            logger.debug("%s is a valid candidate... ", candidate)
            rv.append(candidate)
        else:
            logger.debug(
                "%s is not a valid candidate because it doesn't exist "
                "or isn't an executable.",
                candidate,
            )
    if return_all and rv:
        return rv
    if rv:
        # Assuming the shortest path wins
        return os.path.normpath(min(rv, key=len))
    raise FileNotFoundError(
        "Could not find a valid chrome browser binary. "
        "Please make sure Chrome is installed. "
        "Or use the keyword argument: "
        "'browser_executable_path=/path/to/your/browser'."
    )