seleniumbase 4.32.1__py3-none-any.whl → 4.32.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,317 @@
1
+ """CDP-Driver is based on NoDriver"""
2
+ from __future__ import annotations
3
+ import asyncio
4
+ import logging
5
+ import time
6
+ import types
7
+ import typing
8
+ from typing import Optional, List, Union, Callable
9
+ from .element import Element
10
+ from .browser import Browser
11
+ from .browser import PathLike
12
+ from .config import Config
13
+ from .tab import Tab
14
+ import mycdp as cdp
15
+
16
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Generic type variable used by the tree-filter helpers below, so that the
# annotated return type follows the type of the ``doc`` argument.
T = typing.TypeVar("T")
18
+
19
+
20
async def start(
    config: Optional[Config] = None,
    *,
    user_data_dir: Optional[PathLike] = None,
    headless: Optional[bool] = False,
    incognito: Optional[bool] = False,
    guest: Optional[bool] = False,
    browser_executable_path: Optional[PathLike] = None,
    browser_args: Optional[List[str]] = None,
    sandbox: Optional[bool] = True,
    lang: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[int] = None,
    expert: Optional[bool] = None,
    **kwargs: Optional[dict],
) -> Browser:
    """
    Launch a browser and return the resulting Browser object.
    Calling it bare (no parameters) launches an instance with the defaults.
    When ``config`` is given (and truthy), it is used as-is and every
    keyword parameter below is ignored. Otherwise a new Config is built
    from the keyword parameters.
    Note: Due to a Chrome-130 bug, use start_async or start_sync instead.
    (Calling this method directly could lead to an unresponsive browser)
    Note: New args are expected: Use kwargs only!
    Note: This should be called ``await start()``
    :param config: A ready-made Config; if omitted, one is created.
    :type config: Config
    :param user_data_dir: The profile directory to use.
    :type user_data_dir: PathLike
    :param headless: Set to True for headless mode.
    :type headless: bool
    :param incognito: Forwarded to Config.
    :type incognito: bool
    :param guest: Forwarded to Config.
    :type guest: bool
    :param browser_executable_path: Browser binary to use.
    :type browser_executable_path: PathLike
    :param browser_args:
     ["--some-chromeparam=somevalue", "some-other-param=someval"]
    :type browser_args: List[str]
    :param sandbox: Default True, but when set to False it adds --no-sandbox
     to the params, also when using linux under a root user,
     it adds False automatically (else Chrome won't start).
    :type sandbox: bool
    :param lang: language string
    :type lang: str
    :param port: If you connect to an existing debuggable session,
     you can specify the port here.
     If both host and port are provided,
     then a local Chrome browser will not be started!
    :type port: int
    :param host: If you connect to an existing debuggable session,
     you can specify the host here.
     If both host and port are provided,
     then a local Chrome browser will not be started!
    :type host: str
    :param expert: When set to True, "expert" mode is enabled.
     This means adding: --disable-web-security --disable-site-isolation-trials,
     as well as some scripts and patching useful for debugging.
     (For example, ensuring shadow-root is always in "open" mode.)
    :type expert: bool
    :param kwargs: Any extra keyword arguments are passed on to Config.
    :return: The object returned by ``Browser.create()``.
    """
    # A caller-supplied config wins; nothing else is looked at.
    if config:
        return await Browser.create(config)
    settings = Config(
        user_data_dir=user_data_dir,
        headless=headless,
        incognito=incognito,
        guest=guest,
        browser_executable_path=browser_executable_path,
        browser_args=browser_args,
        sandbox=sandbox,
        lang=lang,
        host=host,
        port=port,
        expert=expert,
        **kwargs,
    )
    return await Browser.create(settings)
91
+
92
+
93
async def start_async(*args, **kwargs) -> Browser:
    """
    Launch a browser from async code, using the stop-and-restart workaround.
    A throwaway headless "decoy" browser is started first and stopped again;
    the real browser is then started with the caller's own ``headless``
    value (False when not given) and with the decoy's ``user_data_dir``.
    Positional args are only forwarded to the final ``start()`` call.
    :param args: Positional arguments for the final ``start()`` call.
    :param kwargs: Keyword arguments for ``start()``.
    :return: The Browser returned by the final ``start()`` call.
    """
    requested_headless = kwargs.get("headless", False)
    # First launch: always headless, only used to prepare the profile dir.
    kwargs["headless"] = True
    decoy = await start(**kwargs)
    # Second launch: caller's headless setting, decoy's profile dir.
    kwargs["headless"] = requested_headless
    kwargs["user_data_dir"] = decoy.config.user_data_dir
    decoy.stop()  # Due to Chrome-130, must stop & start
    # Pause without blocking the event loop (time.sleep would freeze every
    # other task running on this loop for the duration of the pause).
    await asyncio.sleep(0.15)
    return await start(*args, **kwargs)
105
+
106
+
107
def start_sync(*args, **kwargs) -> Browser:
    """
    Launch a browser from synchronous code.
    Runs ``start()`` twice on the loop returned by
    ``asyncio.get_event_loop()``: first a throwaway headless "decoy"
    browser, which is stopped again, then the real browser with the
    caller's own ``headless`` value (False when not given) and the decoy's
    ``user_data_dir``.
    Positional args are only forwarded to the final ``start()`` call.
    :return: The Browser returned by the final ``start()`` call.
    """
    event_loop = asyncio.get_event_loop()
    requested_headless = kwargs.get("headless", False)
    # First launch is always headless.
    kwargs["headless"] = True
    decoy = event_loop.run_until_complete(start(**kwargs))
    # Restore the caller's choice and reuse the decoy's profile directory.
    kwargs["headless"] = requested_headless
    kwargs["user_data_dir"] = decoy.config.user_data_dir
    decoy.stop()  # Due to Chrome-130, must stop & start
    time.sleep(0.15)
    return event_loop.run_until_complete(start(*args, **kwargs))
120
+
121
+
122
async def create_from_driver(driver) -> Browser:
    """
    Create a Browser instance from a running driver instance.
    The host and port are read from ``driver.options.debugger_address``
    (expected form: "host:port") and placed on a fresh Config, which is
    then handed to ``start()``. Afterwards the driver's service is stopped
    and the driver's ``browser_pid`` / ``user_data_dir`` are reset.
    :param driver: Object exposing ``options.debugger_address``,
     ``browser_pid`` and ``service.stop()``.
    :return: The Browser returned by ``start()``.
    """
    from .config import Config

    conf = Config()
    address_parts = driver.options.debugger_address.split(":")
    host, port = address_parts
    port_number = int(port)
    conf.host = host
    conf.port = port_number
    # Create Browser instance
    browser = await start(conf)
    # The new Browser takes over the pid recorded on the driver.
    browser._process_pid = driver.browser_pid
    # Stop chromedriver binary
    driver.service.stop()
    driver.browser_pid = -1
    driver.user_data_dir = None
    return browser
137
+
138
+
139
def free_port() -> int:
    """
    Determine a free port using sockets.
    A TCP socket is bound to port 0 on 127.0.0.1, so that the operating
    system picks an unused port; that port number is returned after the
    socket has been closed again.
    NOTE(review): the port is released before returning, so another
    process could claim it before the caller uses it.
    :return: The port number that was assigned to the temporary socket.
    """
    import socket

    # The context manager closes the socket even if bind/listen raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.bind(("127.0.0.1", 0))
        probe.listen(5)
        port: int = probe.getsockname()[1]
    return port
149
+
150
+
151
def filter_recurse_all(
    doc: T, predicate: Callable[[cdp.dom.Node, Element], bool]
) -> List[T]:
    """
    Test each descendant using predicate(child),
    and return all descendants for which predicate(child) is truthy.
    The walk is depth-first. For a child with shadow roots, the children of
    its first shadow root are searched before the child's own children.
    (The shadow root object itself is not passed to the predicate.)
    :param doc: The cdp.dom.Node object or :py:class:`cdp_driver.Element`
    :param predicate: A function which takes a node as first parameter
     and returns a boolean, where True means include.
    :return: List of matching nodes (empty if there are none).
    :raises TypeError: If ``doc`` has no ``children`` attribute.
    """
    if not hasattr(doc, "children"):
        raise TypeError("Object should have a .children attribute!")
    out = []
    if doc and doc.children:
        for child in doc.children:
            if predicate(child):
                out.append(child)
            # Truthiness check (same as filter_recurse): an empty
            # shadow_roots list must not be indexed.
            if child.shadow_roots:
                out.extend(
                    filter_recurse_all(child.shadow_roots[0], predicate)
                )
            out.extend(filter_recurse_all(child, predicate))
    return out
174
+
175
+
176
def filter_recurse(
    doc: T, predicate: Callable[[cdp.dom.Node, Element], bool]
) -> T:
    """
    Depth-first search for the first descendant accepted by the predicate.
    For every child, in order: the child itself is tested, then (if it has
    shadow roots) the subtree below its first shadow root is searched, then
    the subtree below the child itself.
    :param doc: the cdp.dom.Node object or :py:class:`cdp_driver.Element`
    :param predicate: a function which takes a node as first parameter
     and returns a boolean, where True means "this is the one".
    :return: The first matching node, or None when nothing matches.
    :raises TypeError: If ``doc`` has no ``children`` attribute.
    """
    if not hasattr(doc, "children"):
        raise TypeError("Object should have a .children attribute!")
    if not (doc and doc.children):
        return None
    for node in doc.children:
        if predicate(node):
            return node
        if node.shadow_roots:
            in_shadow = filter_recurse(node.shadow_roots[0], predicate)
            if in_shadow:
                return in_shadow
        below = filter_recurse(node, predicate)
        if below:
            return below
    return None
201
+
202
+
203
def circle(
    x, y=None, radius=10, num=10, dir=0
) -> typing.Generator[typing.Tuple[float, float], None, None]:
    """
    A generator that calculates coordinates around a circle.
    The circle is centered on (int(x - 2 * radius), int(y - 2 * radius)).
    :param x: start x position
    :type x: int
    :param y: start y position (defaults to the value of x when omitted)
    :type y: int
    :param radius: size of the circle
    :type radius: int
    :param num: the amount of points calculated
     (higher => slower, more cpu, but more detailed)
    :type num: int
    :param dir: 0 walks the point indexes 0..num (ascending);
     any other value walks them num+1..1 (descending).
    :type dir: int
    :return: Generator of (x, y) tuples; num + 1 points in either direction.
    """
    import math

    # Only a missing y falls back to x; y=0 is a valid coordinate.
    if y is None:
        y = x
    center_x = int(x - radius * 2)
    center_y = int(y - radius * 2)
    step = (2 * math.pi) / num
    if dir == 0:
        # Regular direction
        indexes = range(0, num + 1, 1)
    else:
        # Opposite direction
        indexes = range(num + 1, 0, -1)
    for i in indexes:
        yield (
            center_x + radius * math.sin(step * i),
            center_y + radius * math.cos(step * i),
        )
237
+
238
+
239
def remove_from_tree(tree: cdp.dom.Node, node: cdp.dom.Node) -> cdp.dom.Node:
    """
    Remove every descendant of ``tree`` that has the same
    ``backend_node_id`` as ``node``. The tree is modified in place.
    :param tree: The root to search; must have a ``children`` attribute.
    :param node: The node to remove (matched by ``backend_node_id``).
    :return: The same ``tree`` object that was passed in.
    :raises TypeError: If ``tree`` has no ``children`` attribute.
    """
    if not hasattr(tree, "children"):
        raise TypeError("Object should have a .children attribute!")
    if tree and tree.children:
        # Iterate over a snapshot: removing from the list that is being
        # iterated would make the loop skip the sibling after each removal.
        for child in list(tree.children):
            if child.backend_node_id == node.backend_node_id:
                tree.children.remove(child)
            remove_from_tree(child, node)
    return tree
248
+
249
+
250
async def html_from_tree(
    tree: Union[cdp.dom.Node, Element], target: Tab
):
    """
    Concatenate the HTML of every descendant of ``tree``, depth-first.
    For each child, its own HTML is appended first (``get_html()`` for an
    Element, otherwise a ``cdp.dom.get_outer_html`` request sent through
    ``target``), followed by the result for the child's own subtree.
    :param tree: Root object; must have a ``children`` attribute.
    :param target: Object whose ``send()`` is awaited for non-Element nodes.
    :return: The concatenated HTML ("" when there are no children).
    :raises TypeError: If ``tree`` has no ``children`` attribute.
    """
    if not hasattr(tree, "children"):
        raise TypeError("Object should have a .children attribute!")
    html = ""
    if not (tree and tree.children):
        return html
    for node in tree.children:
        if isinstance(node, Element):
            fragment = await node.get_html()
        else:
            request = cdp.dom.get_outer_html(
                backend_node_id=node.backend_node_id
            )
            fragment = await target.send(request)
        html += fragment
        html += await html_from_tree(node, target)
    return html
268
+
269
+
270
def compare_target_info(
    info1: cdp.target.TargetInfo, info2: cdp.target.TargetInfo
) -> List[typing.Tuple[str, typing.Any, typing.Any]]:
    """
    Report which attributes differ between two target-info objects.
    When logging mode is set to debug, browser object will log when target
    info is changed. To provide more meaningful log messages, this function
    compares the instance dicts of both objects, key by key.
    Every key of ``info1`` must also be present on ``info2``.
    :param info1: The old target info.
    :param info2: The new target info.
    :return: A list of tuples
     [ ... ( key_which_has_changed, old_value, new_value) ]
    """
    before = info1.__dict__
    after = info2.__dict__
    changes = []
    for key, old_value in before.items():
        new_value = after[key]
        if new_value != old_value:
            changes.append((key, old_value, new_value))
    return changes
286
+
287
+
288
def loop():
    """
    Create a new asyncio event loop, install it as the current event loop,
    and return it.
    """
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    return event_loop
292
+
293
+
294
def cdp_get_module(domain: Union[str, types.ModuleType]):
    """
    Resolve a cdp domain to its module.
    A module object is returned unchanged. A string is first looked up as
    an attribute of the ``cdp`` package ("input" is looked up as "input_");
    if that fails, the string is imported as a regular module name.
    :param domain: A module, or the name of one.
    :return: The resolved module.
    :raises ModuleNotFoundError: If the name cannot be resolved either way.
    """
    import importlib

    if isinstance(domain, types.ModuleType):
        return domain
    if domain == "input":
        domain = "input_"
    try:
        resolved = getattr(cdp, domain)
        if not resolved:
            raise AttributeError
    except AttributeError:
        # Not an attribute of the cdp package: try a plain import.
        try:
            resolved = importlib.import_module(domain)
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "Could not find cdp module from input '%s'" % domain
            )
    return resolved
@@ -0,0 +1,322 @@
1
+ import logging
2
+ import os
3
+ import pathlib
4
+ import secrets
5
+ import sys
6
+ import tempfile
7
+ import zipfile
8
+ from typing import Union, List, Optional
9
+
10
+ __all__ = [
11
+ "Config",
12
+ "find_chrome_executable",
13
+ "temp_profile_dir",
14
+ "is_root",
15
+ "is_posix",
16
+ "PathLike",
17
+ ]
18
+
19
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# True when sys.platform starts with one of the listed platform prefixes.
is_posix = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))

# Anything accepted where a filesystem path is expected.
PathLike = Union[str, pathlib.Path]
# Placeholder default meaning "not specified; let Config decide".
AUTO = None
24
+
25
+
26
class Config:
    """Holds the settings from which the browser command line is built."""

    def __init__(
        self,
        user_data_dir: Optional[PathLike] = AUTO,
        headless: Optional[bool] = False,
        incognito: Optional[bool] = False,
        guest: Optional[bool] = False,
        browser_executable_path: Optional[PathLike] = AUTO,
        browser_args: Optional[List[str]] = AUTO,
        sandbox: Optional[bool] = True,
        lang: Optional[str] = "en-US",
        host: str = AUTO,
        port: int = AUTO,
        expert: bool = AUTO,
        **kwargs: dict,
    ):
        """
        Creates a config object.
        Can be called without any arguments to generate a best-practice config,
        which is recommended.
        Calling the object, eg: myconfig(), returns the list of arguments which
        are provided to the browser.
        Additional args can be added using the :py:obj:`~add_argument method`.
        Instances of this class are usually not instantiated by end users.
        :param user_data_dir: The data directory to use.
         When omitted, a temporary profile directory is created.
        :param headless: Set to True for headless mode.
        :param incognito: Set to True to add --incognito.
        :param guest: Set to True to add --guest.
        :param browser_executable_path:
         Specify browser executable, instead of using autodetect.
        :param browser_args: Forwarded to browser executable.
         Eg: ["--some-chromeparam=somevalue", "some-other-param=someval"]
        :param sandbox: Default True. When falsy, --no-sandbox is added.
         It is switched off automatically on a posix system under root.
        :param lang: Language string, stored as ``self.lang``
         (the default is "en-US").
        :param host: Value for --remote-debugging-host (if set).
        :param port: Value for --remote-debugging-port (if set).
        :param expert: When set to True, "expert" mode is enabled.
         This adds: --disable-web-security --disable-site-isolation-trials,
         as well as some scripts and patching useful for debugging.
         (For example, ensuring shadow-root is always in "open" mode.)
        :param kwargs: Copied onto the instance as attributes. These may
         override attributes set before them, such as the
         ``autodiscover_targets`` attribute (which is True by default).
        :type user_data_dir: PathLike
        :type headless: bool
        :type browser_executable_path: PathLike
        :type browser_args: list[str]
        :type sandbox: bool
        :type lang: str
        :type kwargs: dict
        """
        extra_args = browser_args or []
        if user_data_dir:
            # Goes through the property setter (marks the dir as custom).
            self.user_data_dir = user_data_dir
        else:
            self._user_data_dir = temp_profile_dir()
            self._custom_data_dir = False
        executable = browser_executable_path or find_chrome_executable()
        self._browser_args = extra_args
        self.browser_executable_path = executable
        self.headless = headless
        self.incognito = incognito
        self.guest = guest
        self.sandbox = sandbox
        self.host = host
        self.port = port
        self.expert = expert
        self._extensions = []
        # When using posix-ish operating system and running as root,
        # you must use no_sandbox=True
        if is_posix and is_root() and sandbox:
            logger.info("Detected root usage, auto-disabling sandbox mode.")
            self.sandbox = False
        self.autodiscover_targets = True
        self.lang = lang
        # Other keyword args will be accessible by attribute
        self.__dict__.update(kwargs)
        super().__init__()
        self._default_browser_args = [
            "--remote-allow-origins=*",
            "--no-first-run",
            "--no-service-autorun",
            "--no-default-browser-check",
            "--homepage=about:blank",
            "--no-pings",
            "--safebrowsing-disable-download-protection",
            '--simulate-outdated-no-au="Tue, 31 Dec 2099 23:59:59 GMT"',
            "--password-store=basic",
            "--deny-permission-prompts",
            "--disable-infobars",
            "--disable-breakpad",
            "--disable-component-update",
            "--disable-prompt-on-repost",
            "--disable-password-generation",
            "--disable-ipc-flooding-protection",
            "--disable-search-engine-choice-screen",
            "--disable-backgrounding-occluded-windows",
            "--disable-client-side-phishing-detection",
            "--disable-top-sites",
            "--disable-renderer-backgrounding",
            "--disable-background-networking",
            "--disable-dev-shm-usage",
            "--disable-features=IsolateOrigins,site-per-process,Translate,"
            "InsecureDownloadWarnings,DownloadBubble,DownloadBubbleV2,"
            "OptimizationTargetPrediction,OptimizationGuideModelDownloading,"
            "SidePanelPinning,UserAgentClientHint,PrivacySandboxSettings4",
        ]

    @property
    def browser_args(self):
        """Sorted list of the default args plus the user-supplied args."""
        combined = self._default_browser_args + self._browser_args
        return sorted(combined)

    @property
    def user_data_dir(self):
        """The profile directory (temporary or user-supplied)."""
        return self._user_data_dir

    @user_data_dir.setter
    def user_data_dir(self, path: PathLike):
        # Setting the directory explicitly marks it as a custom one.
        self._user_data_dir = str(path)
        self._custom_data_dir = True

    @property
    def uses_custom_data_dir(self) -> bool:
        """True when the profile directory was set explicitly."""
        return self._custom_data_dir

    def add_extension(self, extension_path: PathLike):
        """
        Adds an extension to load. You can set the extension_path to a
        folder (containing the manifest), or an extension zip file (.crx)
        A file is extracted into a new temporary directory, which is then
        registered. For a folder, the parent directory of the last
        "manifest.*" file found below it is registered (or the folder
        itself, when no such file is found).
        :param extension_path: Folder or archive file of the extension.
        :raises FileNotFoundError: If ``extension_path`` does not exist.
        """
        location = pathlib.Path(extension_path)
        if not location.exists():
            raise FileNotFoundError(
                "Could not find anything here: %s" % str(location)
            )
        if location.is_file():
            unpack_dir = tempfile.mkdtemp(
                prefix="extension_", suffix=secrets.token_hex(4)
            )
            with zipfile.ZipFile(location, "r") as archive:
                archive.extractall(unpack_dir)
            self._extensions.append(unpack_dir)
        elif location.is_dir():
            extension_root = location
            for manifest in location.rglob("manifest.*"):
                extension_root = manifest.parent
            self._extensions.append(extension_root)

    def __call__(self):
        """Return the list of command-line arguments for the browser."""
        # The host and port will be added when starting the browser.
        # By the time it starts, the port is probably already taken.
        args = self._default_browser_args.copy()
        args.append(f"--user-data-dir={self.user_data_dir}")
        # NOTE(review): the defaults already contain a --disable-features
        # switch, so this adds a second one; confirm how the browser
        # treats a repeated switch.
        args.append("--disable-features=IsolateOrigins,site-per-process")
        args.append("--disable-session-crashed-bubble")
        if self.expert:
            args.extend(
                ["--disable-web-security", "--disable-site-isolation-trials"]
            )
        if self._browser_args:
            # Only user args that are not in the list so far are added.
            missing = [arg for arg in self._browser_args if arg not in args]
            args.extend(missing)
        optional_flags = (
            (self.headless, "--headless=new"),
            (self.incognito, "--incognito"),
            (self.guest, "--guest"),
            (not self.sandbox, "--no-sandbox"),
        )
        args.extend(flag for enabled, flag in optional_flags if enabled)
        if self.host:
            args.append(f"--remote-debugging-host={self.host}")
        if self.port:
            args.append(f"--remote-debugging-port={self.port}")
        return args

    def add_argument(self, arg: str):
        """
        Add an extra command-line argument for the browser.
        :param arg: The argument to add.
        :raises ValueError: If the lowercased arg contains one of the
         reserved words, which must be set via Config attributes instead.
        """
        reserved_words = (
            "headless",
            "data-dir",
            "data_dir",
            "no-sandbox",
            "no_sandbox",
            "lang",
        )
        lowered = arg.lower()
        for word in reserved_words:
            if word in lowered:
                raise ValueError(
                    '"%s" is not allowed. Please use one of the '
                    'attributes of the Config object to set it.'
                    % arg
                )
        self._browser_args.append(arg)

    def __repr__(self):
        """Class name followed by one "name = value" line per public,
        truthy, non-callable attribute (properties are resolved)."""
        text = f"{self.__class__.__name__}"
        merged = {**self.__dict__, **self.__class__.__dict__}
        for name, value in merged.items():
            if name[0] == "_" or not value:
                continue
            if isinstance(value, property):
                value = getattr(self, name)
            if callable(value):
                continue
            text += f"\n\t{name} = {value}"
        return text
234
+
235
+
236
def is_root():
    """
    Tell whether the current process runs with elevated rights.
    Where ``os.getuid`` exists, the result is whether the uid equals 0.
    Where it does not exist, the answer comes from
    ``ctypes.windll.shell32.IsUserAnAdmin()`` instead.
    Used to detect a launch of chrome as root, which needs some
    alternative handling.
    """
    import ctypes
    import os

    try:
        uid = os.getuid()
    except AttributeError:
        # No os.getuid on this platform.
        return ctypes.windll.shell32.IsUserAnAdmin() != 0
    return uid == 0
248
+
249
+
250
def temp_profile_dir():
    """Create a new temp dir (prefix "uc_") and return its normalized path."""
    return os.path.normpath(tempfile.mkdtemp(prefix="uc_"))
254
+
255
+
256
def find_chrome_executable(return_all=False):
    """
    Finds the chrome, beta, canary, chromium executable
    and returns the disk path.
    On posix systems, every directory on PATH is checked for the known
    binary names (plus two fixed application paths on macOS). Otherwise,
    the known install folders below the program-files style environment
    variables are checked for "chrome.exe".
    :param return_all: When True (and at least one candidate is valid),
     return the list of all valid candidates instead of a single path.
    :return: The normalized path of the valid candidate with the shortest
     path, or the list of all valid candidates (see ``return_all``).
    :raises FileNotFoundError: If no valid candidate was found.
    """
    candidates = []
    if is_posix:
        # PATH may be unset; treat that as "no directories to search".
        path_value = os.environ.get("PATH")
        search_dirs = (
            path_value.split(os.pathsep) if path_value is not None else []
        )
        for directory in search_dirs:
            for binary_name in (
                "google-chrome",
                "chromium",
                "chromium-browser",
                "chrome",
                "google-chrome-stable",
            ):
                candidates.append(os.sep.join((directory, binary_name)))
        if "darwin" in sys.platform:
            candidates += [
                "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
                "/Applications/Chromium.app/Contents/MacOS/Chromium",
            ]
    else:
        for base_dir in map(
            os.environ.get,
            (
                "PROGRAMFILES",
                "PROGRAMFILES(X86)",
                "LOCALAPPDATA",
                "PROGRAMW6432",
            ),
        ):
            if base_dir is not None:
                for install_dir in (
                    "Google/Chrome/Application",
                    "Google/Chrome Beta/Application",
                    "Google/Chrome Canary/Application",
                ):
                    candidates.append(
                        os.sep.join((base_dir, install_dir, "chrome.exe"))
                    )
    rv = []
    for candidate in candidates:
        # isfile (not exists): a directory with a matching name would
        # otherwise pass, because directories usually have the X bit set.
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            logger.debug("%s is a valid candidate... ", candidate)
            rv.append(candidate)
        else:
            logger.debug(
                "%s is not a valid candidate because it doesn't exist "
                "or isn't an executable.",
                candidate,
            )
    if return_all and rv:
        return rv
    if rv:
        # Assuming the shortest path wins (first one wins on a tie)
        return os.path.normpath(min(rv, key=len))
    raise FileNotFoundError(
        "Could not find a valid chrome browser binary. "
        "Please make sure Chrome is installed. "
        "Or use the keyword argument: "
        "'browser_executable_path=/path/to/your/browser'."
    )