seleniumbase 4.32.1__py3-none-any.whl → 4.32.3__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,328 @@
1
+ """CDP-Driver is based on NoDriver"""
2
+ from __future__ import annotations
3
+ import asyncio
4
+ import logging
5
+ import time
6
+ import types
7
+ import typing
8
+ from seleniumbase.fixtures import shared_utils
9
+ from typing import Optional, List, Union, Callable
10
+ from .element import Element
11
+ from .browser import Browser
12
+ from .browser import PathLike
13
+ from .config import Config
14
+ from .tab import Tab
15
+ import mycdp as cdp
16
+
17
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Generic type variable used in the signatures of the tree-filter helpers.
T = typing.TypeVar("T")
19
+
20
+
21
async def start(
    config: Optional[Config] = None,
    *,
    user_data_dir: Optional[PathLike] = None,
    headless: Optional[bool] = False,
    incognito: Optional[bool] = False,
    guest: Optional[bool] = False,
    browser_executable_path: Optional[PathLike] = None,
    browser_args: Optional[List[str]] = None,
    sandbox: Optional[bool] = True,
    lang: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[int] = None,
    expert: Optional[bool] = None,
    **kwargs: Optional[dict],
) -> Browser:
    """
    Launch a browser and return the resulting Browser object.
    Every setting is keyword-only; calling ``await start()`` bare
    builds a Config from the defaults below.
    If a truthy ``config`` is passed, it is used as-is and all of the
    other parameters are ignored.
    Note: Due to a Chrome-130 bug, use start_async or start_sync instead.
    (Calling this method directly could lead to an unresponsive browser)
    Note: New args are expected: Use kwargs only!
    Note: This should be called ``await start()``
    :param config: A ready-made Config. When given, nothing else is read.
    :type config: Config
    :param user_data_dir: The browser profile directory.
    :type user_data_dir: PathLike
    :param headless: Forwarded to Config as its headless setting.
    :type headless: bool
    :param incognito: Forwarded to Config as its incognito setting.
    :type incognito: bool
    :param guest: Forwarded to Config as its guest setting.
    :type guest: bool
    :param browser_executable_path: The browser binary to use.
    :type browser_executable_path: PathLike
    :param browser_args:
        ["--some-chromeparam=somevalue", "some-other-param=someval"]
    :type browser_args: List[str]
    :param sandbox: Default True, but when set to False it adds --no-sandbox
        to the params, also when using linux under a root user,
        it adds False automatically (else Chrome won't start).
    :type sandbox: bool
    :param lang: language string
    :type lang: str
    :param port: If you connect to an existing debuggable session,
        you can specify the port here.
        If both host and port are provided,
        then a local Chrome browser will not be started!
    :type port: int
    :param host: If you connect to an existing debuggable session,
        you can specify the host here.
        If both host and port are provided,
        then a local Chrome browser will not be started!
    :type host: str
    :param expert: When set to True, "expert" mode is enabled.
        This means adding: --disable-web-security --disable-site-isolation-trials,
        as well as some scripts and patching useful for debugging.
        (For example, ensuring shadow-root is always in "open" mode.)
    :type expert: bool
    :param kwargs: Any extra keyword arguments are forwarded to Config.
    """
    if config:
        # A caller-supplied config wins; nothing else is consulted.
        return await Browser.create(config)
    built_config = Config(
        user_data_dir,
        headless,
        incognito,
        guest,
        browser_executable_path,
        browser_args,
        sandbox,
        lang,
        host=host,
        port=port,
        expert=expert,
        **kwargs,
    )
    return await Browser.create(built_config)
92
+
93
+
94
async def start_async(*args, **kwargs) -> Browser:
    """
    Coroutine wrapper around ``start()`` with a Chrome-130+ workaround.
    When ``shared_utils.is_chrome_130_or_newer`` reports True for the
    requested binary, a throwaway headless browser is started first,
    its ``user_data_dir`` is recorded into ``kwargs``, and it is stopped
    again. The real browser is then started with the caller's original
    ``headless`` value (False when none was given).
    :param args: Positional args; only forwarded to the final ``start()``.
    :param kwargs: Keyword args for ``start()``.
    """
    binary_location = kwargs.get("browser_executable_path")
    if shared_utils.is_chrome_130_or_newer(binary_location):
        wanted_headless = kwargs.get("headless", False)
        # The decoy launch reuses the caller's kwargs, forced to headless.
        kwargs["headless"] = True
        decoy = await start(**kwargs)
        kwargs["headless"] = wanted_headless
        kwargs["user_data_dir"] = decoy.config.user_data_dir
        # NOTE(review): these sleeps block the event loop; confirm whether
        # that is required by the workaround before changing them.
        time.sleep(0.2)
        decoy.stop()  # Due to Chrome-130, must stop & start
        time.sleep(0.1)
    return await start(*args, **kwargs)
111
+
112
+
113
def start_sync(*args, **kwargs) -> Browser:
    """
    Blocking wrapper around ``start()`` with a Chrome-130+ workaround.
    Runs ``start()`` to completion on the loop returned by
    ``asyncio.get_event_loop()``. When
    ``shared_utils.is_chrome_130_or_newer`` reports True for the
    requested binary, a throwaway headless browser is started first,
    its ``user_data_dir`` is recorded into ``kwargs``, and it is stopped
    again before the real browser is launched with the caller's original
    ``headless`` value (False when none was given).
    :param args: Positional args; only forwarded to the final ``start()``.
    :param kwargs: Keyword args for ``start()``.
    """
    event_loop = asyncio.get_event_loop()
    binary_location = kwargs.get("browser_executable_path")
    if shared_utils.is_chrome_130_or_newer(binary_location):
        wanted_headless = kwargs.get("headless", False)
        # The decoy launch reuses the caller's kwargs, forced to headless.
        kwargs["headless"] = True
        decoy = event_loop.run_until_complete(start(**kwargs))
        kwargs["headless"] = wanted_headless
        kwargs["user_data_dir"] = decoy.config.user_data_dir
        time.sleep(0.2)
        decoy.stop()  # Due to Chrome-130, must stop & start
        time.sleep(0.1)
    return event_loop.run_until_complete(start(*args, **kwargs))
131
+
132
+
133
async def create_from_driver(driver) -> Browser:
    """
    Create a Browser instance from a running driver instance.
    The host and port are read from ``driver.options.debugger_address``
    (expected in "host:port" form). After the Browser is created, the
    driver's service is stopped and its ``browser_pid`` and
    ``user_data_dir`` are reset, while the Browser takes over the pid.
    :param driver: An object exposing ``options.debugger_address``,
        ``browser_pid``, ``service`` and ``user_data_dir``.
    """
    from .config import Config

    attach_config = Config()
    address_parts = driver.options.debugger_address.split(":")
    debug_host, debug_port = address_parts
    port_number = int(debug_port)
    attach_config.host = debug_host
    attach_config.port = port_number
    # Create Browser instance
    new_browser = await start(attach_config)
    new_browser._process_pid = driver.browser_pid
    # Stop chromedriver binary
    driver.service.stop()
    driver.browser_pid = -1
    driver.user_data_dir = None
    return new_browser
148
+
149
+
150
def free_port() -> int:
    """
    Determines a free port using sockets.
    A TCP socket is bound to port 0 on 127.0.0.1 so the OS picks a port,
    the chosen port number is read back, and the socket is closed.
    :return: The port number that the OS assigned.
    :raises OSError: If the socket cannot be created, bound or listened on.
    """
    import socket

    # The context manager guarantees the socket is closed even when
    # bind/listen/getsockname raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as free_socket:
        free_socket.bind(("127.0.0.1", 0))
        free_socket.listen(5)
        port: int = free_socket.getsockname()[1]
    return port
160
+
161
+
162
def filter_recurse_all(
    doc: T, predicate: Callable[[cdp.dom.Node, Element], bool]
) -> List[T]:
    """
    Test each child using predicate(child),
    and return all children for which predicate(child) == True
    The search is depth-first. For each child, the first entry of its
    ``shadow_roots`` (when there is one) is searched before the child's
    own children.
    :param doc: The cdp.dom.Node object or :py:class:`cdp_driver.Element`
    :param predicate: A function which takes a node as first parameter
        and returns a boolean, where True means include.
    :return: A list of every matching descendant (empty if none match).
    :raises TypeError: If ``doc`` has no ``children`` attribute.
    """
    if not hasattr(doc, "children"):
        raise TypeError("Object should have a .children attribute!")
    out = []
    if doc and doc.children:
        for child in doc.children:
            if predicate(child):
                out.append(child)
            # Truthiness (not "is not None") so that an empty shadow_roots
            # list is skipped instead of raising IndexError on [0].
            if child.shadow_roots:
                out.extend(
                    filter_recurse_all(child.shadow_roots[0], predicate)
                )
            out.extend(filter_recurse_all(child, predicate))
    return out
185
+
186
+
187
def filter_recurse(
    doc: T, predicate: Callable[[cdp.dom.Node, Element], bool]
) -> T:
    """
    Depth-first search for the first descendant accepted by ``predicate``.
    For each child, in order: the child itself is tested, then the first
    entry of its ``shadow_roots`` (if any) is searched, then the child's
    own children are searched.
    :param doc: the cdp.dom.Node object or :py:class:`cdp_driver.Element`
    :param predicate: a function which takes a node as first parameter
        and returns a boolean, where True means include.
    :return: The first match, or None when nothing matches.
    :raises TypeError: If ``doc`` has no ``children`` attribute.
    """
    if not hasattr(doc, "children"):
        raise TypeError("Object should have a .children attribute!")
    if not (doc and doc.children):
        return None
    for node in doc.children:
        if predicate(node):
            return node
        if node.shadow_roots:
            from_shadow = filter_recurse(node.shadow_roots[0], predicate)
            if from_shadow:
                return from_shadow
        from_children = filter_recurse(node, predicate)
        if from_children:
            return from_children
    return None
212
+
213
+
214
def circle(
    x, y=None, radius=10, num=10, dir=0
) -> typing.Generator[typing.Tuple[float, float], None, None]:
    """
    A generator will calculate coordinates around a circle.
    The points lie on a circle of the given radius whose center is
    ``(int(x - 2 * radius), int(y - 2 * radius))``.
    :param x: start x position
    :type x: int
    :param y: start y position. When omitted (None), x is used.
        A value of 0 is a valid position and is used as given.
    :type y: int
    :param radius: size of the circle
    :type radius: int
    :param num: the amount of points calculated
        (higher => slower, more cpu, but more detailed)
    :type num: int
    :param dir: 0 steps through indices 0..num;
        any other value steps through indices num+1..1 (reverse order).
    :type dir: int
    :return: A generator of (x, y) float tuples.
    """
    import math

    # Only a missing y falls back to x; "not y" would also swallow y == 0.
    if y is None:
        y = x
    center_x = int(x - radius * 2)
    center_y = int(y - radius * 2)
    step_angle = (2 * math.pi) / num
    if dir == 0:
        # Regular direction
        indices = range(0, num + 1, 1)
    else:
        # Opposite direction
        indices = range(num + 1, 0, -1)
    for i in indices:
        point_x = center_x + radius * math.sin(step_angle * i)
        point_y = center_y + radius * math.cos(step_angle * i)
        yield point_x, point_y
248
+
249
+
250
def remove_from_tree(tree: cdp.dom.Node, node: cdp.dom.Node) -> cdp.dom.Node:
    """
    Remove every descendant of ``tree`` whose ``backend_node_id`` equals
    that of ``node``. The tree is modified in place.
    :param tree: The root to prune; must have a ``children`` attribute.
    :param node: The node whose ``backend_node_id`` identifies what to remove.
    :return: The same ``tree`` object that was passed in.
    :raises TypeError: If ``tree`` has no ``children`` attribute.
    """
    if not hasattr(tree, "children"):
        raise TypeError("Object should have a .children attribute!")
    if tree and tree.children:
        # Walk a snapshot: removing from the list being iterated would
        # make the loop skip the sibling that follows each removed child.
        for child in list(tree.children):
            if child.backend_node_id == node.backend_node_id:
                tree.children.remove(child)
            remove_from_tree(child, node)
    return tree
259
+
260
+
261
async def html_from_tree(
    tree: Union[cdp.dom.Node, Element], target: Tab
):
    """
    Concatenate the HTML of every descendant of ``tree``, depth-first.
    For each child, its own HTML is appended first, followed by the
    result of recursing into that child. Element children provide their
    HTML via ``get_html()``; any other child is resolved by sending a
    ``cdp.dom.get_outer_html`` command for its ``backend_node_id``
    through ``target``.
    :param tree: The root node; must have a ``children`` attribute.
    :param target: The tab used to send CDP commands.
    :return: The concatenated HTML string ("" when there are no children).
    :raises TypeError: If ``tree`` has no ``children`` attribute.
    """
    if not hasattr(tree, "children"):
        raise TypeError("Object should have a .children attribute!")
    html = ""
    if not (tree and tree.children):
        return html
    for node in tree.children:
        if isinstance(node, Element):
            html += await node.get_html()
        else:
            outer_html_command = cdp.dom.get_outer_html(
                backend_node_id=node.backend_node_id
            )
            html += await target.send(outer_html_command)
        html += await html_from_tree(node, target)
    return html
279
+
280
+
281
def compare_target_info(
    info1: cdp.target.TargetInfo, info2: cdp.target.TargetInfo
) -> List[typing.Tuple[str, typing.Any, typing.Any]]:
    """
    Report which attributes differ between two target-info objects.
    The instance dictionaries of both objects are compared key by key,
    following the key order of ``info1``. The docstring of the original
    notes this is used to give more meaningful debug log messages when
    target info changes.
    :param info1: The old target info.
    :param info2: The new target info. It must have every attribute
        that ``info1`` has.
    :return: A list of tuples
        [ ... ( key_which_has_changed, old_value, new_value) ]
    """
    old_attrs = info1.__dict__
    new_attrs = info2.__dict__
    changes = []
    for key, old_value in old_attrs.items():
        new_value = new_attrs[key]
        if new_value != old_value:
            changes.append((key, old_value, new_value))
    return changes
297
+
298
+
299
def loop():
    """
    Create a new asyncio event loop, install it as the current event
    loop, and return it.
    """
    fresh_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(fresh_loop)
    return fresh_loop
303
+
304
+
305
def cdp_get_module(domain: Union[str, types.ModuleType]):
    """
    Resolve a cdp module from a module object or a name.
    A module object is returned unchanged. A name is first looked up as
    an attribute of ``cdp`` (the name "input" is looked up as "input_");
    if that attribute is missing or falsy, the name is imported with
    ``importlib.import_module`` instead.
    :param domain: A module, or the name of one.
    :return: The resolved module.
    :raises ModuleNotFoundError: If the name cannot be resolved either way.
    """
    import importlib

    if isinstance(domain, types.ModuleType):
        return domain
    try:
        if domain == "input":
            domain = "input_"
        resolved = getattr(cdp, domain)
        if not resolved:
            raise AttributeError
    except AttributeError:
        # Not an attribute of cdp: fall back to a regular import.
        try:
            resolved = importlib.import_module(domain)
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "Could not find cdp module from input '%s'" % domain
            )
    return resolved
@@ -0,0 +1,328 @@
1
+ import logging
2
+ import os
3
+ import pathlib
4
+ import secrets
5
+ import sys
6
+ import tempfile
7
+ import zipfile
8
+ from typing import Union, List, Optional
9
+
10
# Public names exported by a star-import of this module.
__all__ = [
    "Config",
    "find_chrome_executable",
    "temp_profile_dir",
    "is_root",
    "is_posix",
    "PathLike",
]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# True when sys.platform starts with one of the listed platform names.
is_posix = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))

# Type alias for values accepted as filesystem paths.
PathLike = Union[str, pathlib.Path]
# Default used by Config for parameters the caller did not provide.
AUTO = None
24
+
25
+
26
class Config:
    """Holds browser launch settings and renders them as arguments."""

    def __init__(
        self,
        user_data_dir: Optional[PathLike] = AUTO,
        headless: Optional[bool] = False,
        incognito: Optional[bool] = False,
        guest: Optional[bool] = False,
        browser_executable_path: Optional[PathLike] = AUTO,
        browser_args: Optional[List[str]] = AUTO,
        sandbox: Optional[bool] = True,
        lang: Optional[str] = "en-US",
        host: str = AUTO,
        port: int = AUTO,
        expert: bool = AUTO,
        **kwargs: dict,
    ):
        """
        Creates a config object.
        Can be called without any arguments to generate a best-practice config,
        which is recommended.
        Calling the object, eg: myconfig(), returns the list of arguments which
        are provided to the browser.
        Additional args can be added using the :py:obj:`~add_argument method`.
        Instances of this class are usually not instantiated by end users.
        :param user_data_dir: The data directory to use. When not given,
            a temporary profile directory is created.
        :param headless: set to True for headless mode
        :param incognito: set to True to add --incognito
        :param guest: set to True to add --guest
        :param browser_executable_path:
            Specify browser executable, instead of using autodetect.
        :param browser_args: Forwarded to browser executable.
            Eg: ["--some-chromeparam=somevalue", "some-other-param=someval"]
        :param sandbox: When False, --no-sandbox is added to the arguments.
            Switched off automatically on a posix platform when is_root()
            reports True.
        :param lang: Language string, stored as the ``lang`` attribute.
        :param host: Remote debugging host, if any.
        :param port: Remote debugging port, if any.
        :param expert: When set to True, "expert" mode is enabled.
            This adds: --disable-web-security --disable-site-isolation-trials,
            as well as some scripts and patching useful for debugging.
            (For example, ensuring shadow-root is always in "open" mode.)
        :param kwargs: Extra values; each becomes an instance attribute.
        :type user_data_dir: PathLike
        :type headless: bool
        :type browser_executable_path: PathLike
        :type browser_args: list[str]
        :type sandbox: bool
        :type lang: str
        :type kwargs: dict
        """
        browser_args = browser_args or []
        if user_data_dir:
            # Goes through the property setter, which marks it as custom.
            self.user_data_dir = user_data_dir
        else:
            self._user_data_dir = temp_profile_dir()
            self._custom_data_dir = False
        browser_executable_path = (
            browser_executable_path or find_chrome_executable()
        )
        self._browser_args = browser_args
        self.browser_executable_path = browser_executable_path
        self.headless = headless
        self.incognito = incognito
        self.guest = guest
        self.sandbox = sandbox
        self.host = host
        self.port = port
        self.expert = expert
        self._extensions = []
        # When using posix-ish operating system and running as root,
        # you must use no_sandbox=True
        if is_posix and is_root() and sandbox:
            logger.info("Detected root usage, auto-disabling sandbox mode.")
            self.sandbox = False
        self.autodiscover_targets = True
        self.lang = lang
        # Other keyword args will be accessible by attribute
        self.__dict__.update(kwargs)
        super().__init__()
        self._default_browser_args = [
            "--remote-allow-origins=*",
            "--no-first-run",
            "--no-service-autorun",
            "--disable-auto-reload",
            "--no-default-browser-check",
            "--homepage=about:blank",
            "--no-pings",
            "--wm-window-animations-disabled",
            "--animation-duration-scale=0",
            "--enable-privacy-sandbox-ads-apis",
            "--safebrowsing-disable-download-protection",
            '--simulate-outdated-no-au="Tue, 31 Dec 2099 23:59:59 GMT"',
            "--password-store=basic",
            "--deny-permission-prompts",
            "--disable-infobars",
            "--disable-breakpad",
            "--disable-component-update",
            "--disable-prompt-on-repost",
            "--disable-password-generation",
            "--disable-ipc-flooding-protection",
            "--disable-background-timer-throttling",
            "--disable-search-engine-choice-screen",
            "--disable-backgrounding-occluded-windows",
            "--disable-client-side-phishing-detection",
            "--disable-top-sites",
            "--disable-translate",
            "--disable-renderer-backgrounding",
            "--disable-background-networking",
            "--disable-dev-shm-usage",
            "--disable-features=IsolateOrigins,site-per-process,Translate,"
            "InsecureDownloadWarnings,DownloadBubble,DownloadBubbleV2,"
            "OptimizationTargetPrediction,OptimizationGuideModelDownloading,"
            "SidePanelPinning,UserAgentClientHint,PrivacySandboxSettings4",
        ]

    @property
    def browser_args(self):
        """Sorted list of the default args plus the user-supplied args."""
        combined = self._default_browser_args + self._browser_args
        return sorted(combined)

    @property
    def user_data_dir(self):
        """The profile directory path in use."""
        return self._user_data_dir

    @user_data_dir.setter
    def user_data_dir(self, path: PathLike):
        # Setting a path explicitly marks the directory as user-provided.
        self._user_data_dir = str(path)
        self._custom_data_dir = True

    @property
    def uses_custom_data_dir(self) -> bool:
        """True when the data dir was set through the property setter."""
        return self._custom_data_dir

    def add_extension(self, extension_path: PathLike):
        """
        Adds an extension to load. You can set the extension_path to a
        folder (containing the manifest), or an extension zip file (.crx)
        A file is extracted into a new temporary directory, and that
        directory is registered. For a folder, the parent of the last
        "manifest.*" file found recursively is registered (or the folder
        itself when no such file is found).
        :param extension_path: Path to the extension folder or archive.
        :raises FileNotFoundError: If the path does not exist.
        """
        location = pathlib.Path(extension_path)
        if not location.exists():
            raise FileNotFoundError(
                "Could not find anything here: %s" % str(location)
            )
        if location.is_file():
            unpack_dir = tempfile.mkdtemp(
                prefix="extension_", suffix=secrets.token_hex(4)
            )
            with zipfile.ZipFile(location, "r") as archive:
                archive.extractall(unpack_dir)
            self._extensions.append(unpack_dir)
        elif location.is_dir():
            extension_dir = location
            for manifest in location.rglob("manifest.*"):
                extension_dir = manifest.parent
            self._extensions.append(extension_dir)

    def __call__(self):
        """Return the list of command-line arguments for the browser."""
        # The host and port will be added when starting the browser.
        # By the time it starts, the port is probably already taken.
        args = self._default_browser_args.copy()
        args.append("--user-data-dir=%s" % self.user_data_dir)
        # NOTE(review): the defaults already contain a --disable-features
        # entry; confirm how the browser treats the repeated flag.
        args.append("--disable-features=IsolateOrigins,site-per-process")
        args.append("--disable-session-crashed-bubble")
        if self.expert:
            args.extend(
                ["--disable-web-security", "--disable-site-isolation-trials"]
            )
        if self._browser_args:
            # Filter against the list as it stands *before* extending.
            missing = [arg for arg in self._browser_args if arg not in args]
            args.extend(missing)
        if self.headless:
            args.append("--headless=new")
        if self.incognito:
            args.append("--incognito")
        if self.guest:
            args.append("--guest")
        if not self.sandbox:
            args.append("--no-sandbox")
        if self.host:
            args.append("--remote-debugging-host=%s" % self.host)
        if self.port:
            args.append("--remote-debugging-port=%s" % self.port)
        return args

    def add_argument(self, arg: str):
        """
        Append a browser argument.
        :param arg: The argument to add.
        :raises ValueError: If the lower-cased argument contains one of
            the reserved fragments, which are meant to be controlled via
            Config attributes instead.
        """
        reserved_fragments = (
            "headless",
            "data-dir",
            "data_dir",
            "no-sandbox",
            "no_sandbox",
            "lang",
        )
        lowered = arg.lower()
        for fragment in reserved_fragments:
            if fragment in lowered:
                raise ValueError(
                    '"%s" is not allowed. Please use one of the '
                    'attributes of the Config object to set it.'
                    % arg
                )
        self._browser_args.append(arg)

    def __repr__(self):
        """Class name followed by one line per truthy public attribute."""
        text = f"{self.__class__.__name__}"
        merged = {**self.__dict__, **self.__class__.__dict__}
        for name, value in merged.items():
            if name[0] == "_" or not value:
                continue
            if isinstance(value, property):
                # Resolve properties to their value on this instance.
                value = getattr(self, name)
            if callable(value):
                continue
            text += f"\n\t{name} = {value}"
        return text
240
+
241
+
242
def is_root():
    """
    Report whether the current process runs with elevated privileges.
    Where ``os.getuid`` exists, this is True when the uid is 0.
    Otherwise the Windows ``shell32.IsUserAnAdmin`` call is used.
    Launching chrome as root needs some alternative handling.
    """
    import ctypes
    import os

    if hasattr(os, "getuid"):
        return os.getuid() == 0
    return ctypes.windll.shell32.IsUserAnAdmin() != 0
254
+
255
+
256
def temp_profile_dir():
    """Create a temp dir with the "uc_" prefix; return its normalized path."""
    created_dir = tempfile.mkdtemp(prefix="uc_")
    return os.path.normpath(created_dir)
260
+
261
+
262
def find_chrome_executable(return_all=False):
    """
    Finds the chrome, beta, canary, chromium executable
    and returns the disk path.
    On posix platforms, every PATH directory is probed for the known
    binary names (plus the standard application bundles on macOS).
    Elsewhere, the Windows program directories named by the environment
    are probed for chrome.exe.
    :param return_all: When True and at least one candidate is valid,
        return the list of all valid candidates instead of a single path.
    :return: The normalized path of the valid candidate with the shortest
        path (the first one on ties), or a list when ``return_all`` applies.
    :raises FileNotFoundError: If no valid candidate exists.
    """
    candidates = []
    if is_posix:
        # PATH may be unset: treat that as an empty search path rather than
        # failing on None. Empty PATH entries are skipped.
        search_dirs = [
            entry
            for entry in os.environ.get("PATH", "").split(os.pathsep)
            if entry
        ]
        for item in search_dirs:
            for subitem in (
                "google-chrome",
                "chromium",
                "chromium-browser",
                "chrome",
                "google-chrome-stable",
            ):
                candidates.append(os.sep.join((item, subitem)))
        if "darwin" in sys.platform:
            candidates += [
                "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
                "/Applications/Chromium.app/Contents/MacOS/Chromium",
            ]
    else:
        for item in map(
            os.environ.get,
            (
                "PROGRAMFILES",
                "PROGRAMFILES(X86)",
                "LOCALAPPDATA",
                "PROGRAMW6432",
            ),
        ):
            if item is not None:
                for subitem in (
                    "Google/Chrome/Application",
                    "Google/Chrome Beta/Application",
                    "Google/Chrome Canary/Application",
                ):
                    candidates.append(
                        os.sep.join((item, subitem, "chrome.exe"))
                    )
    rv = []
    for candidate in candidates:
        # isfile (not exists): a directory that happens to be named
        # "chrome" also passes the X_OK check but is not a browser binary.
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            logger.debug("%s is a valid candidate... ", candidate)
            rv.append(candidate)
        else:
            logger.debug(
                "%s is not a valid candidate because it doesn't exist "
                "or isn't an executable.",
                candidate,
            )
    if return_all and rv:
        return rv
    if rv:
        # Assuming the shortest path wins
        winner = min(rv, key=len)
        return os.path.normpath(winner)
    raise FileNotFoundError(
        "Could not find a valid chrome browser binary. "
        "Please make sure Chrome is installed. "
        "Or use the keyword argument: "
        "'browser_executable_path=/path/to/your/browser'."
    )