seleniumbase 4.32.1__py3-none-any.whl → 4.32.2__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,317 @@
1
+ """CDP-Driver is based on NoDriver"""
2
+ from __future__ import annotations
3
+ import asyncio
4
+ import logging
5
+ import time
6
+ import types
7
+ import typing
8
+ from typing import Optional, List, Union, Callable
9
+ from .element import Element
10
+ from .browser import Browser
11
+ from .browser import PathLike
12
+ from .config import Config
13
+ from .tab import Tab
14
+ import mycdp as cdp
15
+
16
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Generic type variable used in the annotations of the tree-filter helpers.
T = typing.TypeVar("T")
18
+
19
+
20
async def start(
    config: Optional[Config] = None,
    *,
    user_data_dir: Optional[PathLike] = None,
    headless: Optional[bool] = False,
    incognito: Optional[bool] = False,
    guest: Optional[bool] = False,
    browser_executable_path: Optional[PathLike] = None,
    browser_args: Optional[List[str]] = None,
    sandbox: Optional[bool] = True,
    lang: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[int] = None,
    expert: Optional[bool] = None,
    **kwargs: Optional[dict],
) -> Browser:
    """
    Launch (or connect to) a browser and return the Browser object.
    Calling it bare, ``await start()``, builds a default Config.
    When a truthy ``config`` is passed, it is used as-is and every
    other argument is ignored.
    Note: Due to a Chrome-130 bug, use start_async or start_sync instead.
    (Calling this method directly could lead to an unresponsive browser)
    Note: New args are expected: Use kwargs only!
    Note: This should be called ``await start()``
    :param config: A ready-made Config; skips building one from the args.
    :type config: Config
    :param user_data_dir: Profile directory handed to Config.
    :type user_data_dir: PathLike
    :param headless: Headless flag handed to Config.
    :type headless: bool
    :param incognito: Incognito flag handed to Config.
    :type incognito: bool
    :param guest: Guest flag handed to Config.
    :type guest: bool
    :param browser_executable_path: Browser binary handed to Config.
    :type browser_executable_path: PathLike
    :param browser_args:
     ["--some-chromeparam=somevalue", "some-other-param=someval"]
    :type browser_args: List[str]
    :param sandbox: Default True, but when set to False it adds --no-sandbox
     to the params, also when using linux under a root user,
     it adds False automatically (else Chrome won't start).
    :type sandbox: bool
    :param lang: language string
    :type lang: str
    :param port: If you connect to an existing debuggable session,
     you can specify the port here.
     If both host and port are provided,
     then a local Chrome browser will not be started!
    :type port: int
    :param host: If you connect to an existing debuggable session,
     you can specify the host here.
     If both host and port are provided,
     then a local Chrome browser will not be started!
    :type host: str
    :param expert: When set to True, "expert" mode is enabled.
     This means adding: --disable-web-security --disable-site-isolation-trials,
     as well as some scripts and patching useful for debugging.
     (For example, ensuring shadow-root is always in "open" mode.)
    :type expert: bool
    :param kwargs: Extra keyword arguments forwarded to Config.
    """
    if config:
        # A caller-supplied config wins over all keyword arguments.
        return await Browser.create(config)
    launch_config = Config(
        user_data_dir=user_data_dir,
        headless=headless,
        incognito=incognito,
        guest=guest,
        browser_executable_path=browser_executable_path,
        browser_args=browser_args,
        sandbox=sandbox,
        lang=lang,
        host=host,
        port=port,
        expert=expert,
        **kwargs,
    )
    return await Browser.create(launch_config)
91
+
92
+
93
async def start_async(*args, **kwargs) -> Browser:
    """
    Start a browser from a coroutine, working around a Chrome-130 bug.
    A throw-away headless "decoy" browser is started first (from the
    keyword arguments only), then stopped, and the real browser is
    started with the caller's arguments, reusing the decoy's
    ``user_data_dir`` and the caller's ``headless`` value
    (False when not given).
    :param args: Positional arguments forwarded to the final ``start``.
    :param kwargs: Keyword arguments forwarded to ``start``.
    :return: The Browser returned by the final ``start`` call.
    """
    headless = kwargs.get("headless", False)
    # Separate dict, so the caller's kwargs are not edited for the decoy.
    decoy_args = {**kwargs, "headless": True}
    driver = await start(**decoy_args)
    kwargs["headless"] = headless
    kwargs["user_data_dir"] = driver.config.user_data_dir
    driver.stop()  # Due to Chrome-130, must stop & start
    # Non-blocking pause: time.sleep() here would freeze the event loop.
    await asyncio.sleep(0.15)
    return await start(*args, **kwargs)
105
+
106
+
107
def start_sync(*args, **kwargs) -> Browser:
    """
    Start a browser from synchronous code, working around a Chrome-130 bug.
    A throw-away headless "decoy" browser is started first (from the
    keyword arguments only), then stopped, and the real browser is
    started with the caller's arguments, reusing the decoy's
    ``user_data_dir`` and the caller's ``headless`` value
    (False when not given).
    :param args: Positional arguments forwarded to the final ``start``.
    :param kwargs: Keyword arguments forwarded to ``start``.
    :return: The Browser returned by the final ``start`` call.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current event loop (e.g. a worker thread, or newer Python
        # versions where get_event_loop() no longer creates one).
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    headless = kwargs.get("headless", False)
    # Separate dict, so the caller's kwargs are not edited for the decoy.
    decoy_args = {**kwargs, "headless": True}
    driver = loop.run_until_complete(start(**decoy_args))
    kwargs["headless"] = headless
    kwargs["user_data_dir"] = driver.config.user_data_dir
    driver.stop()  # Due to Chrome-130, must stop & start
    time.sleep(0.15)
    return loop.run_until_complete(start(*args, **kwargs))
120
+
121
+
122
async def create_from_driver(driver) -> Browser:
    """
    Build a Browser that attaches to the browser of a running driver.
    The host and port are read from ``driver.options.debugger_address``.
    After attaching, the driver's service is stopped and the driver's
    ``browser_pid`` / ``user_data_dir`` are reset; the original pid is
    stored on the returned Browser as ``_process_pid``.
    :param driver: A running driver instance exposing ``options``,
     ``service``, ``browser_pid`` and ``user_data_dir``.
    :return: The Browser connected to the driver's debugger address.
    """
    from .config import Config

    attach_config = Config()
    address_parts = driver.options.debugger_address.split(":")
    debug_host, debug_port = address_parts
    port_number = int(debug_port)
    attach_config.host = debug_host
    attach_config.port = port_number
    # Create Browser instance
    attached = await start(attach_config)
    attached._process_pid = driver.browser_pid
    # Stop chromedriver binary
    driver.service.stop()
    driver.browser_pid = -1
    driver.user_data_dir = None
    return attached
137
+
138
+
139
def free_port() -> int:
    """
    Determine a free TCP port on 127.0.0.1 using sockets.
    A socket is bound to port 0, so the operating system picks the port.
    The socket is closed before returning (also when an error occurs),
    so the port is no longer reserved once this function returns.
    :return: The port number that was assigned to the temporary socket.
    """
    import socket

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as free_socket:
        free_socket.bind(("127.0.0.1", 0))
        free_socket.listen(5)
        port: int = free_socket.getsockname()[1]
    return port
149
+
150
+
151
def filter_recurse_all(
    doc: T, predicate: Callable[[cdp.dom.Node, Element], bool]
) -> List[T]:
    """
    Test each descendant using predicate(child),
    and return all descendants for which predicate(child) is truthy.
    For a child with a non-empty ``shadow_roots``, the children of its
    first shadow root are searched before the child's own children.
    :param doc: The cdp.dom.Node object or :py:class:`cdp_driver.Element`
    :param predicate: A function which takes a node as first parameter
     and returns a boolean, where True means include.
    :return: The matching nodes, in the order they were visited.
    :raises TypeError: If ``doc`` has no ``children`` attribute.
    """
    if not hasattr(doc, "children"):
        raise TypeError("Object should have a .children attribute!")
    out = []
    if doc and doc.children:
        for child in doc.children:
            if predicate(child):
                out.append(child)
            # Truthiness check: an empty shadow_roots list has no [0].
            if child.shadow_roots:
                out.extend(
                    filter_recurse_all(child.shadow_roots[0], predicate)
                )
            out.extend(filter_recurse_all(child, predicate))
    return out
174
+
175
+
176
def filter_recurse(
    doc: T, predicate: Callable[[cdp.dom.Node, Element], bool]
) -> T:
    """
    Depth-first search for the first descendant accepted by predicate.
    For each child, in order: the child itself is tested, then the
    children of its first shadow root (when ``shadow_roots`` is
    non-empty), then the child's own children.
    :param doc: the cdp.dom.Node object or :py:class:`cdp_driver.Element`
    :param predicate: a function which takes a node as first parameter
     and returns a boolean, where True means "this is the one".
    :return: The first matching node, or None when nothing matches.
    :raises TypeError: If ``doc`` has no ``children`` attribute.
    """
    if not hasattr(doc, "children"):
        raise TypeError("Object should have a .children attribute!")
    if not doc or not doc.children:
        return None
    for node in doc.children:
        if predicate(node):
            return node
        if node.shadow_roots:
            found = filter_recurse(node.shadow_roots[0], predicate)
            if found:
                return found
        found = filter_recurse(node, predicate)
        if found:
            return found
    return None
201
+
202
+
203
def circle(
    x, y=None, radius=10, num=10, dir=0
) -> typing.Generator[typing.Tuple[float, float], None, None]:
    """
    A generator that calculates coordinates around a circle.
    Each yielded point is ``(a + radius * sin(t), b + radius * cos(t))``
    with ``a = int(x - 2 * radius)`` and ``b = int(y - 2 * radius)``.
    ``num + 1`` points are yielded in total.
    :param x: start x position
    :type x: int
    :param y: start y position. When None (the default), x is used.
     A value of 0 is a valid position and is kept as given.
    :type y: int
    :param radius: size of the circle
    :type radius: int
    :param num: the amount of points calculated
     (higher => slower, more cpu, but more detailed)
    :type num: int
    :param dir: 0 steps through the angles in increasing order;
     any other value steps through them in decreasing order.
    :type dir: int
    """
    import math

    r = radius
    w = num
    # Only a missing y falls back to x; "not y" would also replace y=0.
    if y is None:
        y = x
    a = int(x - r * 2)
    b = int(y - r * 2)
    m = (2 * math.pi) / w
    if dir == 0:
        # Regular direction
        ran = 0, w + 1, 1
    else:
        # Opposite direction
        ran = w + 1, 0, -1
    for i in range(*ran):
        point_x = a + r * math.sin(m * i)
        point_y = b + r * math.cos(m * i)
        yield point_x, point_y
237
+
238
+
239
def remove_from_tree(tree: cdp.dom.Node, node: cdp.dom.Node) -> cdp.dom.Node:
    """
    Remove every descendant whose backend_node_id equals node's.
    The tree is modified in place (``children`` lists are edited).
    :param tree: The node whose descendants are searched.
    :param node: The node to remove; only its ``backend_node_id`` is used.
    :return: The same ``tree`` object that was passed in.
    :raises TypeError: If ``tree`` has no ``children`` attribute.
    """
    if not hasattr(tree, "children"):
        raise TypeError("Object should have a .children attribute!")
    if tree and tree.children:
        # Iterate over a snapshot: removing from the list being iterated
        # would make the loop skip the sibling after each removed child.
        for child in list(tree.children):
            if child.backend_node_id == node.backend_node_id:
                tree.children.remove(child)
            remove_from_tree(child, node)
    return tree
248
+
249
+
250
async def html_from_tree(
    tree: Union[cdp.dom.Node, Element], target: Tab
):
    """
    Concatenate the HTML of every descendant of ``tree`` into one string.
    For each child, its own HTML is appended first (``get_html()`` for
    an Element, otherwise a ``DOM.getOuterHTML`` request sent through
    ``target``), followed by the result of recursing into that child.
    :param tree: The node whose children are walked.
    :param target: The Tab used to send CDP commands for plain nodes.
    :return: The concatenated HTML; "" when there are no children.
    :raises TypeError: If ``tree`` has no ``children`` attribute.
    """
    if not hasattr(tree, "children"):
        raise TypeError("Object should have a .children attribute!")
    html = ""
    if not tree or not tree.children:
        return html
    for node in tree.children:
        if isinstance(node, Element):
            html = html + await node.get_html()
        else:
            request = cdp.dom.get_outer_html(
                backend_node_id=node.backend_node_id
            )
            html = html + await target.send(request)
        html = html + await html_from_tree(node, target)
    return html
268
+
269
+
270
def compare_target_info(
    info1: cdp.target.TargetInfo, info2: cdp.target.TargetInfo
) -> List[typing.Tuple[str, typing.Any, typing.Any]]:
    """
    Report which attributes differ between two target-info objects.
    The instance dictionaries (``__dict__``) of both objects are compared
    key by key, using the keys of ``info1``.
    (Used to produce meaningful debug log messages on target changes.)
    :param info1: The old target info.
    :param info2: The new target info.
    :return: A list of tuples
     [ ... ( key_which_has_changed, old_value, new_value) ]
    """
    old_fields = info1.__dict__
    new_fields = info2.__dict__
    changes = []
    for key, old_value in old_fields.items():
        new_value = new_fields[key]
        if new_value != old_value:
            changes.append((key, old_value, new_value))
    return changes
286
+
287
+
288
def loop():
    """
    Create a new asyncio event loop, install it as the current
    event loop, and return it.
    """
    fresh_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(fresh_loop)
    return fresh_loop
292
+
293
+
294
def cdp_get_module(domain: Union[str, types.ModuleType]):
    """
    Resolve a cdp domain to its module.
    A module object is returned unchanged. A string is first looked up
    as an attribute of the ``cdp`` package ("input" is looked up as
    "input_"); when that attribute is missing or falsy, the string is
    imported as a regular module name instead.
    :param domain: A module, or the name of a cdp domain / module.
    :return: The resolved module.
    :raises ModuleNotFoundError: If the name cannot be resolved either way.
    """
    import importlib

    if isinstance(domain, types.ModuleType):
        return domain
    try:
        if domain == "input":
            domain = "input_"
        resolved = getattr(cdp, domain)
        if not resolved:
            raise AttributeError
    except AttributeError:
        try:
            resolved = importlib.import_module(domain)
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "Could not find cdp module from input '%s'" % domain
            )
    return resolved
@@ -0,0 +1,322 @@
1
+ import logging
2
+ import os
3
+ import pathlib
4
+ import secrets
5
+ import sys
6
+ import tempfile
7
+ import zipfile
8
+ from typing import Union, List, Optional
9
+
10
# Public names of this module.
__all__ = [
    "Config",
    "find_chrome_executable",
    "temp_profile_dir",
    "is_root",
    "is_posix",
    "PathLike",
]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# True when sys.platform starts with "darwin", "cygwin" or "linux".
is_posix = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))

# A filesystem path given either as a string or as a pathlib.Path.
PathLike = Union[str, pathlib.Path]
# Default for Config.__init__ parameters that Config fills in itself
# (or leaves unset) when the caller does not provide a value.
AUTO = None
24
+
25
+
26
class Config:
    """
    Launch configuration for the browser.
    Calling an instance, eg: myconfig(), returns the list of command-line
    arguments that are provided to the browser.
    """

    def __init__(
        self,
        user_data_dir: Optional[PathLike] = AUTO,
        headless: Optional[bool] = False,
        incognito: Optional[bool] = False,
        guest: Optional[bool] = False,
        browser_executable_path: Optional[PathLike] = AUTO,
        browser_args: Optional[List[str]] = AUTO,
        sandbox: Optional[bool] = True,
        lang: Optional[str] = "en-US",
        host: Optional[str] = AUTO,
        port: Optional[int] = AUTO,
        expert: Optional[bool] = AUTO,
        **kwargs: dict,
    ):
        """
        Creates a config object.
        Can be called without any arguments to generate a best-practice config,
        which is recommended.
        Calling the object, eg: myconfig(), returns the list of arguments which
        are provided to the browser.
        Additional args can be added using the :py:obj:`~add_argument method`.
        Instances of this class are usually not instantiated by end users.
        :param user_data_dir: The data directory to use.
         When not given, a temporary profile directory is created.
        :param headless: set to True for headless mode
        :param incognito: set to True to add --incognito
        :param guest: set to True to add --guest
        :param browser_executable_path:
         Specify browser executable, instead of using autodetect.
        :param browser_args: Forwarded to browser executable.
         Eg: ["--some-chromeparam=somevalue", "some-other-param=someval"]
         The sequence is copied; the caller's object is never modified.
        :param sandbox: Default True. When falsy, --no-sandbox is added.
         Forced to False when running as root on a posix platform.
        :param lang: Language string, stored as the ``lang`` attribute.
        :param host: Stored as ``host``; when set,
         --remote-debugging-host is added.
        :param port: Stored as ``port``; when set,
         --remote-debugging-port is added.
        :param expert: When set to True, "expert" mode is enabled.
         This adds: --disable-web-security --disable-site-isolation-trials,
         as well as some scripts and patching useful for debugging.
         (For example, ensuring shadow-root is always in "open" mode.)
        :param kwargs: Any other keyword args become instance attributes.
        :type user_data_dir: PathLike
        :type headless: bool
        :type incognito: bool
        :type guest: bool
        :type browser_executable_path: PathLike
        :type browser_args: list[str]
        :type sandbox: bool
        :type lang: str
        :type host: str
        :type port: int
        :type expert: bool
        :type kwargs: dict
        :raises FileNotFoundError: When no browser executable is given
         and none can be found.
        """
        # Copy, so that add_argument() never appends to the caller's list
        # (and so that a tuple of args works as well).
        browser_args = list(browser_args) if browser_args else []
        if not user_data_dir:
            self._user_data_dir = temp_profile_dir()
            self._custom_data_dir = False
        else:
            # Goes through the property setter (marks the dir as custom).
            self.user_data_dir = user_data_dir
        if not browser_executable_path:
            browser_executable_path = find_chrome_executable()
        self._browser_args = browser_args
        self.browser_executable_path = browser_executable_path
        self.headless = headless
        self.incognito = incognito
        self.guest = guest
        self.sandbox = sandbox
        self.host = host
        self.port = port
        self.expert = expert
        self._extensions = []
        # When using posix-ish operating system and running as root,
        # you must use no_sandbox=True
        if is_posix and is_root() and sandbox:
            logger.info("Detected root usage, auto-disabling sandbox mode.")
            self.sandbox = False
        self.autodiscover_targets = True
        self.lang = lang
        # Other keyword args will be accessible by attribute
        self.__dict__.update(kwargs)
        super().__init__()
        self._default_browser_args = [
            "--remote-allow-origins=*",
            "--no-first-run",
            "--no-service-autorun",
            "--no-default-browser-check",
            "--homepage=about:blank",
            "--no-pings",
            "--safebrowsing-disable-download-protection",
            '--simulate-outdated-no-au="Tue, 31 Dec 2099 23:59:59 GMT"',
            "--password-store=basic",
            "--deny-permission-prompts",
            "--disable-infobars",
            "--disable-breakpad",
            "--disable-component-update",
            "--disable-prompt-on-repost",
            "--disable-password-generation",
            "--disable-ipc-flooding-protection",
            "--disable-search-engine-choice-screen",
            "--disable-backgrounding-occluded-windows",
            "--disable-client-side-phishing-detection",
            "--disable-top-sites",
            "--disable-renderer-backgrounding",
            "--disable-background-networking",
            "--disable-dev-shm-usage",
            "--disable-features=IsolateOrigins,site-per-process,Translate,"
            "InsecureDownloadWarnings,DownloadBubble,DownloadBubbleV2,"
            "OptimizationTargetPrediction,OptimizationGuideModelDownloading,"
            "SidePanelPinning,UserAgentClientHint,PrivacySandboxSettings4",
        ]

    @property
    def browser_args(self):
        """Sorted list of the default args plus the custom args."""
        return sorted(self._default_browser_args + self._browser_args)

    @property
    def user_data_dir(self):
        """The profile directory passed to the browser."""
        return self._user_data_dir

    @user_data_dir.setter
    def user_data_dir(self, path: PathLike):
        # Setting a directory explicitly marks it as a custom data dir.
        self._user_data_dir = str(path)
        self._custom_data_dir = True

    @property
    def uses_custom_data_dir(self) -> bool:
        """True when the data dir was set explicitly (not auto-created)."""
        return self._custom_data_dir

    def add_extension(self, extension_path: PathLike):
        """
        Adds an extension to load. You can set the extension_path to a
        folder (containing the manifest), or an extension zip file (.crx)
        A file is extracted into a new temporary directory, and that
        directory is registered. For a folder, the parent directory of
        the last "manifest.*" file found (searching recursively) is
        registered, or the folder itself when no such file is found.
        :param extension_path: Path of the extension folder or archive.
        :raises FileNotFoundError: If extension_path does not exist.
        """
        path = pathlib.Path(extension_path)
        if not path.exists():
            raise FileNotFoundError(
                "Could not find anything here: %s" % str(path)
            )
        if path.is_file():
            tf = tempfile.mkdtemp(
                prefix="extension_", suffix=secrets.token_hex(4)
            )
            with zipfile.ZipFile(path, "r") as z:
                z.extractall(tf)
                self._extensions.append(tf)
        elif path.is_dir():
            for item in path.rglob("manifest.*"):
                path = item.parent
            self._extensions.append(path)

    def __call__(self):
        """Return the list of command-line arguments for the browser."""
        # The host and port will be added when starting the browser.
        # By the time it starts, the port is probably already taken.
        args = self._default_browser_args.copy()
        args += ["--user-data-dir=%s" % self.user_data_dir]
        args += ["--disable-features=IsolateOrigins,site-per-process"]
        args += ["--disable-session-crashed-bubble"]
        if self.expert:
            args += [
                "--disable-web-security",
                "--disable-site-isolation-trials",
            ]
        if self._browser_args:
            # Custom args are appended unless already present.
            args.extend([arg for arg in self._browser_args if arg not in args])
        if self.headless:
            args.append("--headless=new")
        if self.incognito:
            args.append("--incognito")
        if self.guest:
            args.append("--guest")
        if not self.sandbox:
            args.append("--no-sandbox")
        if self.host:
            args.append("--remote-debugging-host=%s" % self.host)
        if self.port:
            args.append("--remote-debugging-port=%s" % self.port)
        return args

    def add_argument(self, arg: str):
        """
        Add a custom command-line argument for the browser.
        :param arg: The argument to add, eg: "--some-chromeparam=somevalue"
        :raises ValueError: If arg contains (case-insensitively) one of:
         headless, data-dir, data_dir, no-sandbox, no_sandbox, lang.
         Those are controlled by attributes of the Config object.
        """
        if any(
            x in arg.lower()
            for x in [
                "headless",
                "data-dir",
                "data_dir",
                "no-sandbox",
                "no_sandbox",
                "lang",
            ]
        ):
            raise ValueError(
                '"%s" is not allowed. Please use one of the '
                'attributes of the Config object to set it.'
                % arg
            )
        self._browser_args.append(arg)

    def __repr__(self):
        """
        Return the class name followed by one "name = value" line for
        each public, truthy, non-callable attribute or property.
        """
        s = f"{self.__class__.__name__}"
        for k, v in ({**self.__dict__, **self.__class__.__dict__}).items():
            if k[0] == "_":
                continue
            if not v:
                continue
            if isinstance(v, property):
                # Resolve properties to their current value.
                v = getattr(self, k)
            if callable(v):
                continue
            s += f"\n\t{k} = {v}"
        return s
234
+
235
+
236
def is_root():
    """
    Tell whether the current process runs with elevated privileges.
    Where ``os.getuid`` exists, this is True when the uid is 0.
    Otherwise the Windows shell32 ``IsUserAnAdmin`` call is used.
    (Launching chrome as root needs some alternative handling.)
    """
    import ctypes
    import os

    getuid = getattr(os, "getuid", None)
    if getuid is not None:
        return getuid() == 0
    # os.getuid is missing on this platform: ask the Windows shell.
    return ctypes.windll.shell32.IsUserAnAdmin() != 0
248
+
249
+
250
def temp_profile_dir():
    """
    Create a new temporary directory whose name starts with "uc_",
    and return its normalized path.
    """
    created_dir = tempfile.mkdtemp(prefix="uc_")
    return os.path.normpath(created_dir)
254
+
255
+
256
def find_chrome_executable(return_all=False):
    """
    Finds the chrome, beta, canary, chromium executable
    and returns the disk path.
    On posix platforms, every PATH directory is checked for the known
    binary names (plus the standard application paths on macOS).
    Elsewhere, the known install folders under the Windows
    program-files / local-appdata environment variables are checked.
    :param return_all: When True, return the list of all valid
     candidates instead of a single path.
    :return: The normalized path of the shortest valid candidate,
     or the list of all valid candidates when return_all is True.
    :raises FileNotFoundError: If no valid candidate is found.
    """
    candidates = []
    if is_posix:
        # Default to "" so that an unset PATH does not raise.
        for item in os.environ.get("PATH", "").split(os.pathsep):
            for subitem in (
                "google-chrome",
                "chromium",
                "chromium-browser",
                "chrome",
                "google-chrome-stable",
            ):
                candidates.append(os.sep.join((item, subitem)))
        if "darwin" in sys.platform:
            candidates += [
                "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
                "/Applications/Chromium.app/Contents/MacOS/Chromium",
            ]
    else:
        for item in map(
            os.environ.get,
            (
                "PROGRAMFILES",
                "PROGRAMFILES(X86)",
                "LOCALAPPDATA",
                "PROGRAMW6432",
            ),
        ):
            if item is not None:
                for subitem in (
                    "Google/Chrome/Application",
                    "Google/Chrome Beta/Application",
                    "Google/Chrome Canary/Application",
                ):
                    candidates.append(
                        os.sep.join((item, subitem, "chrome.exe"))
                    )
    rv = []
    for candidate in candidates:
        if os.path.exists(candidate) and os.access(candidate, os.X_OK):
            logger.debug("%s is a valid candidate... ", candidate)
            rv.append(candidate)
        else:
            logger.debug(
                "%s is not a valid candidate because it doesn't exist "
                "or isn't an executable.",
                candidate,
            )
    if return_all and rv:
        return rv
    if rv:
        # Assuming the shortest path wins
        return os.path.normpath(min(rv, key=len))
    raise FileNotFoundError(
        "Could not find a valid chrome browser binary. "
        "Please make sure Chrome is installed. "
        "Or use the keyword argument: "
        "'browser_executable_path=/path/to/your/browser'."
    )