seleniumbase 4.32.1__py3-none-any.whl → 4.32.2__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1319 @@
1
+ from __future__ import annotations
2
+ import asyncio
3
+ import logging
4
+ import pathlib
5
+ import warnings
6
+ from typing import Dict, List, Union, Optional, Tuple
7
+ from . import browser as cdp_browser
8
+ from . import element
9
+ from . import cdp_util as util
10
+ from .config import PathLike
11
+ from .connection import Connection, ProtocolException
12
+ import mycdp as cdp
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class Tab(Connection):
18
+ """
19
+ :ref:`tab` is the controlling mechanism/connection to a 'target',
20
+ for most of us 'target' can be read as 'tab'. However it could also
21
+ be an iframe, serviceworker or background script for example,
22
+ although there isn't much to control for those.
23
+ If you open a new window by using
24
+ :py:meth:`browser.get(..., new_window=True)`
25
+ Your url will open a new window. This window is a 'tab'.
26
+ When you browse to another page, the tab will be the same (browser view).
27
+ It's important to keep some reference to tab objects, in case you're
28
+ done interacting with elements and want to operate on the page level again.
29
+
30
+ Custom CDP commands
31
+ ---------------------------
32
+ Tab object provide many useful and often-used methods. It is also possible
33
+ to utilize the included cdp classes to do something totally custom.
34
+
35
+ The cdp package is a set of so-called "domains" with each having methods,
36
+ events and types.
37
+ To send a cdp method, for example :py:obj:`cdp.page.navigate`,
38
+ you'll have to check whether the method accepts any parameters
39
+ and whether they are required or not.
40
+
41
+ You can use:
42
+
43
+ ```python
44
+ await tab.send(cdp.page.navigate(url='https://Your-URL-Here'))
45
+ ```
46
+
47
+ So tab.send() accepts a generator object,
48
+ which is created by calling a cdp method.
49
+ This way you can build very detailed and customized commands.
50
+ (Note: Finding correct command combos can be a time-consuming task.
51
+ A whole bunch of useful methods have been added,
52
+ preferably having the same apis or lookalikes, as in selenium.)
53
+
54
+ Some useful, often needed and simply required methods
55
+ ===================================================================
56
+
57
+ :py:meth:`~find` | find(text)
58
+ ----------------------------------------
59
+ Finds and returns a single element by text match.
60
+ By default, returns the first element found.
61
+ Much more powerful is the best_match flag,
62
+ although also much more expensive.
63
+ When no match is found, it will retry for <timeout> seconds (default: 10),
64
+ so this is also suitable to use as wait condition.
65
+
66
+ :py:meth:`~find` | find(text, best_match=True) or find(text, True)
67
+ -----------------------------------------------------------------------
68
+ Much more powerful (and expensive) than the above is
69
+ the use of the `find(text, best_match=True)` flag.
70
+ It will still return 1 element, but when multiple matches are found,
71
+ it picks the one having the most similar text length.
72
+ How would that help?
73
+ For example, you search for "login",
74
+ you'd probably want the "login" button element,
75
+ and not thousands of scripts/meta/headings,
76
+ which happens to contain a string of "login".
77
+
78
+ When no match is found, it will retry for <timeout> seconds (default: 10),
79
+ so this is also suitable to use as wait condition.
80
+
81
+ :py:meth:`~select` | select(selector)
82
+ ----------------------------------------
83
+ Finds and returns a single element by css selector match.
84
+ When no match is found, it will retry for <timeout> seconds (default: 10),
85
+ so this is also suitable to use as wait condition.
86
+
87
+ :py:meth:`~select_all` | select_all(selector)
88
+ ------------------------------------------------
89
+ Finds and returns all elements by css selector match.
90
+ When no match is found, it will retry for <timeout> seconds (default: 10),
91
+ so this is also suitable to use as wait condition.
92
+
93
+ await :py:obj:`Tab`
94
+ ---------------------------
95
+ Calling `await tab` will do a lot of stuff under the hood,
96
+ and ensures all references are up to date.
97
+ Also it allows for the script to "breathe",
98
+ as it is oftentimes faster than your browser or webpage.
99
+ So whenever you get stuck and things crash or an element cannot be found,
100
+ you should probably let it "breathe" by calling `await page`
101
+ and/or `await page.sleep()`.
102
+
103
+ It ensures :py:obj:`~url` will be updated to the most recent one,
104
+ which is quite important in some other methods.
105
+
106
+ Using other and custom CDP commands
107
+ ======================================================
108
+ Using the included cdp module, you can easily craft commands,
109
+ which will always return a generator object.
110
+ This generator object can be easily sent to the :py:meth:`~send` method.
111
+
112
+ :py:meth:`~send`
113
+ ---------------------------
114
+ This is probably the most important method,
115
+ although you won't ever call it, unless you want to go really custom.
116
+ The send method accepts a :py:obj:`cdp` command.
117
+ Each of which can be found in the cdp section.
118
+
119
+ When you import * from this package, cdp will be in your namespace,
120
+ and contains all domains/actions/events you can act upon.
121
+ """
122
+ browser: cdp_browser.Browser
123
+ _download_behavior: List[str] = None
124
+
125
def __init__(
    self,
    websocket_url: str,
    target: cdp.target.TargetInfo,
    browser: Optional["cdp_browser.Browser"] = None,
    **kwargs,
):
    """
    Create a tab on top of the base connection.
    :param websocket_url: forwarded unchanged to the parent initializer.
    :type websocket_url: str
    :param target: forwarded unchanged to the parent initializer.
    :param browser: the owning browser, if any. It is forwarded to the
        parent initializer and also stored on ``self.browser``.
    :param kwargs: extra keyword arguments for the parent initializer.
    """
    super().__init__(websocket_url, target, browser, **kwargs)
    # Both slots start out empty; nothing in this method populates them.
    self._window_id = None
    self._dom = None
    self.browser = browser
136
+
137
@property
def inspector_url(self):
    """
    Return the url of the devtools inspector page for this tab.
    The url can be opened in another browser to show the devtools
    interface for the current tab, which helps when debugging
    and when running in headless mode.
    """
    config = self.browser.config
    # The first five characters of the websocket url are dropped
    # (presumably the "ws://" scheme prefix - confirm against Connection).
    ws_address = self.websocket_url[5:]
    return "http://{}:{}/devtools/inspector.html?ws={}".format(
        config.host, config.port, ws_address
    )
146
+
147
def inspector_open(self):
    """
    Open this tab's devtools inspector url with the system web browser.
    ``new=2`` asks :py:func:`webbrowser.open` for a new browser tab
    when that is possible.
    """
    from webbrowser import open as open_in_browser

    open_in_browser(self.inspector_url, new=2)
151
+
152
async def open_external_inspector(self):
    """
    Show the devtools inspector page of this tab in the system's browser.
    Handy for debugging, especially when running in headless mode.
    """
    import webbrowser

    inspector_page = self.inspector_url
    webbrowser.open(inspector_page)
160
+
161
async def find(
    self,
    text: str,
    best_match: bool = False,
    return_enclosing_element: bool = True,
    timeout: Union[int, float] = 10,
):
    """
    Find single element by text.
    Can also be used to wait for such element to appear.
    :param text:
        Text to search for. Note: Script contents are also considered text.
    :type text: str
    :param best_match:
        When True, it will return the element which has the most
        comparable string length. This could help a lot. Eg:
        If you search for "login", you probably want the login button element,
        and not thousands of tags/scripts containing a "login" string.
        When False (default), it returns just the first match
        (but is way faster).
    :type best_match: bool
    :param return_enclosing_element:
        Since we deal with nodes instead of elements,
        the find function most often returns so called text nodes,
        which is actually a element of plain text,
        which is the somehow imaginary "child" of a "span", "p", "script"
        or any other elements which have text between their opening
        and closing tags.
        Most often when we search by text, we actually aim for the
        element containing the text instead of a lousy plain text node,
        so by default the containing element is returned.
        There are exceptions. Eg:
        Elements that use the "placeholder=" property.
    :type return_enclosing_element: bool
    :param timeout:
        Raise timeout exception when after this many seconds nothing is found.
    :type timeout: float,int
    :return: the matching element.
    :raises asyncio.TimeoutError:
        When nothing was found and more than <timeout> seconds have passed.
    """
    loop = asyncio.get_running_loop()
    start_time = loop.time()
    text = text.strip()
    item = None
    try:
        item = await self.find_element_by_text(
            text, best_match, return_enclosing_element
        )
    except Exception:
        # Best effort: a failing first lookup counts as "not found yet",
        # and the retry loop below takes over.
        pass
    while not item:
        await self
        item = await self.find_element_by_text(
            text, best_match, return_enclosing_element
        )
        if item:
            # Found: return right away instead of sleeping another 0.5s
            # or raising a timeout for a lookup that succeeded.
            break
        if loop.time() - start_time > timeout:
            raise asyncio.TimeoutError(
                "Time ran out while waiting for: {%s}" % text
            )
        await self.sleep(0.5)
    return item
219
+
220
async def select(
    self,
    selector: str,
    timeout: Union[int, float] = 10,
) -> element.Element:
    """
    Find a single element by css selector.
    Can also be used to wait for such an element to appear.
    :param selector: css selector,
        eg a[href], button[class*=close], a > img[src]
    :type selector: str
    :param timeout:
        Raise timeout exception when after this many seconds nothing is found.
    :type timeout: float,int
    :return: the matching element.
    :raises asyncio.TimeoutError:
        When nothing was found and more than <timeout> seconds have passed.
    """
    loop = asyncio.get_running_loop()
    start_time = loop.time()
    selector = selector.strip()
    item = None
    try:
        item = await self.query_selector(selector)
    except Exception:
        # Best effort: a failing first lookup counts as "not found yet",
        # and the retry loop below takes over.
        pass
    while not item:
        await self
        item = await self.query_selector(selector)
        if item:
            # Found: return right away instead of sleeping another 0.5s
            # or raising a timeout for a lookup that succeeded.
            break
        if loop.time() - start_time > timeout:
            raise asyncio.TimeoutError(
                "Time ran out while waiting for: {%s}" % selector
            )
        await self.sleep(0.5)
    return item
252
+
253
async def find_all(
    self,
    text: str,
    timeout: Union[int, float] = 10,
) -> List[element.Element]:
    """
    Find multiple elements by text.
    Can also be used to wait for such elements to appear.
    :param text: Text to search for.
        Note: Script contents are also considered text.
    :type text: str
    :param timeout:
        Raise timeout exception when after this many seconds nothing is found.
    :type timeout: float,int
    :return: the non-empty list of matching elements.
    :raises asyncio.TimeoutError:
        When nothing was found and more than <timeout> seconds have passed.
    """
    loop = asyncio.get_running_loop()
    start_time = loop.time()
    text = text.strip()
    items = []
    try:
        items = await self.find_elements_by_text(text)
    except Exception:
        # Best effort: a failing first lookup counts as "not found yet",
        # and the retry loop below takes over.
        pass
    while not items:
        await self
        items = await self.find_elements_by_text(text)
        if items:
            # Found: return right away instead of sleeping another 0.5s
            # or raising a timeout for a lookup that succeeded.
            break
        if loop.time() - start_time > timeout:
            raise asyncio.TimeoutError(
                "Time ran out while waiting for: {%s}" % text
            )
        await self.sleep(0.5)
    return items
285
+
286
async def select_all(
    self,
    selector: str,
    timeout: Union[int, float] = 10,
    include_frames=False,
) -> List[element.Element]:
    """
    Find multiple elements by CSS Selector.
    Can also be used to wait for such elements to appear.
    :param selector: css selector,
        eg a[href], button[class*=close], a > img[src]
    :type selector: str
    :param timeout:
        Raise timeout exception when after this many seconds nothing is found.
    :type timeout: float,int
    :param include_frames: Whether to include results in iframes.
        This applies to the first lookup and to every retry.
    :type include_frames: bool
    :return: the non-empty list of matching elements.
    :raises asyncio.TimeoutError:
        When nothing was found and more than <timeout> seconds have passed.
    """
    loop = asyncio.get_running_loop()
    start_time = loop.time()
    selector = selector.strip()

    async def _query_everywhere():
        # One lookup pass: iframes first (when requested), then this tab.
        found = []
        if include_frames:
            frames = await self.query_selector_all("iframe")
            # Unfortunately, asyncio.gather is not an option here
            for fr in frames:
                found.extend(await fr.query_selector_all(selector))
        found.extend(await self.query_selector_all(selector))
        return found

    items = await _query_everywhere()
    while not items:
        await self
        # Retries use the same pass as the first lookup, so elements that
        # show up late inside an iframe are still found.
        items = await _query_everywhere()
        if items:
            # Found: return right away instead of sleeping another 0.5s
            # or raising a timeout for a lookup that succeeded.
            break
        if loop.time() - start_time > timeout:
            raise asyncio.TimeoutError(
                "Time ran out while waiting for: {%s}" % selector
            )
        await self.sleep(0.5)
    return items
323
+
324
async def get(
    self,
    url="chrome://welcome",
    new_tab: bool = False,
    new_window: bool = False,
):
    """
    Navigate to the given url.
    Without ``new_tab``/``new_window`` this tab itself navigates and is
    returned. Otherwise the request is handed to ``self.browser.get()``
    and its result is returned.
    :param url: the url to navigate to
    :param new_tab: open new tab
    :param new_window: open new window (implies a new tab)
    :return: this tab, or whatever ``self.browser.get()`` returned
    :raises AttributeError: when this tab has no browser attached.
    """
    if not self.browser:
        raise AttributeError(
            "This page/tab has no browser attribute, "
            "so you can't use get()"
        )
    if new_window:
        # A new window always comes with a new tab.
        new_tab = new_tab or True
    if not new_tab:
        # The navigate response is unpacked only to check its shape;
        # the ids themselves are not used.
        _frame_id, _loader_id, *_ = await self.send(cdp.page.navigate(url))
        await self
        return self
    return await self.browser.get(url, new_tab, new_window)
353
+
354
async def query_selector_all(
    self,
    selector: str,
    _node: Optional[Union[cdp.dom.Node, "element.Element"]] = None,
):
    """
    Equivalent of JavaScript "document.querySelectorAll".
    This is considered one of the main methods to use in this package.
    It returns all matching :py:obj:`element.Element` objects.
    :param selector: css selector.
        (first time? => https://www.w3schools.com/cssref/css_selectors.php )
    :type selector: str
    :param _node: internal use
    :return: list of matching elements (empty when nothing matches).
    :raises ProtocolException:
        When the query fails and no ``_node`` was supplied.
    """
    if not _node:
        doc: cdp.dom.Node = await self.send(cdp.dom.get_document(-1, True))
    else:
        doc = _node
        if _node.node_name == "IFRAME":
            doc = _node.content_document
    node_ids = []
    try:
        node_ids = await self.send(
            cdp.dom.query_selector_all(doc.node_id, selector)
        )
    except ProtocolException as e:
        if _node is not None:
            if "could not find node" in e.message.lower():
                # The retry marker is read and written through
                # getattr/setattr/delattr with the same literal name.
                # Writing `_node.__last` inside a class body is
                # name-mangled, so it would never match this getattr
                # and the guard could not stop the recursion.
                if getattr(_node, "__last", None):
                    delattr(_node, "__last")
                    return []
                # If the supplied node is not found,
                # then the DOM has changed since acquiring the element.
                # Therefore, we need to update our node, and try again.
                await _node.update()
                # Make sure this isn't turned into infinite loop.
                setattr(_node, "__last", True)
                return await self.query_selector_all(selector, _node)
        else:
            await self.send(cdp.dom.disable())
            raise
    if not node_ids:
        return []
    items = []
    for nid in node_ids:
        node = util.filter_recurse(doc, lambda n: n.node_id == nid)
        # Pass along the retrieved document tree to improve performance.
        if not node:
            continue
        elem = element.create(node, self, doc)
        items.append(elem)
    return items
407
+
408
async def query_selector(
    self,
    selector: str,
    _node: Optional[Union[cdp.dom.Node, element.Element]] = None,
):
    """
    Find a single element based on a CSS Selector string.
    :param selector: CSS Selector(s)
    :type selector: str
    :param _node: internal use
    :return: the matching element, or a falsy value
        (None or an empty list) when nothing was found.
    :raises ProtocolException:
        When the query fails and no ``_node`` was supplied.
    """
    selector = selector.strip()
    if not _node:
        doc: cdp.dom.Node = await self.send(cdp.dom.get_document(-1, True))
    else:
        doc = _node
        if _node.node_name == "IFRAME":
            doc = _node.content_document
    node_id = None
    try:
        node_id = await self.send(
            cdp.dom.query_selector(doc.node_id, selector)
        )
    except ProtocolException as e:
        if _node is not None:
            if "could not find node" in e.message.lower():
                # The retry marker is read and written through
                # getattr/setattr/delattr with the same literal name.
                # Writing `_node.__last` inside a class body is
                # name-mangled, so it would never match this getattr
                # and the guard could not stop the recursion.
                if getattr(_node, "__last", None):
                    delattr(_node, "__last")
                    return []
                # If supplied node is not found,
                # the dom has changed since acquiring the element,
                # therefore, update our passed node and try again.
                await _node.update()
                # Make sure this isn't turned into infinite loop.
                setattr(_node, "__last", True)
                return await self.query_selector(selector, _node)
        else:
            await self.send(cdp.dom.disable())
            raise
    if not node_id:
        return
    node = util.filter_recurse(doc, lambda n: n.node_id == node_id)
    if not node:
        return
    return element.create(node, self, doc)
453
+
454
async def find_elements_by_text(
    self,
    text: str,
) -> List[element.Element]:
    """
    Returns elements which match the given text.
    Note: This may (or will) also return any other element
    (like inline scripts), which happen to contain that text.
    Text nodes are replaced by their parent element when they have one.
    :param text: the text to search for (surrounding whitespace is ignored)
    :type text: str
    :return: list of matching elements (empty when nothing matches).
    """
    text = text.strip()
    doc = await self.send(cdp.dom.get_document(-1, True))
    search_id, nresult = await self.send(
        cdp.dom.perform_search(text, True)
    )
    if nresult:
        node_ids = await self.send(
            cdp.dom.get_search_results(search_id, 0, nresult)
        )
    else:
        node_ids = []
    await self.send(cdp.dom.discard_search_results(search_id))
    items = []
    for nid in node_ids:
        node = util.filter_recurse(doc, lambda n: n.node_id == nid)
        if not node:
            node = await self.send(cdp.dom.resolve_node(node_id=nid))
            if not node:
                continue
        try:
            elem = element.create(node, self, doc)
        except Exception:
            # Skip nodes that cannot be wrapped. Only Exception is caught,
            # so task cancellation and KeyboardInterrupt still propagate.
            continue
        if elem.node_type == 3:
            # If found element is a text node (which is plain text,
            # and useless for our purpose), we return the parent element
            # of the node (which is often a tag which can have text
            # between their opening and closing tags (that is most tags,
            # except for example "img" and "video", "br").
            if not elem.parent:
                # Check if parent actually has a parent
                # and update it to be absolutely sure.
                await elem.update()
            # When there's no parent, use the text node itself.
            items.append(elem.parent or elem)
        else:
            # Add the element itself.
            items.append(elem)
    # Since we already fetched the entire doc, including shadow and frames,
    # let's also search through the iframes.
    iframes = util.filter_recurse_all(
        doc, lambda node: node.node_name == "IFRAME"
    )
    if iframes:
        needle = text.lower()
        iframes_elems = [
            element.create(iframe, self, iframe.content_document)
            for iframe in iframes
        ]
        for iframe_elem in iframes_elems:
            if not iframe_elem.content_document:
                continue
            iframe_text_nodes = util.filter_recurse_all(
                iframe_elem,
                lambda node: node.node_type == 3  # noqa
                and needle in node.node_value.lower(),
            )
            for text_node in iframe_text_nodes or []:
                text_elem = element.create(
                    text_node, self, iframe_elem.tree
                )
                # Same rule as above: fall back to the text node itself,
                # so the result never contains None entries.
                items.append(text_elem.parent or text_elem)
    await self.send(cdp.dom.disable())
    return items or []
537
+
538
async def find_element_by_text(
    self,
    text: str,
    best_match: Optional[bool] = False,
    return_enclosing_element: Optional[bool] = True,
) -> Union[element.Element, None]:
    """
    Finds and returns the first element containing <text>, or best match.
    :param text: the text to search for (surrounding whitespace is ignored)
    :type text: str
    :param best_match:
        When True, which is MUCH more expensive (thus much slower),
        will find the closest match based on length.
        When searching for "login", you probably want the button element,
        and not thousands of tags/scripts containing the "login" string.
    :type best_match: bool
    :param return_enclosing_element:
        Accepted for interface compatibility. This implementation does
        not read it; text nodes are always replaced by their parent
        element when they have one.
    :type return_enclosing_element: bool
    :return: the selected element, or None when nothing matches.
    """
    doc = await self.send(cdp.dom.get_document(-1, True))
    text = text.strip()
    search_id, nresult = await self.send(
        cdp.dom.perform_search(text, True)
    )
    if nresult:
        node_ids = await self.send(
            cdp.dom.get_search_results(search_id, 0, nresult)
        )
    else:
        # Same guard as find_elements_by_text:
        # don't request an empty result range.
        node_ids = []
    await self.send(cdp.dom.discard_search_results(search_id))
    items = []
    for nid in node_ids or []:
        node = util.filter_recurse(doc, lambda n: n.node_id == nid)
        try:
            elem = element.create(node, self, doc)
        except Exception:
            # Skip nodes that cannot be wrapped. Only Exception is caught,
            # so task cancellation and KeyboardInterrupt still propagate.
            continue
        if elem.node_type == 3:
            # If found element is a text node
            # (which is plain text, and useless for our purpose),
            # then return the parent element of the node
            # (which is often a tag which can have text between their
            # opening and closing tags (that is most tags,
            # except for example "img" and "video", "br").
            if not elem.parent:
                # Check if parent has a parent, and update it to be sure.
                await elem.update()
            # When it really has no parent, use the text node itself
            items.append(elem.parent or elem)
        else:
            # Add the element itself
            items.append(elem)
    # Since the entire doc is already fetched, including shadow and frames,
    # also search through the iframes.
    iframes = util.filter_recurse_all(
        doc, lambda node: node.node_name == "IFRAME"
    )
    if iframes:
        needle = text.lower()
        iframes_elems = [
            element.create(iframe, self, iframe.content_document)
            for iframe in iframes
        ]
        for iframe_elem in iframes_elems:
            if not iframe_elem.content_document:
                # Same guard as find_elements_by_text.
                continue
            iframe_text_nodes = util.filter_recurse_all(
                iframe_elem,
                lambda node: node.node_type == 3  # noqa
                and needle in node.node_value.lower(),
            )
            for text_node in iframe_text_nodes or []:
                text_elem = element.create(
                    text_node, self, iframe_elem.tree
                )
                items.append(text_elem.parent or text_elem)
    try:
        # Only usable (truthy) candidates take part in the selection,
        # so the length comparison never sees a None entry.
        candidates = [el for el in items if el]
        if not candidates:
            return None
        if best_match:
            return min(
                candidates,
                key=lambda el: abs(len(text) - len(el.text_all)),
            )
        # Return the first result
        return candidates[0]
    finally:
        await self.send(cdp.dom.disable())
630
+
631
async def back(self):
    """Go one step back in the history by evaluating JavaScript."""
    go_back = cdp.runtime.evaluate("window.history.back()")
    await self.send(go_back)
634
+
635
async def forward(self):
    """Go one step forward in the history by evaluating JavaScript."""
    go_forward = cdp.runtime.evaluate("window.history.forward()")
    await self.send(go_forward)
638
+
639
async def reload(
    self,
    ignore_cache: Optional[bool] = True,
    script_to_evaluate_on_load: Optional[str] = None,
):
    """
    Reload the current page.
    :param ignore_cache: passed on to ``cdp.page.reload``.
        When set to True (default), it ignores cache,
        and re-downloads the items.
    :param script_to_evaluate_on_load: Script to run on load.
    """
    command = cdp.page.reload(
        ignore_cache=ignore_cache,
        script_to_evaluate_on_load=script_to_evaluate_on_load,
    )
    await self.send(command)
656
+
657
async def evaluate(
    self, expression: str, await_promise=False, return_by_value=True
):
    """
    Evaluate a JavaScript expression in this tab.
    :param expression: the JavaScript source to evaluate.
    :type expression: str
    :param await_promise: forwarded to ``cdp.runtime.evaluate``.
    :param return_by_value: When True (default), only the value of the
        result is returned. Note that a falsy value (0, "", False, ...)
        comes back as None. When False, the tuple
        (remote_object, errors) is returned.
    :raises ProtocolException: when the evaluation reports errors.
    """
    command = cdp.runtime.evaluate(
        expression=expression,
        user_gesture=True,
        await_promise=await_promise,
        return_by_value=return_by_value,
        allow_unsafe_eval_blocked_by_csp=True,
    )
    remote_object, errors = await self.send(command)
    if errors:
        raise ProtocolException(errors)
    if not remote_object:
        return None
    if not return_by_value:
        return remote_object, errors
    if remote_object.value:
        return remote_object.value
    return None
677
+
678
async def js_dumps(
    self, obj_name: str, return_by_value: Optional[bool] = True
) -> Union[
    Dict,
    Tuple[cdp.runtime.RemoteObject, cdp.runtime.ExceptionDetails],
]:
    """
    Dump Given js object with its properties and values as a dict.
    Note: Complex objects might not be serializable,
    therefore this method is not a "source of truth"
    :param obj_name: the js object to dump.
        It is inserted verbatim into the evaluated script.
    :type obj_name: str
    :param return_by_value: If you want a tuple of cdp objects
        (returnvalue, errors), then set this to False.
    :type return_by_value: bool
    :raises ProtocolException: when both script variants fail.

    Example
    -------

    x = await self.js_dumps('window')
    print(x)
    '...{
    'pageYOffset': 0,
    'visualViewport': {},
    'screenX': 10,
    'screenY': 10,
    'outerWidth': 1050,
    'outerHeight': 832,
    'devicePixelRatio': 1,
    'screenLeft': 10,
    'screenTop': 10,
    'styleMedia': {},
    'onsearch': None,
    'isSecureContext': True,
    'trustedTypes': {},
    'performance': {'timeOrigin': 1707823094767.9,
    'timing': {'connectStart': 0,
    'navigationStart': 1707823094768,
    ]...
    """
    # First variant: walks own and inherited property names,
    # at most two levels deep.
    js_code_a = (
        """
        function ___dump(obj, _d = 0) {
            let _typesA = ['object', 'function'];
            let _typesB = ['number', 'string', 'boolean'];
            if (_d == 2) {
                console.log('maxdepth reached for ', obj);
                return
            }
            let tmp = {}
            for (let k in obj) {
                if (obj[k] == window) continue;
                let v;
                try {
                    if (obj[k] === null
                        || obj[k] === undefined
                        || obj[k] === NaN) {
                        console.log('obj[k] is null or undefined or Nan',
                                    k, '=>', obj[k])
                        tmp[k] = obj[k];
                        continue
                    }
                } catch (e) {
                    tmp[k] = null;
                    continue
                }
                if (_typesB.includes(typeof obj[k])) {
                    tmp[k] = obj[k]
                    continue
                }
                try {
                    if (typeof obj[k] === 'function') {
                        tmp[k] = obj[k].toString()
                        continue
                    }
                    if (typeof obj[k] === 'object') {
                        tmp[k] = ___dump(obj[k], _d + 1);
                        continue
                    }
                } catch (e) {}
                try {
                    tmp[k] = JSON.stringify(obj[k])
                    continue
                } catch (e) {
                }
                try {
                    tmp[k] = obj[k].toString();
                    continue
                } catch (e) {}
            }
            return tmp
        }
        function ___dumpY(obj) {
            var objKeys = (obj) => {
                var [target, result] = [obj, []];
                while (target !== null) {
                    result = result.concat(
                        Object.getOwnPropertyNames(target)
                    );
                    target = Object.getPrototypeOf(target);
                }
                return result;
            }
            return Object.fromEntries(
                objKeys(obj).map(_ => [_, ___dump(obj[_])]))
        }
        ___dumpY( %s )
        """
        % obj_name
    )
    # Second variant (fallback). It is a *named* function expression so
    # that the inner `recurse(...)` call resolves to the function itself.
    # With an anonymous arrow function that call raised a ReferenceError,
    # which the surrounding try/catch swallowed, so every nested object
    # was silently dropped from the result.
    js_code_b = (
        """
        (function recurse(obj, visited = new WeakSet()) {
            if (visited.has(obj)) {
                return {}
            }
            visited.add(obj)
            var result = {}, _tmp;
            for (var i in obj) {
                try {
                    if (i === 'enabledPlugin'
                        || typeof obj[i] === 'function') {
                        continue;
                    } else if (typeof obj[i] === 'object') {
                        _tmp = recurse(obj[i], visited);
                        if (Object.keys(_tmp).length) {
                            result[i] = _tmp;
                        }
                    } else {
                        result[i] = obj[i];
                    }
                } catch (error) {
                    // console.error('Error:', error);
                }
            }
            return result;
        })(%s)
        """
        % obj_name
    )
    # No self.evaluate here to prevent infinite loop on certain expressions
    remote_object, exception_details = await self.send(
        cdp.runtime.evaluate(
            js_code_a,
            await_promise=True,
            return_by_value=return_by_value,
            allow_unsafe_eval_blocked_by_csp=True,
        )
    )
    if exception_details:
        # Try second variant
        remote_object, exception_details = await self.send(
            cdp.runtime.evaluate(
                js_code_b,
                await_promise=True,
                return_by_value=return_by_value,
                allow_unsafe_eval_blocked_by_csp=True,
            )
        )
    if exception_details:
        raise ProtocolException(exception_details)
    if return_by_value:
        # A falsy value falls through, so None is returned for it.
        if remote_object.value:
            return remote_object.value
    else:
        return remote_object, exception_details
844
+
845
async def close(self):
    """
    Close the current target (ie: tab,window,page).
    Does nothing when there is no target or the target has no id.
    """
    if not (self.target and self.target.target_id):
        return
    close_command = cdp.target.close_target(
        target_id=self.target.target_id
    )
    await self.send(close_command)
851
+
852
async def get_window(self) -> Tuple[
    cdp.browser.WindowID, cdp.browser.Bounds
]:
    """
    Get the window id and the window Bounds for this target.
    :return: tuple of (window_id, bounds)
    """
    response = await self.send(
        cdp.browser.get_window_for_target(self.target_id)
    )
    # Unpack and rebuild, so the caller always receives a 2-tuple.
    window_id, bounds = response
    return window_id, bounds
860
+
861
async def get_content(self):
    """
    Gets the current page source content (html).
    The document is fetched first, and then its outer html
    is requested through the document's backend node id.
    """
    document: cdp.dom.Node = await self.send(
        cdp.dom.get_document(-1, True)
    )
    outer_html_command = cdp.dom.get_outer_html(
        backend_node_id=document.backend_node_id
    )
    return await self.send(outer_html_command)
867
+
868
async def maximize(self):
    """Maximize page/tab/window via set_window_state(state="maximize")."""
    outcome = await self.set_window_state(state="maximize")
    return outcome
871
+
872
async def minimize(self):
    """Minimize page/tab/window via set_window_state(state="minimize")."""
    outcome = await self.set_window_state(state="minimize")
    return outcome
875
+
876
async def fullscreen(self):
    """Make page/tab/window fullscreen via set_window_state()."""
    outcome = await self.set_window_state(state="fullscreen")
    return outcome
879
+
880
async def medimize(self):
    """Put page/tab/window in the "normal" window state."""
    outcome = await self.set_window_state(state="normal")
    return outcome
882
+
883
async def set_window_size(self, left=0, top=0, width=1280, height=1024):
    """
    Set window size and position.
    Delegates to set_window_state(), leaving its state at the default.
    :param left:
        Pixels from the left of the screen to the window top-left corner.
    :param top:
        Pixels from the top of the screen to the window top-left corner.
    :param width: width of the window in pixels
    :param height: height of the window in pixels
    """
    outcome = await self.set_window_state(
        left=left, top=top, width=width, height=height
    )
    return outcome
894
+
895
async def activate(self):
    """Activate this target (Eg: tab, window, page)"""
    target_id = self.target.target_id
    await self.send(cdp.target.activate_target(target_id))
898
+
899
async def bring_to_front(self):
    """Alias to self.activate (the result of activate is not returned)."""
    activation = self.activate()
    await activation
902
+
903
async def set_window_state(
    self, left=0, top=0, width=1280, height=720, state="normal"
):
    """
    Sets the window size or state.
    For state you can provide the full name like minimized, maximized,
    normal, fullscreen, or something which leads to either of those,
    like min, mini, mi, max, ma, maxi, full, fu, no, nor.
    In case state is set other than "normal",
    the left, top, width, and height are ignored.
    :param left:
        desired offset from left, in pixels
    :type left: int
    :param top:
        desired offset from the top, in pixels
    :type top: int
    :param width:
        desired width in pixels
    :type width: int
    :param height:
        desired height in pixels
    :type height: int
    :param state:
        can be one of the following strings:
            - normal
            - fullscreen
            - maximized
            - minimized
    :type state: str
    :raises NameError:
        if ``state`` is empty or cannot be matched to a known state
    """
    available_states = ["minimized", "maximized", "fullscreen", "normal"]
    wanted = state.lower().strip()
    state_name = None
    # An empty string would vacuously satisfy the all() test below and
    # silently pick "minimized", so only search when there is input.
    if wanted:
        for candidate in available_states:
            # Loose match: every typed character must occur in the name.
            if all(char in candidate for char in wanted):
                state_name = candidate
                break
    if state_name is None:
        # Validate before talking to the browser so bad input fails fast.
        raise NameError(
            "could not determine any of %s from input '%s'"
            % (",".join(available_states), state)
        )
    window_id: cdp.browser.WindowID
    bounds: cdp.browser.Bounds
    # Only the window id is needed; the bounds are rebuilt below.
    (window_id, _) = await self.get_window()
    window_state = getattr(
        cdp.browser.WindowState,
        state_name.upper(),
        cdp.browser.WindowState.NORMAL,
    )
    if window_state == cdp.browser.WindowState.NORMAL:
        bounds = cdp.browser.Bounds(
            left, top, width, height, window_state
        )
    else:
        # min, max, full can only be used when current state == NORMAL,
        # therefore, first switch to NORMAL.
        # NOTE: this recursive call uses the default geometry arguments.
        await self.set_window_state(state="normal")
        bounds = cdp.browser.Bounds(window_state=window_state)

    await self.send(
        cdp.browser.set_window_bounds(window_id, bounds=bounds)
    )
963
+
964
async def scroll_down(self, amount=25):
    """
    Scrolls the page down.
    The distance is ``amount`` percent of the height reported
    by :py:meth:`get_window`.
    :param amount: Number in percentage.
        25 is a quarter of page, 50 half, and 1000 is 10x the page.
    :type amount: int
    """
    window_id: cdp.browser.WindowID
    bounds: cdp.browser.Bounds
    window_id, bounds = await self.get_window()
    distance = bounds.height * (amount / 100)
    # The gesture is sent with a negated distance for scrolling down.
    gesture = cdp.input_.synthesize_scroll_gesture(
        x=0,
        y=0,
        y_distance=-distance,
        y_overscroll=0,
        x_overscroll=0,
        prevent_fling=True,
        repeat_delay_ms=0,
        speed=7777,
    )
    await self.send(gesture)
986
+
987
async def scroll_up(self, amount=25):
    """
    Scrolls the page up.
    The distance is ``amount`` percent of the height reported
    by :py:meth:`get_window`.
    :param amount: Number in percentage.
        25 is a quarter of page, 50 half, and 1000 is 10x the page.
    :type amount: int
    """
    window_id: cdp.browser.WindowID
    bounds: cdp.browser.Bounds
    window_id, bounds = await self.get_window()
    distance = bounds.height * (amount / 100)
    # The gesture is sent with a positive distance for scrolling up.
    gesture = cdp.input_.synthesize_scroll_gesture(
        x=0,
        y=0,
        y_distance=distance,
        x_overscroll=0,
        prevent_fling=True,
        repeat_delay_ms=0,
        speed=7777,
    )
    await self.send(gesture)
1008
+
1009
async def wait_for(
    self,
    selector: Optional[str] = "",
    text: Optional[str] = "",
    timeout: Optional[Union[int, float]] = 10,
) -> element.Element:
    """
    Poll the page until an element shows up, then return it.
    The lookup uses query_selector when ``selector`` is given,
    otherwise find_element_by_text when ``text`` is given.
    If both are given, ``selector`` wins; if neither is given,
    nothing is looked up and None is returned.
    Polling pauses 0.5 seconds between attempts.
    :param selector: css selector
    :param text: text
    :param timeout: maximum number of seconds to keep polling
    :return: Element
    :raises: asyncio.TimeoutError
    """
    loop = asyncio.get_running_loop()
    started = loop.time()
    if selector:
        lookup, wanted = self.query_selector, selector
    elif text:
        lookup, wanted = self.find_element_by_text, text
    else:
        return None
    found = await lookup(wanted)
    while not found:
        found = await lookup(wanted)
        # The deadline is checked right after the retry, even if
        # that retry just succeeded.
        if loop.time() - started > timeout:
            raise asyncio.TimeoutError(
                "Time ran out while waiting for: {%s}" % wanted
            )
        await self.sleep(0.5)
    return found
1049
+
1050
async def download_file(
    self, url: str, filename: Optional[PathLike] = None
):
    """
    Downloads the file by the given url.
    The download is triggered from inside the page: injected JavaScript
    fetches the url and clicks a temporary anchor element.
    If no download behavior has been set yet, a "downloads" folder
    in the current working directory is created and used,
    and a warning is emitted.
    :param url: The URL of the file.
    :param filename: The name for the file.
        If not specified, the name is composed from the url file name
    """
    import json

    if not self._download_behavior:
        directory_path = pathlib.Path.cwd() / "downloads"
        directory_path.mkdir(exist_ok=True)
        await self.set_download_path(directory_path)

        warnings.warn(
            f"No download path set, so creating and using a default of "
            f"{directory_path}"
        )
    if not filename:
        # Last path segment of the url, without any query string.
        filename = url.rsplit("/")[-1]
        filename = filename.split("?")[0]
    # json.dumps yields properly quoted and escaped JavaScript string
    # literals, so quotes or backslashes in the url or filename can
    # neither break nor inject into the script.
    code = """
     (elem) => {
        async function _downloadFile(
          imageSrc,
          nameOfDownload,
        ) {
          const response = await fetch(imageSrc);
          const blobImage = await response.blob();
          const href = URL.createObjectURL(blobImage);
          const anchorElement = document.createElement('a');
          anchorElement.href = href;
          anchorElement.download = nameOfDownload;
          document.body.appendChild(anchorElement);
          anchorElement.click();
          setTimeout(() => {
            document.body.removeChild(anchorElement);
            window.URL.revokeObjectURL(href);
            }, 500);
        }
        _downloadFile(%s, %s)
        }
     """ % (
        json.dumps(str(url)),
        json.dumps(str(filename)),
    )
    body = (await self.query_selector_all("body"))[0]
    await body.update()
    await self.send(
        cdp.runtime.call_function_on(
            code,
            object_id=body.object_id,
            arguments=[cdp.runtime.CallArgument(object_id=body.object_id)],
        )
    )
1105
+
1106
async def save_screenshot(
    self,
    filename: Optional[PathLike] = "auto",
    format: Optional[str] = "png",
    full_page: Optional[bool] = False,
) -> str:
    """
    Saves a screenshot of the page.
    This is not the same as :py:obj:`Element.save_screenshot`,
    which saves a screenshot of a single element only.
    :param filename: uses this as the save path.
        When empty or "auto", a name is built from the page's
        hostname, last url path segment, and the current date/time.
    :type filename: PathLike
    :param format: jpeg or png (defaults to png).
        "jpg" is accepted as an alias of "jpeg". Any other value is
        passed to the browser unchanged.
    :type format: str
    :param full_page:
        When False (default), it captures the current viewport.
        When True, it captures the entire page.
    :type full_page: bool
    :return: The path/filename of the saved screenshot.
    :rtype: str
    :raises ProtocolException: if the browser returned no image data
    """
    import base64
    import datetime
    import urllib.parse

    await self.sleep()  # Update the target's URL
    requested = format.lower()
    if requested in ("jpg", "jpeg"):
        ext = ".jpg"
        format = "jpeg"
    elif requested == "png":
        ext = ".png"
        format = "png"
    else:
        # Keep passing unknown formats through as given, but still
        # define an extension so auto-naming cannot hit an unbound name.
        ext = "." + requested
    if not filename or filename == "auto":
        parsed = urllib.parse.urlparse(self.target.url)
        last_part = parsed.path.split("/")[-1]
        last_part = last_part.rsplit("?", 1)[0]
        dt_str = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        candidate = f"{parsed.hostname}__{last_part}_{dt_str}"
        path = pathlib.Path(candidate + ext)  # noqa
    else:
        path = pathlib.Path(filename)
    path.parent.mkdir(parents=True, exist_ok=True)
    data = await self.send(
        cdp.page.capture_screenshot(
            format_=format, capture_beyond_viewport=full_page
        )
    )
    if not data:
        raise ProtocolException(
            "Could not take screenshot. "
            "Most possible cause is the page "
            "has not finished loading yet."
        )
    # The image data is base64-decoded before it is written to disk.
    path.write_bytes(base64.b64decode(data))
    return str(path)
1167
+
1168
async def set_download_path(self, path: PathLike):
    """
    Sets the download path.
    The path is resolved to an absolute path, sent to the browser with
    behavior "allow", and remembered in ``self._download_behavior``.
    :param path: target directory, as a string or a pathlib.Path
    """
    # Wrap in pathlib.Path so that plain strings work as well;
    # calling .resolve() on a str would raise AttributeError.
    resolved = str(pathlib.Path(path).resolve())
    await self.send(
        cdp.browser.set_download_behavior(
            behavior="allow", download_path=resolved
        )
    )
    self._download_behavior = ["allow", resolved]
1180
+
1181
async def get_all_linked_sources(self) -> List["element.Element"]:
    """Get all elements of tag: a, link, img, script, meta"""
    found = await self.query_selector_all(
        selector="a,link,img,script,meta"
    )
    linked = []
    for node in found:
        # Each match is passed through element.create together with this tab.
        linked.append(element.create(node, self))
    return linked
1187
+
1188
async def get_all_urls(self, absolute=True) -> List[str]:
    """
    Convenience function, which returns all links (a,link,img,script,meta).
    :param absolute:
        Try to build all the links in absolute form
        instead of "as is", often relative.
        When True, only "src" and "href" attributes are considered,
        values containing "#" or no "/" are skipped, and the rest are
        resolved against the current page url.
        When False, each element's src (or else href) is returned
        unmodified, skipping elements that have neither.
    :return: List of URLs.
    """
    import urllib.parse

    res = []
    all_assets = await self.query_selector_all(
        selector="a,link,img,script,meta"
    )
    for asset in all_assets:
        if not absolute:
            raw = asset.src or asset.href
            # Elements such as <meta> often have neither attribute;
            # skip them so the result contains strings only.
            if raw:
                res.append(raw)
            continue
        for k, v in asset.attrs.items():
            if k not in ("src", "href"):
                continue
            if "#" in v:
                continue
            if not any(marker in v for marker in ("http", "//", "/")):
                continue
            # Resolve against the full page url, not just its origin,
            # so that relative paths such as "img/a.png" end up in the
            # page's own directory, as a browser would resolve them.
            abs_url = urllib.parse.urljoin(self.url, v)
            if not abs_url.startswith(("http", "//", "ws")):
                continue
            res.append(abs_url)
    return res
1219
+
1220
async def verify_cf(self):
    """
    (An attempt)
    Waits for the text "verify you are human", then walks up that
    element's ancestors until one of them contains a checkbox input.
    If a checkbox is found, the mouse is moved to it and it is clicked.
    If no checkbox is found, nothing is clicked.
    """
    checkbox = None
    checkbox_sibling = await self.wait_for(text="verify you are human")
    if checkbox_sibling:
        parent = checkbox_sibling.parent
        while parent:
            checkbox = await parent.query_selector("input[type=checkbox]")
            if checkbox:
                break
            parent = parent.parent
    if not checkbox:
        # The ancestor chain ran out without finding a checkbox;
        # there is nothing to click, so do not call methods on None.
        return
    await checkbox.mouse_move()
    await checkbox.mouse_click()
1233
+
1234
async def get_document(self):
    """Send ``cdp.dom.get_document()`` and return the response."""
    request = cdp.dom.get_document()
    return await self.send(request)
1236
+
1237
async def get_flattened_document(self):
    """Send ``cdp.dom.get_flattened_document()`` and return the response."""
    request = cdp.dom.get_flattened_document()
    return await self.send(request)
1239
+
1240
async def get_local_storage(self):
    """
    Get local storage items as dict of strings.
    Proper deserialization may need to be done.
    :return: dict mapping each storage key to its stored value
    """
    if not self.target.url:
        # No url known yet: await the tab itself first.
        # NOTE(review): presumably this refreshes the target info - confirm.
        await self
    # Drop everything after the third "/" to get "scheme://host".
    url_pieces = self.url.split("/", 3)
    origin = "/".join(url_pieces[:-1])
    storage_id = cdp.dom_storage.StorageId(
        is_local_storage=True, security_origin=origin
    )
    items = await self.send(
        cdp.dom_storage.get_dom_storage_items(storage_id)
    )
    # Each item is indexable as (key, value).
    return {entry[0]: entry[1] for entry in items}
1259
+
1260
async def set_local_storage(self, items: dict):
    """
    Set local storage.
    Keys and values are converted with str() before being stored,
    and all items are sent concurrently.
    :param items: dict containing {key:str, value:str}
    :type items: dict[str,str]
    """
    if not self.target.url:
        # No url known yet: await the tab itself first.
        # NOTE(review): presumably this refreshes the target info - confirm.
        await self
    # Drop everything after the third "/" to get "scheme://host".
    origin = "/".join(self.url.split("/", 3)[:-1])
    pending = []
    for key, val in items.items():
        command = cdp.dom_storage.set_dom_storage_item(
            storage_id=cdp.dom_storage.StorageId(
                is_local_storage=True, security_origin=origin
            ),
            key=str(key),
            value=str(val),
        )
        pending.append(self.send(command))
    await asyncio.gather(*pending)
1285
+
1286
def __call__(
    self,
    text: Optional[str] = "",
    selector: Optional[str] = "",
    timeout: Optional[Union[int, float]] = 10,
):
    """
    Alias to :py:meth:`wait_for`: waits for an element by text
    or by css selector, depending on whether text= is set
    or selector= is set.
    Returns the awaitable produced by ``wait_for``,
    so use it as ``await tab("some text")``.
    :param text: text to search for
    :type text: str
    :param selector: css selector string
    :type selector: str
    :param timeout: maximum number of seconds to wait
    :type timeout: int or float
    """
    # Pass by keyword: wait_for takes (selector, text, timeout), which is
    # a different positional order than this method's (text, selector).
    return self.wait_for(selector=selector, text=text, timeout=timeout)
1299
+
1300
def __eq__(self, other: Tab):
    """
    Two tabs are equal when their ``target`` attributes compare equal.
    Objects without a usable ``target`` are never equal to a tab.
    """
    try:
        same_target = other.target == self.target
    except (AttributeError, TypeError):
        return False
    return same_target
1305
+
1306
+ def __getattr__(self, item):
1307
+ try:
1308
+ return getattr(self._target, item)
1309
+ except AttributeError:
1310
+ raise AttributeError(
1311
+ f'"{self.__class__.__name__}" has no attribute "%s"' % item
1312
+ )
1313
+
1314
def __repr__(self):
    """
    Show the class name, target id and target type,
    followed by the url when the target has one.
    """
    url = self.target.url
    extra = f"[url: {url}]" if url else ""
    return f"<{type(self).__name__} [{self.target_id}] [{self.type_}] {extra}>"  # noqa