PlaywrightCapture 1.24.9__tar.gz → 1.24.11__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PlaywrightCapture
3
- Version: 1.24.9
3
+ Version: 1.24.11
4
4
  Summary: A simple library to capture websites using playwright
5
5
  Home-page: https://github.com/Lookyloo/PlaywrightCapture
6
6
  License: BSD-3-Clause
@@ -26,12 +26,12 @@ Requires-Dist: aiohttp[speedups] (>=3.9.5,<4.0.0)
26
26
  Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
27
27
  Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
28
28
  Requires-Dist: dateparser (>=1.2.0,<2.0.0)
29
- Requires-Dist: playwright (>=1.43.0,<2.0.0)
29
+ Requires-Dist: playwright (>=1.44.0,<2.0.0)
30
30
  Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
31
31
  Requires-Dist: puremagic (>=1.23,<2.0)
32
32
  Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
33
33
  Requires-Dist: pytz (>=2024.1,<2025.0) ; python_version < "3.9"
34
- Requires-Dist: setuptools (>=69.5.1,<70.0.0)
34
+ Requires-Dist: setuptools (>=70.0.0,<71.0.0)
35
35
  Requires-Dist: tzdata (>=2024.1,<2025.0)
36
36
  Requires-Dist: w3lib (>=2.1.2,<3.0.0)
37
37
  Project-URL: Repository, https://github.com/Lookyloo/PlaywrightCapture
@@ -474,7 +474,7 @@ class Capture():
474
474
  if await page.locator("#didomi-notice-agree-button").is_visible():
475
475
  await page.locator("#didomi-notice-agree-button").click(timeout=2000)
476
476
 
477
- await page.add_locator_handler(page.locator(".didomi-popup-view").last, handler)
477
+ await page.add_locator_handler(page.locator(".didomi-popup-view").last, handler, times=1, no_wait_after=True)
478
478
  self.logger.info('Didomi handler added')
479
479
 
480
480
  async def __dialog_onetrust_clickthrough(self, page: Page) -> None:
@@ -484,7 +484,8 @@ class Capture():
484
484
 
485
485
  await page.add_locator_handler(
486
486
  page.locator('#onetrust-banner-sdk').last,
487
- handler
487
+ handler,
488
+ times=1, no_wait_after=True
488
489
  )
489
490
  self.logger.info('OT handler added')
490
491
 
@@ -495,7 +496,8 @@ class Capture():
495
496
 
496
497
  await page.add_locator_handler(
497
498
  page.locator('#hs-eu-cookie-confirmation').last,
498
- handler
499
+ handler,
500
+ times=1, no_wait_after=True
499
501
  )
500
502
  self.logger.info('HS handler added')
501
503
 
@@ -506,7 +508,8 @@ class Capture():
506
508
 
507
509
  await page.add_locator_handler(
508
510
  page.locator('#CybotCookiebotDialogBody'),
509
- handler
511
+ handler,
512
+ times=1, no_wait_after=True
510
513
  )
511
514
  self.logger.info('Cookiebot handler added')
512
515
 
@@ -522,7 +525,8 @@ class Capture():
522
525
 
523
526
  await page.add_locator_handler(
524
527
  page.get_by_role("alertdialog").last,
525
- handler
528
+ handler,
529
+ times=1, no_wait_after=True
526
530
  )
527
531
  self.logger.info('alert dialog handler added')
528
532
 
@@ -541,18 +545,20 @@ class Capture():
541
545
  self.logger.info('Consent window found, but no button to click through.')
542
546
  await page.add_locator_handler(
543
547
  page.get_by_role("dialog").last,
544
- handler
548
+ handler,
549
+ times=1, no_wait_after=True
545
550
  )
546
551
  self.logger.info('dialog handler added')
547
552
 
548
553
  async def __dialog_complianz_clickthrough(self, page: Page) -> None:
549
554
  async def handler() -> None:
550
- if await page.locator('.cmplz-show').locator("button.cmplz-accept").is_visible():
551
- await page.locator('.cmplz-show').locator("button.cmplz-accept").click(timeout=2000)
555
+ if await page.locator('.cmplz-show').first.locator("button.cmplz-accept").is_visible():
556
+ await page.locator('.cmplz-show').first.locator("button.cmplz-accept").click(timeout=2000)
552
557
 
553
558
  await page.add_locator_handler(
554
- page.locator('.cmplz-show'),
555
- handler
559
+ page.locator('.cmplz-show').first,
560
+ handler,
561
+ times=1, no_wait_after=True
556
562
  )
557
563
  self.logger.info('Complianz handler added')
558
564
 
@@ -563,7 +569,8 @@ class Capture():
563
569
 
564
570
  await page.add_locator_handler(
565
571
  page.locator('.con-wizard'),
566
- handler
572
+ handler,
573
+ times=1, no_wait_after=True
567
574
  )
568
575
  self.logger.info('Yahoo handler added')
569
576
 
@@ -574,7 +581,8 @@ class Capture():
574
581
 
575
582
  await page.add_locator_handler(
576
583
  page.locator('#ppms_cm_popup_overlay'),
577
- handler
584
+ handler,
585
+ times=1, no_wait_after=True
578
586
  )
579
587
  self.logger.info('Yahoo handler added')
580
588
 
@@ -644,7 +652,13 @@ class Capture():
644
652
  capturing_sub = True
645
653
  else:
646
654
  capturing_sub = False
647
- page = await self.context.new_page()
655
+ try:
656
+ page = await self.context.new_page()
657
+ except Error as e:
658
+ self.logger.warning(f'The context is in a broken state: {e}')
659
+ self.should_retry = True
660
+ return to_return
661
+
648
662
  if allow_tracking:
649
663
  # Add authorization clickthroughs
650
664
  await self.__dialog_didomi_clickthrough(page)
@@ -704,8 +718,13 @@ class Capture():
704
718
  else:
705
719
  raise initial_error
706
720
  else:
707
- await page.bring_to_front()
708
- self.logger.debug('Page moved to front.')
721
+ try:
722
+ await page.bring_to_front()
723
+ self.logger.debug('Page moved to front.')
724
+ except Error as e:
725
+ self.should_retry = True
726
+ self.logger.warning('Page in a broken state.')
727
+ raise e
709
728
 
710
729
  # page instrumentation
711
730
  await self._wait_for_random_timeout(page, 5) # Wait 5 sec after document loaded
@@ -902,6 +921,7 @@ class Capture():
902
921
  'Navigation failed because page was closed!',
903
922
  'Target page, context or browser has been closed',
904
923
  'Protocol error (Page.bringToFront): Not attached to an active page',
924
+ 'Peer failed to perform TLS handshake: A packet with illegal or unsupported version was received.',
905
925
  'Peer failed to perform TLS handshake: The TLS connection was non-properly terminated.',
906
926
  'Peer failed to perform TLS handshake: Error sending data: Connection reset by peer',
907
927
  'Peer failed to perform TLS handshake: Error receiving data: Connection reset by peer',
@@ -909,7 +929,10 @@ class Capture():
909
929
  'Peer sent fatal TLS alert: Internal error',
910
930
  'Load cannot follow more than 20 redirections',
911
931
  'Page crashed',
912
- 'Error receiving data: Connection reset by peer']:
932
+ 'Error receiving data: Connection reset by peer',
933
+ 'Internal SOCKSv5 proxy server error.',
934
+ 'Host unreachable through SOCKSv5 server.',
935
+ 'HTTP/2 Error: NO_ERROR']:
913
936
  # Other errors, let's give it another shot
914
937
  self.logger.info(f'Issue with {url} (retrying): {e.message}')
915
938
  self.should_retry = True
@@ -1155,6 +1178,7 @@ class Capture():
1155
1178
  'NS_ERROR_ABORT',
1156
1179
  'NS_ERROR_CONNECTION_REFUSED',
1157
1180
  'NS_ERROR_NET_INTERRUPT',
1181
+ 'NS_ERROR_NET_PARTIAL_TRANSFER',
1158
1182
  'NS_ERROR_NET_RESET',
1159
1183
  'NS_ERROR_NET_TIMEOUT',
1160
1184
  'NS_ERROR_REDIRECT_LOOP',
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "PlaywrightCapture"
3
- version = "1.24.9"
3
+ version = "1.24.11"
4
4
  description = "A simple library to capture websites using playwright"
5
5
  authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
6
6
  license = "BSD-3-Clause"
@@ -19,7 +19,7 @@ classifiers=[
19
19
 
20
20
  [tool.poetry.dependencies]
21
21
  python = "^3.8"
22
- playwright = "^1.43.0"
22
+ playwright = "^1.44.0"
23
23
  dateparser = "^1.2.0"
24
24
  beautifulsoup4 = {version= "^4.12.3", extras = ["lxml", "charset_normalizer"]}
25
25
  w3lib = "^2.1.2"
@@ -28,7 +28,7 @@ SpeechRecognition = {version = "^3.10.4", optional = true}
28
28
  pytz = {"version" = "^2024.1", python = "<3.9"}
29
29
  tzdata = "^2024.1"
30
30
  playwright-stealth = "^1.0.6"
31
- setuptools = "^69.5.1"
31
+ setuptools = "^70.0.0"
32
32
  puremagic = "^1.23"
33
33
  async-timeout = {version = "^4.0.3", python = "<3.11"}
34
34
  aiohttp = {extras = ["speedups"], version = "^3.9.5"}
@@ -42,7 +42,7 @@ optional = true
42
42
 
43
43
  [tool.poetry.group.dev.dependencies]
44
44
  types-beautifulsoup4 = "^4.12.0.20240511"
45
- pytest = "^8.2.0"
45
+ pytest = "^8.2.1"
46
46
  mypy = "^1.10.0"
47
47
  types-dateparser = "^1.2.0.20240420"
48
48
  types-pytz = "^2024.1.0.20240417"