PlaywrightCapture 1.24.10__py3-none-any.whl → 1.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -474,7 +474,7 @@ class Capture():
474
474
  if await page.locator("#didomi-notice-agree-button").is_visible():
475
475
  await page.locator("#didomi-notice-agree-button").click(timeout=2000)
476
476
 
477
- await page.add_locator_handler(page.locator(".didomi-popup-view").last, handler)
477
+ await page.add_locator_handler(page.locator(".didomi-popup-view").last, handler, times=1, no_wait_after=True)
478
478
  self.logger.info('Didomi handler added')
479
479
 
480
480
  async def __dialog_onetrust_clickthrough(self, page: Page) -> None:
@@ -484,7 +484,8 @@ class Capture():
484
484
 
485
485
  await page.add_locator_handler(
486
486
  page.locator('#onetrust-banner-sdk').last,
487
- handler
487
+ handler,
488
+ times=1, no_wait_after=True
488
489
  )
489
490
  self.logger.info('OT handler added')
490
491
 
@@ -495,7 +496,8 @@ class Capture():
495
496
 
496
497
  await page.add_locator_handler(
497
498
  page.locator('#hs-eu-cookie-confirmation').last,
498
- handler
499
+ handler,
500
+ times=1, no_wait_after=True
499
501
  )
500
502
  self.logger.info('HS handler added')
501
503
 
@@ -506,7 +508,8 @@ class Capture():
506
508
 
507
509
  await page.add_locator_handler(
508
510
  page.locator('#CybotCookiebotDialogBody'),
509
- handler
511
+ handler,
512
+ times=1, no_wait_after=True
510
513
  )
511
514
  self.logger.info('Cookiebot handler added')
512
515
 
@@ -518,11 +521,12 @@ class Capture():
518
521
  elif await page.locator('#onetrust-button-group').locator("#onetrust-accept-btn-handler").is_visible():
519
522
  await page.locator('#onetrust-button-group').locator("#onetrust-accept-btn-handler").click(timeout=1000)
520
523
  else:
521
- self.logger.info('Consent window found, but no button to click through.')
524
+ self.logger.info('Consent window found (alert dialog), but no button to click through.')
522
525
 
523
526
  await page.add_locator_handler(
524
527
  page.get_by_role("alertdialog").last,
525
- handler
528
+ handler,
529
+ times=1, no_wait_after=True
526
530
  )
527
531
  self.logger.info('alert dialog handler added')
528
532
 
@@ -538,21 +542,23 @@ class Capture():
538
542
  self.logger.info('Consent window found, clicking through.')
539
543
  await page.get_by_test_id("uc-accept-all-button").click(timeout=2000)
540
544
  else:
541
- self.logger.info('Consent window found, but no button to click through.')
545
+ self.logger.info('Consent window found (dialog), but no button to click through.')
542
546
  await page.add_locator_handler(
543
547
  page.get_by_role("dialog").last,
544
- handler
548
+ handler,
549
+ times=1, no_wait_after=True
545
550
  )
546
551
  self.logger.info('dialog handler added')
547
552
 
548
553
  async def __dialog_complianz_clickthrough(self, page: Page) -> None:
549
554
  async def handler() -> None:
550
- if await page.locator('.cmplz-show').locator("button.cmplz-accept").is_visible():
551
- await page.locator('.cmplz-show').locator("button.cmplz-accept").click(timeout=2000)
555
+ if await page.locator('.cmplz-show').first.locator("button.cmplz-accept").is_visible():
556
+ await page.locator('.cmplz-show').first.locator("button.cmplz-accept").click(timeout=2000)
552
557
 
553
558
  await page.add_locator_handler(
554
- page.locator('.cmplz-show'),
555
- handler
559
+ page.locator('.cmplz-show').first,
560
+ handler,
561
+ times=1, no_wait_after=True
556
562
  )
557
563
  self.logger.info('Complianz handler added')
558
564
 
@@ -563,7 +569,8 @@ class Capture():
563
569
 
564
570
  await page.add_locator_handler(
565
571
  page.locator('.con-wizard'),
566
- handler
572
+ handler,
573
+ times=1, no_wait_after=True
567
574
  )
568
575
  self.logger.info('Yahoo handler added')
569
576
 
@@ -574,9 +581,10 @@ class Capture():
574
581
 
575
582
  await page.add_locator_handler(
576
583
  page.locator('#ppms_cm_popup_overlay'),
577
- handler
584
+ handler,
585
+ times=1, no_wait_after=True
578
586
  )
579
- self.logger.info('Yahoo handler added')
587
+ self.logger.info('Piwik handler added')
580
588
 
581
589
  async def capture_page(self, url: str, *, max_depth_capture_time: int,
582
590
  referer: str | None=None,
@@ -644,7 +652,13 @@ class Capture():
644
652
  capturing_sub = True
645
653
  else:
646
654
  capturing_sub = False
647
- page = await self.context.new_page()
655
+ try:
656
+ page = await self.context.new_page()
657
+ except Error as e:
658
+ self.logger.warning(f'The context is in a broken state: {e}')
659
+ self.should_retry = True
660
+ return to_return
661
+
648
662
  if allow_tracking:
649
663
  # Add authorization clickthroughs
650
664
  await self.__dialog_didomi_clickthrough(page)
@@ -704,8 +718,12 @@ class Capture():
704
718
  else:
705
719
  raise initial_error
706
720
  else:
707
- await page.bring_to_front()
708
- self.logger.debug('Page moved to front.')
721
+ try:
722
+ await page.bring_to_front()
723
+ self.logger.debug('Page moved to front.')
724
+ except Error as e:
725
+ self.logger.warning('Page in a broken state.')
726
+ raise e
709
727
 
710
728
  # page instrumentation
711
729
  await self._wait_for_random_timeout(page, 5) # Wait 5 sec after document loaded
@@ -738,8 +756,11 @@ class Capture():
738
756
  if allow_tracking:
739
757
  await self._wait_for_random_timeout(page, 2)
740
758
  # This event is required trigger the add_locator_handler
741
- if await page.locator("body").first.is_visible():
742
- await page.locator("body").first.click(button="right", timeout=2000)
759
+ try:
760
+ if await page.locator("body").first.is_visible():
761
+ await page.locator("body").first.click(button="right", timeout=5000)
762
+ except Exception as e:
763
+ self.logger.warning(f'Could not find body: {e}')
743
764
 
744
765
  # move mouse
745
766
  await page.mouse.move(x=random.uniform(300, 800), y=random.uniform(200, 500))
@@ -901,15 +922,20 @@ class Capture():
901
922
  'Navigation interrupted by another one',
902
923
  'Navigation failed because page was closed!',
903
924
  'Target page, context or browser has been closed',
904
- 'Protocol error (Page.bringToFront): Not attached to an active page',
925
+ 'Peer failed to perform TLS handshake: A packet with illegal or unsupported version was received.',
905
926
  'Peer failed to perform TLS handshake: The TLS connection was non-properly terminated.',
906
927
  'Peer failed to perform TLS handshake: Error sending data: Connection reset by peer',
907
928
  'Peer failed to perform TLS handshake: Error receiving data: Connection reset by peer',
908
- 'Peer sent fatal TLS alert: The server name sent was not recognized',
929
+ 'Peer sent fatal TLS alert: Handshake failed',
909
930
  'Peer sent fatal TLS alert: Internal error',
931
+ 'Peer sent fatal TLS alert: The server name sent was not recognized',
910
932
  'Load cannot follow more than 20 redirections',
911
933
  'Page crashed',
912
- 'Error receiving data: Connection reset by peer']:
934
+ 'Error receiving data: Connection reset by peer',
935
+ 'Internal SOCKSv5 proxy server error.',
936
+ 'Host unreachable through SOCKSv5 server.',
937
+ 'HTTP/2 Error: NO_ERROR',
938
+ 'HTTP/2 Error: PROTOCOL_ERROR']:
913
939
  # Other errors, let's give it another shot
914
940
  self.logger.info(f'Issue with {url} (retrying): {e.message}')
915
941
  self.should_retry = True
@@ -919,16 +945,18 @@ class Capture():
919
945
  self.should_retry = True
920
946
  elif e.name in ['net::ERR_INVALID_AUTH_CREDENTIALS',
921
947
  'net::ERR_BAD_SSL_CLIENT_AUTH_CERT',
922
- 'net::ERR_UNEXPECTED_PROXY_AUTH']:
923
- # No need to retry, the credentials are wrong/missing.
948
+ 'net::ERR_CERT_DATE_INVALID',
949
+ 'net::ERR_UNEXPECTED_PROXY_AUTH',
950
+ 'net::ERR_UNSAFE_PORT']:
951
+ # No need to retry, the credentials/certs are wrong/missing.
924
952
  pass
925
- elif e.name and any([msg in e.name for msg in ['is interrupted by another navigation to']]):
953
+ elif e.name and any([msg in e.name for msg in ['is interrupted by another navigation to', 'Page.bringToFront']]):
926
954
  self.should_retry = True
927
955
  elif e.name and any([msg in e.name for msg in ['Error resolving', 'Could not connect to']]):
928
956
  pass
929
957
  else:
930
958
  # Unexpected ones
931
- self.logger.exception(f'Something went poorly with {url}: {e.message}')
959
+ self.logger.exception(f'Something went poorly with {url}: "{e.name}" - {e.message}')
932
960
  except Exception as e:
933
961
  # we may get a non-playwright exception to.
934
962
  # The ones we try to handle here should be treated as if they were.
@@ -1155,6 +1183,7 @@ class Capture():
1155
1183
  'NS_ERROR_ABORT',
1156
1184
  'NS_ERROR_CONNECTION_REFUSED',
1157
1185
  'NS_ERROR_NET_INTERRUPT',
1186
+ 'NS_ERROR_NET_PARTIAL_TRANSFER',
1158
1187
  'NS_ERROR_NET_RESET',
1159
1188
  'NS_ERROR_NET_TIMEOUT',
1160
1189
  'NS_ERROR_REDIRECT_LOOP',
@@ -1170,8 +1199,13 @@ class Capture():
1170
1199
  'net::ERR_EMPTY_RESPONSE',
1171
1200
  'net::ERR_HTTP_RESPONSE_CODE_FAILURE',
1172
1201
  'net::ERR_HTTP2_PROTOCOL_ERROR',
1202
+ 'net::ERR_INVALID_REDIRECT',
1173
1203
  'net::ERR_INVALID_RESPONSE',
1174
1204
  'net::ERR_NAME_NOT_RESOLVED',
1205
+ 'net::ERR_NETWORK_ACCESS_DENIED',
1206
+ 'net::ERR_QUIC_PROTOCOL_ERROR',
1207
+ 'net::ERR_RESPONSE_HEADERS_TRUNCATED',
1208
+ 'net::ERR_SOCKET_NOT_CONNECTED',
1175
1209
  'net::ERR_SOCKS_CONNECTION_FAILED',
1176
1210
  'net::ERR_SSL_KEY_USAGE_INCOMPATIBLE',
1177
1211
  'net::ERR_SSL_PROTOCOL_ERROR',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PlaywrightCapture
3
- Version: 1.24.10
3
+ Version: 1.25.0
4
4
  Summary: A simple library to capture websites using playwright
5
5
  Home-page: https://github.com/Lookyloo/PlaywrightCapture
6
6
  License: BSD-3-Clause
@@ -26,14 +26,14 @@ Requires-Dist: aiohttp[speedups] (>=3.9.5,<4.0.0)
26
26
  Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
27
27
  Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
28
28
  Requires-Dist: dateparser (>=1.2.0,<2.0.0)
29
- Requires-Dist: playwright (>=1.44.0,<2.0.0)
29
+ Requires-Dist: playwright (>=1.45.0,<2.0.0)
30
30
  Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
31
- Requires-Dist: puremagic (>=1.23,<2.0)
31
+ Requires-Dist: puremagic (>=1.25,<2.0)
32
32
  Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
33
33
  Requires-Dist: pytz (>=2024.1,<2025.0) ; python_version < "3.9"
34
- Requires-Dist: setuptools (>=69.5.1,<70.0.0)
34
+ Requires-Dist: setuptools (>=70.2.0,<71.0.0)
35
35
  Requires-Dist: tzdata (>=2024.1,<2025.0)
36
- Requires-Dist: w3lib (>=2.1.2,<3.0.0)
36
+ Requires-Dist: w3lib (>=2.2.1,<3.0.0)
37
37
  Project-URL: Repository, https://github.com/Lookyloo/PlaywrightCapture
38
38
  Description-Content-Type: text/markdown
39
39
 
@@ -0,0 +1,9 @@
1
+ playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
2
+ playwrightcapture/capture.py,sha256=zzoZQItpKDbxpfF0PqFANfeWTmQlSwnvChuz_l1Ah-I,67333
3
+ playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
4
+ playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
5
+ playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ playwrightcapture-1.25.0.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
7
+ playwrightcapture-1.25.0.dist-info/METADATA,sha256=XBYGqQxi3Qvc-ktd1lLGFBfSKRmCLkH5UkbzNPeL8kA,3173
8
+ playwrightcapture-1.25.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
9
+ playwrightcapture-1.25.0.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
2
- playwrightcapture/capture.py,sha256=SQKfPz_PoySwvW3GCMRTTsElYVgZ5c9lB55srIxis8s,65604
3
- playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
4
- playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
5
- playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- playwrightcapture-1.24.10.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
7
- playwrightcapture-1.24.10.dist-info/METADATA,sha256=wd_znffwaPZexymxHzn2Tzl2P0UHFHcbiWeZuSpyHpg,3174
8
- playwrightcapture-1.24.10.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
9
- playwrightcapture-1.24.10.dist-info/RECORD,,