PlaywrightCapture 1.24.11__py3-none-any.whl → 1.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -521,7 +521,7 @@ class Capture():
521
521
  elif await page.locator('#onetrust-button-group').locator("#onetrust-accept-btn-handler").is_visible():
522
522
  await page.locator('#onetrust-button-group').locator("#onetrust-accept-btn-handler").click(timeout=1000)
523
523
  else:
524
- self.logger.info('Consent window found, but no button to click through.')
524
+ self.logger.info('Consent window found (alert dialog), but no button to click through.')
525
525
 
526
526
  await page.add_locator_handler(
527
527
  page.get_by_role("alertdialog").last,
@@ -542,7 +542,7 @@ class Capture():
542
542
  self.logger.info('Consent window found, clicking through.')
543
543
  await page.get_by_test_id("uc-accept-all-button").click(timeout=2000)
544
544
  else:
545
- self.logger.info('Consent window found, but no button to click through.')
545
+ self.logger.info('Consent window found (dialog), but no button to click through.')
546
546
  await page.add_locator_handler(
547
547
  page.get_by_role("dialog").last,
548
548
  handler,
@@ -584,7 +584,7 @@ class Capture():
584
584
  handler,
585
585
  times=1, no_wait_after=True
586
586
  )
587
- self.logger.info('Yahoo handler added')
587
+ self.logger.info('Piwik handler added')
588
588
 
589
589
  async def capture_page(self, url: str, *, max_depth_capture_time: int,
590
590
  referer: str | None=None,
@@ -722,7 +722,6 @@ class Capture():
722
722
  await page.bring_to_front()
723
723
  self.logger.debug('Page moved to front.')
724
724
  except Error as e:
725
- self.should_retry = True
726
725
  self.logger.warning('Page in a broken state.')
727
726
  raise e
728
727
 
@@ -757,8 +756,11 @@ class Capture():
757
756
  if allow_tracking:
758
757
  await self._wait_for_random_timeout(page, 2)
759
758
  # This event is required to trigger the add_locator_handler
760
- if await page.locator("body").first.is_visible():
761
- await page.locator("body").first.click(button="right", timeout=2000)
759
+ try:
760
+ if await page.locator("body").first.is_visible():
761
+ await page.locator("body").first.click(button="right", timeout=5000)
762
+ except Exception as e:
763
+ self.logger.warning(f'Could not find body: {e}')
762
764
 
763
765
  # move mouse
764
766
  await page.mouse.move(x=random.uniform(300, 800), y=random.uniform(200, 500))
@@ -920,19 +922,20 @@ class Capture():
920
922
  'Navigation interrupted by another one',
921
923
  'Navigation failed because page was closed!',
922
924
  'Target page, context or browser has been closed',
923
- 'Protocol error (Page.bringToFront): Not attached to an active page',
924
925
  'Peer failed to perform TLS handshake: A packet with illegal or unsupported version was received.',
925
926
  'Peer failed to perform TLS handshake: The TLS connection was non-properly terminated.',
926
927
  'Peer failed to perform TLS handshake: Error sending data: Connection reset by peer',
927
928
  'Peer failed to perform TLS handshake: Error receiving data: Connection reset by peer',
928
- 'Peer sent fatal TLS alert: The server name sent was not recognized',
929
+ 'Peer sent fatal TLS alert: Handshake failed',
929
930
  'Peer sent fatal TLS alert: Internal error',
931
+ 'Peer sent fatal TLS alert: The server name sent was not recognized',
930
932
  'Load cannot follow more than 20 redirections',
931
933
  'Page crashed',
932
934
  'Error receiving data: Connection reset by peer',
933
935
  'Internal SOCKSv5 proxy server error.',
934
936
  'Host unreachable through SOCKSv5 server.',
935
- 'HTTP/2 Error: NO_ERROR']:
937
+ 'HTTP/2 Error: NO_ERROR',
938
+ 'HTTP/2 Error: PROTOCOL_ERROR']:
936
939
  # Other errors, let's give it another shot
937
940
  self.logger.info(f'Issue with {url} (retrying): {e.message}')
938
941
  self.should_retry = True
@@ -942,16 +945,18 @@ class Capture():
942
945
  self.should_retry = True
943
946
  elif e.name in ['net::ERR_INVALID_AUTH_CREDENTIALS',
944
947
  'net::ERR_BAD_SSL_CLIENT_AUTH_CERT',
945
- 'net::ERR_UNEXPECTED_PROXY_AUTH']:
946
- # No need to retry, the credentials are wrong/missing.
948
+ 'net::ERR_CERT_DATE_INVALID',
949
+ 'net::ERR_UNEXPECTED_PROXY_AUTH',
950
+ 'net::ERR_UNSAFE_PORT']:
951
+ # No need to retry, the credentials/certs are wrong/missing.
947
952
  pass
948
- elif e.name and any([msg in e.name for msg in ['is interrupted by another navigation to']]):
953
+ elif e.name and any([msg in e.name for msg in ['is interrupted by another navigation to', 'Page.bringToFront']]):
949
954
  self.should_retry = True
950
955
  elif e.name and any([msg in e.name for msg in ['Error resolving', 'Could not connect to']]):
951
956
  pass
952
957
  else:
953
958
  # Unexpected ones
954
- self.logger.exception(f'Something went poorly with {url}: {e.message}')
959
+ self.logger.exception(f'Something went poorly with {url}: "{e.name}" - {e.message}')
955
960
  except Exception as e:
956
961
  # we may get a non-playwright exception too.
957
962
  # The ones we try to handle here should be treated as if they were.
@@ -1194,8 +1199,13 @@ class Capture():
1194
1199
  'net::ERR_EMPTY_RESPONSE',
1195
1200
  'net::ERR_HTTP_RESPONSE_CODE_FAILURE',
1196
1201
  'net::ERR_HTTP2_PROTOCOL_ERROR',
1202
+ 'net::ERR_INVALID_REDIRECT',
1197
1203
  'net::ERR_INVALID_RESPONSE',
1198
1204
  'net::ERR_NAME_NOT_RESOLVED',
1205
+ 'net::ERR_NETWORK_ACCESS_DENIED',
1206
+ 'net::ERR_QUIC_PROTOCOL_ERROR',
1207
+ 'net::ERR_RESPONSE_HEADERS_TRUNCATED',
1208
+ 'net::ERR_SOCKET_NOT_CONNECTED',
1199
1209
  'net::ERR_SOCKS_CONNECTION_FAILED',
1200
1210
  'net::ERR_SSL_KEY_USAGE_INCOMPATIBLE',
1201
1211
  'net::ERR_SSL_PROTOCOL_ERROR',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PlaywrightCapture
3
- Version: 1.24.11
3
+ Version: 1.25.0
4
4
  Summary: A simple library to capture websites using playwright
5
5
  Home-page: https://github.com/Lookyloo/PlaywrightCapture
6
6
  License: BSD-3-Clause
@@ -26,14 +26,14 @@ Requires-Dist: aiohttp[speedups] (>=3.9.5,<4.0.0)
26
26
  Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
27
27
  Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
28
28
  Requires-Dist: dateparser (>=1.2.0,<2.0.0)
29
- Requires-Dist: playwright (>=1.44.0,<2.0.0)
29
+ Requires-Dist: playwright (>=1.45.0,<2.0.0)
30
30
  Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
31
- Requires-Dist: puremagic (>=1.23,<2.0)
31
+ Requires-Dist: puremagic (>=1.25,<2.0)
32
32
  Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
33
33
  Requires-Dist: pytz (>=2024.1,<2025.0) ; python_version < "3.9"
34
- Requires-Dist: setuptools (>=70.0.0,<71.0.0)
34
+ Requires-Dist: setuptools (>=70.2.0,<71.0.0)
35
35
  Requires-Dist: tzdata (>=2024.1,<2025.0)
36
- Requires-Dist: w3lib (>=2.1.2,<3.0.0)
36
+ Requires-Dist: w3lib (>=2.2.1,<3.0.0)
37
37
  Project-URL: Repository, https://github.com/Lookyloo/PlaywrightCapture
38
38
  Description-Content-Type: text/markdown
39
39
 
@@ -0,0 +1,9 @@
1
+ playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
2
+ playwrightcapture/capture.py,sha256=zzoZQItpKDbxpfF0PqFANfeWTmQlSwnvChuz_l1Ah-I,67333
3
+ playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
4
+ playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
5
+ playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ playwrightcapture-1.25.0.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
7
+ playwrightcapture-1.25.0.dist-info/METADATA,sha256=XBYGqQxi3Qvc-ktd1lLGFBfSKRmCLkH5UkbzNPeL8kA,3173
8
+ playwrightcapture-1.25.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
9
+ playwrightcapture-1.25.0.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
2
- playwrightcapture/capture.py,sha256=bEbKQKnUT4mPXzgQF8NI6hKK1LmVcBnt2MXB9BXNwgQ,66759
3
- playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
4
- playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
5
- playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- playwrightcapture-1.24.11.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
7
- playwrightcapture-1.24.11.dist-info/METADATA,sha256=c5hVyRttyjdcwwG-CVfDqmqm_a33VWePttyxKtaBJyw,3174
8
- playwrightcapture-1.24.11.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
9
- playwrightcapture-1.24.11.dist-info/RECORD,,