PlaywrightCapture 1.28.3__py3-none-any.whl → 1.28.4__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
@@ -736,7 +736,7 @@ class Capture():
736
736
  got_button: bool = False
737
737
  try:
738
738
  try:
739
- async with timeout(5):
739
+ async with timeout(3):
740
740
  if await frame.locator("button.button__acceptAll").is_visible():
741
741
  self.logger.info('Consent window found, clicking through.')
742
742
  got_button = True
@@ -746,7 +746,7 @@ class Capture():
746
746
 
747
747
  for label in labels_to_click:
748
748
  try:
749
- async with timeout(5):
749
+ async with timeout(3):
750
750
  if await frame.get_by_label(label).is_visible():
751
751
  got_button = True
752
752
  self.logger.debug(f'Got button by label on frame: {label}')
@@ -756,7 +756,7 @@ class Capture():
756
756
  self.logger.warning(f'Consent timeout (label {label}) : {e}')
757
757
 
758
758
  try:
759
- async with timeout(5):
759
+ async with timeout(3):
760
760
  if await frame.get_by_role("button", name=label).is_visible():
761
761
  got_button = True
762
762
  self.logger.debug(f'Got button by role on frame: {label}')
@@ -780,7 +780,15 @@ class Capture():
780
780
  except Exception as e:
781
781
  self.logger.info(f'Error while moving time forward: {e}')
782
782
 
783
- async def __instrumentation(self, page: Page, url: str, allow_tracking: bool, clock_set: bool) -> None:
783
+ async def __instrumentation(self, page: Page, url: str, allow_tracking: bool) -> None:
784
+ try:
785
+ # NOTE: the clock must be installed after the page is loaded, otherwise it sometimes cause the complete capture to hang.
786
+ await page.clock.install()
787
+ clock_set = True
788
+ except Error as e:
789
+ self.logger.warning(f'Unable to install the clock: {e}')
790
+ clock_set = False
791
+
784
792
  # page instrumentation
785
793
  await self._wait_for_random_timeout(page, 5) # Wait 5 sec after document loaded
786
794
  self.logger.debug('Start instrumentation.')
@@ -923,7 +931,6 @@ class Capture():
923
931
  with_screenshot: bool=True,
924
932
  with_favicon: bool=False,
925
933
  allow_tracking: bool=False,
926
- clock_set: bool=False
927
934
  ) -> CaptureResponse:
928
935
 
929
936
  to_return: CaptureResponse = {}
@@ -991,13 +998,6 @@ class Capture():
991
998
  self.should_retry = True
992
999
  return to_return
993
1000
 
994
- try:
995
- await page.clock.install()
996
- clock_set = True
997
- except Error as e:
998
- self.logger.warning(f'Unable to install the clock: {e}')
999
- clock_set = False
1000
-
1001
1001
  if allow_tracking:
1002
1002
  # Add authorization clickthroughs
1003
1003
  await self.__dialog_didomi_clickthrough(page)
@@ -1020,8 +1020,8 @@ class Capture():
1020
1020
 
1021
1021
  try:
1022
1022
  try:
1023
- await page.goto(url, wait_until='domcontentloaded', referer=referer if referer else '')
1024
1023
  page.on("download", handle_download)
1024
+ await page.goto(url, wait_until='domcontentloaded', referer=referer if referer else '')
1025
1025
  except Error as initial_error:
1026
1026
  self._update_exceptions(initial_error)
1027
1027
  # So this one is really annoying: chromium raises a net::ERR_ABORTED when it hits a download
@@ -1066,7 +1066,7 @@ class Capture():
1066
1066
 
1067
1067
  try:
1068
1068
  if self.headless:
1069
- await self.__instrumentation(page, url, allow_tracking, clock_set)
1069
+ await self.__instrumentation(page, url, allow_tracking)
1070
1070
  else:
1071
1071
  self.logger.debug('Headed mode, skipping instrumentation.')
1072
1072
  await self._wait_for_random_timeout(page, self._capture_timeout - 5)
@@ -1134,7 +1134,7 @@ class Capture():
1134
1134
  page=page, depth=depth,
1135
1135
  rendered_hostname_only=rendered_hostname_only,
1136
1136
  max_depth_capture_time=max_capture_time,
1137
- clock_set=clock_set, with_screenshot=with_screenshot)
1137
+ with_screenshot=with_screenshot)
1138
1138
  to_return['children'].append(child_capture) # type: ignore[union-attr]
1139
1139
  except (TimeoutError, asyncio.TimeoutError):
1140
1140
  self.logger.info(f'Timeout error, took more than {max_capture_time}s. Unable to capture {url}.')
@@ -1200,12 +1200,12 @@ class Capture():
1200
1200
  self.logger.debug('Finishing up capture.')
1201
1201
  if not capturing_sub:
1202
1202
  try:
1203
- to_return['storage'] = await self.context.storage_state(indexed_db=True)
1204
- to_return['cookies'] = await self.context.cookies()
1205
- self.logger.debug('Done with cookies.')
1203
+ to_return['storage'] = await self._failsafe_get_storage()
1204
+ to_return['cookies'] = await self._failsafe_get_cookies()
1205
+ self.logger.debug('Done with cookies and storage.')
1206
1206
  except Exception as e:
1207
1207
  if 'error' not in to_return:
1208
- to_return['error'] = f'Unable to get the cookies: {e}'
1208
+ to_return['error'] = f'Unable to get the storage: {e}'
1209
1209
  # frames_tree = self.make_frame_tree(page.main_frame)
1210
1210
  try:
1211
1211
  async with timeout(60):
@@ -1227,6 +1227,22 @@ class Capture():
1227
1227
  self.logger.debug('Capture done')
1228
1228
  return to_return
1229
1229
 
1230
+ async def _failsafe_get_cookies(self) -> list[Cookie] | None:
1231
+ try:
1232
+ async with timeout(15):
1233
+ return await self.context.cookies()
1234
+ except (TimeoutError, asyncio.TimeoutError):
1235
+ self.logger.warning("Unable to get cookies (timeout).")
1236
+ return None
1237
+
1238
+ async def _failsafe_get_storage(self) -> StorageState | None:
1239
+ try:
1240
+ async with timeout(15):
1241
+ return await self.context.storage_state(indexed_db=True)
1242
+ except (TimeoutError, asyncio.TimeoutError):
1243
+ self.logger.warning("Unable to get storage (timeout).")
1244
+ return None
1245
+
1230
1246
  async def _failsafe_get_screenshot(self, page: Page) -> bytes:
1231
1247
  self.logger.debug("Capturing a screenshot of the full page.")
1232
1248
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: PlaywrightCapture
3
- Version: 1.28.3
3
+ Version: 1.28.4
4
4
  Summary: A simple library to capture websites using playwright
5
5
  License: BSD-3-Clause
6
6
  Author: Raphaël Vinot
@@ -20,7 +20,7 @@ Classifier: Topic :: Security
20
20
  Provides-Extra: recaptcha
21
21
  Requires-Dist: SpeechRecognition (>=3.14.2) ; extra == "recaptcha"
22
22
  Requires-Dist: aiohttp-socks (>=0.10.1)
23
- Requires-Dist: aiohttp[speedups] (>=3.11.14)
23
+ Requires-Dist: aiohttp[speedups] (>=3.11.16)
24
24
  Requires-Dist: async-timeout (>=5.0.1) ; python_version < "3.11"
25
25
  Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.13.3)
26
26
  Requires-Dist: dateparser (>=1.2.1)
@@ -0,0 +1,9 @@
1
+ playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
2
+ playwrightcapture/capture.py,sha256=Iicc_nNjlztCMGIJ9wSB6UhKoIcVJCh_00BssV68XDU,82297
3
+ playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
4
+ playwrightcapture/helpers.py,sha256=Xqs09zHhzAWnpBtQ0A9YAxg80P3Lj7aBj5M2WuEr0so,1843
5
+ playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ playwrightcapture-1.28.4.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
7
+ playwrightcapture-1.28.4.dist-info/METADATA,sha256=o32IMwzDiGFMVmlmaHJF0JTg0p2r5_kZ9KLXuJlVI9M,3075
8
+ playwrightcapture-1.28.4.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
9
+ playwrightcapture-1.28.4.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.1
2
+ Generator: poetry-core 2.1.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,9 +0,0 @@
1
- playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
2
- playwrightcapture/capture.py,sha256=ep4zmE0HhV74Cr2iGWq16obQzkIg17wTiGHkEnq6YBc,81644
3
- playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
4
- playwrightcapture/helpers.py,sha256=Xqs09zHhzAWnpBtQ0A9YAxg80P3Lj7aBj5M2WuEr0so,1843
5
- playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- playwrightcapture-1.28.3.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
7
- playwrightcapture-1.28.3.dist-info/METADATA,sha256=lIke_K-KyemmKzUJ12uW0tz_0IwoWF9_KvIPJZGDP7k,3075
8
- playwrightcapture-1.28.3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
9
- playwrightcapture-1.28.3.dist-info/RECORD,,