PlaywrightCapture 1.28.3__py3-none-any.whl → 1.28.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- playwrightcapture/capture.py +35 -19
- {playwrightcapture-1.28.3.dist-info → playwrightcapture-1.28.4.dist-info}/METADATA +2 -2
- playwrightcapture-1.28.4.dist-info/RECORD +9 -0
- {playwrightcapture-1.28.3.dist-info → playwrightcapture-1.28.4.dist-info}/WHEEL +1 -1
- playwrightcapture-1.28.3.dist-info/RECORD +0 -9
- {playwrightcapture-1.28.3.dist-info → playwrightcapture-1.28.4.dist-info}/LICENSE +0 -0
playwrightcapture/capture.py
CHANGED
@@ -736,7 +736,7 @@ class Capture():
|
|
736
736
|
got_button: bool = False
|
737
737
|
try:
|
738
738
|
try:
|
739
|
-
async with timeout(
|
739
|
+
async with timeout(3):
|
740
740
|
if await frame.locator("button.button__acceptAll").is_visible():
|
741
741
|
self.logger.info('Consent window found, clicking through.')
|
742
742
|
got_button = True
|
@@ -746,7 +746,7 @@ class Capture():
|
|
746
746
|
|
747
747
|
for label in labels_to_click:
|
748
748
|
try:
|
749
|
-
async with timeout(
|
749
|
+
async with timeout(3):
|
750
750
|
if await frame.get_by_label(label).is_visible():
|
751
751
|
got_button = True
|
752
752
|
self.logger.debug(f'Got button by label on frame: {label}')
|
@@ -756,7 +756,7 @@ class Capture():
|
|
756
756
|
self.logger.warning(f'Consent timeout (label {label}) : {e}')
|
757
757
|
|
758
758
|
try:
|
759
|
-
async with timeout(
|
759
|
+
async with timeout(3):
|
760
760
|
if await frame.get_by_role("button", name=label).is_visible():
|
761
761
|
got_button = True
|
762
762
|
self.logger.debug(f'Got button by role on frame: {label}')
|
@@ -780,7 +780,15 @@ class Capture():
|
|
780
780
|
except Exception as e:
|
781
781
|
self.logger.info(f'Error while moving time forward: {e}')
|
782
782
|
|
783
|
-
async def __instrumentation(self, page: Page, url: str, allow_tracking: bool
|
783
|
+
async def __instrumentation(self, page: Page, url: str, allow_tracking: bool) -> None:
|
784
|
+
try:
|
785
|
+
# NOTE: the clock must be installed after the page is loaded, otherwise it sometimes causes the complete capture to hang.
|
786
|
+
await page.clock.install()
|
787
|
+
clock_set = True
|
788
|
+
except Error as e:
|
789
|
+
self.logger.warning(f'Unable to install the clock: {e}')
|
790
|
+
clock_set = False
|
791
|
+
|
784
792
|
# page instrumentation
|
785
793
|
await self._wait_for_random_timeout(page, 5) # Wait 5 sec after document loaded
|
786
794
|
self.logger.debug('Start instrumentation.')
|
@@ -923,7 +931,6 @@ class Capture():
|
|
923
931
|
with_screenshot: bool=True,
|
924
932
|
with_favicon: bool=False,
|
925
933
|
allow_tracking: bool=False,
|
926
|
-
clock_set: bool=False
|
927
934
|
) -> CaptureResponse:
|
928
935
|
|
929
936
|
to_return: CaptureResponse = {}
|
@@ -991,13 +998,6 @@ class Capture():
|
|
991
998
|
self.should_retry = True
|
992
999
|
return to_return
|
993
1000
|
|
994
|
-
try:
|
995
|
-
await page.clock.install()
|
996
|
-
clock_set = True
|
997
|
-
except Error as e:
|
998
|
-
self.logger.warning(f'Unable to install the clock: {e}')
|
999
|
-
clock_set = False
|
1000
|
-
|
1001
1001
|
if allow_tracking:
|
1002
1002
|
# Add authorization clickthroughs
|
1003
1003
|
await self.__dialog_didomi_clickthrough(page)
|
@@ -1020,8 +1020,8 @@ class Capture():
|
|
1020
1020
|
|
1021
1021
|
try:
|
1022
1022
|
try:
|
1023
|
-
await page.goto(url, wait_until='domcontentloaded', referer=referer if referer else '')
|
1024
1023
|
page.on("download", handle_download)
|
1024
|
+
await page.goto(url, wait_until='domcontentloaded', referer=referer if referer else '')
|
1025
1025
|
except Error as initial_error:
|
1026
1026
|
self._update_exceptions(initial_error)
|
1027
1027
|
# So this one is really annoying: chromium raises a net::ERR_ABORTED when it hits a download
|
@@ -1066,7 +1066,7 @@ class Capture():
|
|
1066
1066
|
|
1067
1067
|
try:
|
1068
1068
|
if self.headless:
|
1069
|
-
await self.__instrumentation(page, url, allow_tracking
|
1069
|
+
await self.__instrumentation(page, url, allow_tracking)
|
1070
1070
|
else:
|
1071
1071
|
self.logger.debug('Headed mode, skipping instrumentation.')
|
1072
1072
|
await self._wait_for_random_timeout(page, self._capture_timeout - 5)
|
@@ -1134,7 +1134,7 @@ class Capture():
|
|
1134
1134
|
page=page, depth=depth,
|
1135
1135
|
rendered_hostname_only=rendered_hostname_only,
|
1136
1136
|
max_depth_capture_time=max_capture_time,
|
1137
|
-
|
1137
|
+
with_screenshot=with_screenshot)
|
1138
1138
|
to_return['children'].append(child_capture) # type: ignore[union-attr]
|
1139
1139
|
except (TimeoutError, asyncio.TimeoutError):
|
1140
1140
|
self.logger.info(f'Timeout error, took more than {max_capture_time}s. Unable to capture {url}.')
|
@@ -1200,12 +1200,12 @@ class Capture():
|
|
1200
1200
|
self.logger.debug('Finishing up capture.')
|
1201
1201
|
if not capturing_sub:
|
1202
1202
|
try:
|
1203
|
-
to_return['storage'] = await self.
|
1204
|
-
to_return['cookies'] = await self.
|
1205
|
-
self.logger.debug('Done with cookies.')
|
1203
|
+
to_return['storage'] = await self._failsafe_get_storage()
|
1204
|
+
to_return['cookies'] = await self._failsafe_get_cookies()
|
1205
|
+
self.logger.debug('Done with cookies and storage.')
|
1206
1206
|
except Exception as e:
|
1207
1207
|
if 'error' not in to_return:
|
1208
|
-
to_return['error'] = f'Unable to get the
|
1208
|
+
to_return['error'] = f'Unable to get the storage: {e}'
|
1209
1209
|
# frames_tree = self.make_frame_tree(page.main_frame)
|
1210
1210
|
try:
|
1211
1211
|
async with timeout(60):
|
@@ -1227,6 +1227,22 @@ class Capture():
|
|
1227
1227
|
self.logger.debug('Capture done')
|
1228
1228
|
return to_return
|
1229
1229
|
|
1230
|
+
async def _failsafe_get_cookies(self) -> list[Cookie] | None:
|
1231
|
+
try:
|
1232
|
+
async with timeout(15):
|
1233
|
+
return await self.context.cookies()
|
1234
|
+
except (TimeoutError, asyncio.TimeoutError):
|
1235
|
+
self.logger.warning("Unable to get cookies (timeout).")
|
1236
|
+
return None
|
1237
|
+
|
1238
|
+
async def _failsafe_get_storage(self) -> StorageState | None:
|
1239
|
+
try:
|
1240
|
+
async with timeout(15):
|
1241
|
+
return await self.context.storage_state(indexed_db=True)
|
1242
|
+
except (TimeoutError, asyncio.TimeoutError):
|
1243
|
+
self.logger.warning("Unable to get storage (timeout).")
|
1244
|
+
return None
|
1245
|
+
|
1230
1246
|
async def _failsafe_get_screenshot(self, page: Page) -> bytes:
|
1231
1247
|
self.logger.debug("Capturing a screenshot of the full page.")
|
1232
1248
|
try:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: PlaywrightCapture
|
3
|
-
Version: 1.28.
|
3
|
+
Version: 1.28.4
|
4
4
|
Summary: A simple library to capture websites using playwright
|
5
5
|
License: BSD-3-Clause
|
6
6
|
Author: Raphaël Vinot
|
@@ -20,7 +20,7 @@ Classifier: Topic :: Security
|
|
20
20
|
Provides-Extra: recaptcha
|
21
21
|
Requires-Dist: SpeechRecognition (>=3.14.2) ; extra == "recaptcha"
|
22
22
|
Requires-Dist: aiohttp-socks (>=0.10.1)
|
23
|
-
Requires-Dist: aiohttp[speedups] (>=3.11.
|
23
|
+
Requires-Dist: aiohttp[speedups] (>=3.11.16)
|
24
24
|
Requires-Dist: async-timeout (>=5.0.1) ; python_version < "3.11"
|
25
25
|
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.13.3)
|
26
26
|
Requires-Dist: dateparser (>=1.2.1)
|
@@ -0,0 +1,9 @@
|
|
1
|
+
playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
|
2
|
+
playwrightcapture/capture.py,sha256=Iicc_nNjlztCMGIJ9wSB6UhKoIcVJCh_00BssV68XDU,82297
|
3
|
+
playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
|
4
|
+
playwrightcapture/helpers.py,sha256=Xqs09zHhzAWnpBtQ0A9YAxg80P3Lj7aBj5M2WuEr0so,1843
|
5
|
+
playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
+
playwrightcapture-1.28.4.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
7
|
+
playwrightcapture-1.28.4.dist-info/METADATA,sha256=o32IMwzDiGFMVmlmaHJF0JTg0p2r5_kZ9KLXuJlVI9M,3075
|
8
|
+
playwrightcapture-1.28.4.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
9
|
+
playwrightcapture-1.28.4.dist-info/RECORD,,
|
@@ -1,9 +0,0 @@
|
|
1
|
-
playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
|
2
|
-
playwrightcapture/capture.py,sha256=ep4zmE0HhV74Cr2iGWq16obQzkIg17wTiGHkEnq6YBc,81644
|
3
|
-
playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
|
4
|
-
playwrightcapture/helpers.py,sha256=Xqs09zHhzAWnpBtQ0A9YAxg80P3Lj7aBj5M2WuEr0so,1843
|
5
|
-
playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
playwrightcapture-1.28.3.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
7
|
-
playwrightcapture-1.28.3.dist-info/METADATA,sha256=lIke_K-KyemmKzUJ12uW0tz_0IwoWF9_KvIPJZGDP7k,3075
|
8
|
-
playwrightcapture-1.28.3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
9
|
-
playwrightcapture-1.28.3.dist-info/RECORD,,
|
File without changes
|