PlaywrightCapture 1.28.4__py3-none-any.whl → 1.28.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- playwrightcapture/capture.py +60 -53
- {playwrightcapture-1.28.4.dist-info → playwrightcapture-1.28.6.dist-info}/METADATA +1 -1
- {playwrightcapture-1.28.4.dist-info → playwrightcapture-1.28.6.dist-info}/RECORD +5 -5
- {playwrightcapture-1.28.4.dist-info → playwrightcapture-1.28.6.dist-info}/LICENSE +0 -0
- {playwrightcapture-1.28.4.dist-info → playwrightcapture-1.28.6.dist-info}/WHEEL +0 -0
playwrightcapture/capture.py
CHANGED
@@ -159,8 +159,9 @@ class Capture():
|
|
159
159
|
master_logger = logging.getLogger('playwrightcapture')
|
160
160
|
master_logger.setLevel(loglevel)
|
161
161
|
self.logger: Logger | PlaywrightCaptureLogAdapter
|
162
|
-
|
163
|
-
|
162
|
+
self.uuid = uuid
|
163
|
+
if self.uuid is not None:
|
164
|
+
self.logger = PlaywrightCaptureLogAdapter(master_logger, {'uuid': self.uuid})
|
164
165
|
else:
|
165
166
|
self.logger = master_logger
|
166
167
|
self.browser_name: BROWSER = browser if browser else 'chromium'
|
@@ -934,6 +935,7 @@ class Capture():
|
|
934
935
|
) -> CaptureResponse:
|
935
936
|
|
936
937
|
to_return: CaptureResponse = {}
|
938
|
+
errors: list[str] = []
|
937
939
|
got_favicons = False
|
938
940
|
|
939
941
|
# We don't need to be super strict on the lock, as it simply triggers a wait for network idle before stopping the capture
|
@@ -996,6 +998,7 @@ class Capture():
|
|
996
998
|
except Error as e:
|
997
999
|
self.logger.warning(f'Unable to create new page, the context is in a broken state: {e}')
|
998
1000
|
self.should_retry = True
|
1001
|
+
to_return['error'] = f'Unable to create new page: {e}'
|
999
1002
|
return to_return
|
1000
1003
|
|
1001
1004
|
if allow_tracking:
|
@@ -1049,8 +1052,8 @@ class Capture():
|
|
1049
1052
|
error_msg = download.failure()
|
1050
1053
|
if not error_msg:
|
1051
1054
|
raise e
|
1052
|
-
|
1053
|
-
self.logger.info(
|
1055
|
+
errors.append(f"Error while downloading: {error_msg}")
|
1056
|
+
self.logger.info(f'Error while downloading: {error_msg}')
|
1054
1057
|
self.should_retry = True
|
1055
1058
|
except Exception:
|
1056
1059
|
raise e
|
@@ -1073,20 +1076,6 @@ class Capture():
|
|
1073
1076
|
except Exception as e:
|
1074
1077
|
self.logger.exception(f'Error during instrumentation: {e}')
|
1075
1078
|
|
1076
|
-
if multiple_downloads:
|
1077
|
-
if len(multiple_downloads) == 1:
|
1078
|
-
to_return["downloaded_filename"] = multiple_downloads[0][0]
|
1079
|
-
to_return["downloaded_file"] = multiple_downloads[0][1]
|
1080
|
-
else:
|
1081
|
-
# we have multiple downloads, making it a zip
|
1082
|
-
mem_zip = BytesIO()
|
1083
|
-
to_return["downloaded_filename"] = 'multiple_downloads.zip'
|
1084
|
-
with ZipFile(mem_zip, 'w') as z:
|
1085
|
-
for i, f_details in enumerate(multiple_downloads):
|
1086
|
-
filename, file_content = f_details
|
1087
|
-
z.writestr(f'{i}_{filename}', file_content)
|
1088
|
-
to_return["downloaded_file"] = mem_zip.getvalue()
|
1089
|
-
|
1090
1079
|
if content := await self._failsafe_get_content(page):
|
1091
1080
|
to_return['html'] = content
|
1092
1081
|
|
@@ -1150,7 +1139,7 @@ class Capture():
|
|
1150
1139
|
if consecutive_errors >= 5:
|
1151
1140
|
# if we have more than 5 consecutive errors, the capture is most probably broken, breaking.
|
1152
1141
|
self.logger.warning('Got more than 5 consecutive errors while capturing children, breaking.')
|
1153
|
-
|
1142
|
+
errors.append("Got more than 5 consecutive errors while capturing children")
|
1154
1143
|
self.should_retry = True
|
1155
1144
|
break
|
1156
1145
|
|
@@ -1162,19 +1151,19 @@ class Capture():
|
|
1162
1151
|
self.logger.info(f'Unable to go back: {e}.')
|
1163
1152
|
|
1164
1153
|
except PlaywrightTimeoutError as e:
|
1165
|
-
|
1154
|
+
errors.append(f"The capture took too long - {e.message}")
|
1166
1155
|
self.should_retry = True
|
1167
1156
|
except (asyncio.TimeoutError, TimeoutError):
|
1168
|
-
|
1157
|
+
errors.append("Something in the capture took too long")
|
1169
1158
|
self.should_retry = True
|
1170
1159
|
except TargetClosedError as e:
|
1171
|
-
|
1160
|
+
errors.append(f"The target was closed - {e}")
|
1172
1161
|
self.should_retry = True
|
1173
1162
|
except Error as e:
|
1174
|
-
# NOTE: there are a lot of errors that look like duplicates and they are
|
1175
|
-
# it is tricky to figure our which one
|
1163
|
+
# NOTE: there are a lot of errors that look like duplicates and they are triggered at different times in the process.
|
1164
|
+
# it is tricky to figure out which one should (and should not) trigger a retry. Below is our best guess and it will change over time.
|
1176
1165
|
self._update_exceptions(e)
|
1177
|
-
|
1166
|
+
errors.append(e.message)
|
1178
1167
|
to_return['error_name'] = e.name
|
1179
1168
|
# TODO: check e.message and figure out if it is worth retrying or not.
|
1180
1169
|
# NOTE: e.name is generally (always?) "Error"
|
@@ -1183,6 +1172,7 @@ class Capture():
|
|
1183
1172
|
elif self._retry_network_error(e) or self._retry_browser_error(e):
|
1184
1173
|
# this one sounds like something we can retry...
|
1185
1174
|
self.logger.info(f'Issue with {url} (retrying): {e.message}')
|
1175
|
+
errors.append(f'Issue with {url}: {e.message}')
|
1186
1176
|
self.should_retry = True
|
1187
1177
|
else:
|
1188
1178
|
# Unexpected ones
|
@@ -1190,25 +1180,56 @@ class Capture():
|
|
1190
1180
|
except Exception as e:
|
1191
1181
|
# we may get a non-playwright exception too.
|
1192
1182
|
# The ones we try to handle here should be treated as if they were.
|
1193
|
-
|
1194
|
-
if
|
1183
|
+
errors.append(str(e))
|
1184
|
+
if str(e) in ['Connection closed while reading from the driver']:
|
1195
1185
|
self.logger.info(f'Issue with {url} (retrying): {e}')
|
1186
|
+
errors.append(f'Issue with {url}: {e}')
|
1196
1187
|
self.should_retry = True
|
1197
1188
|
else:
|
1198
1189
|
raise e
|
1199
1190
|
finally:
|
1200
1191
|
self.logger.debug('Finishing up capture.')
|
1201
1192
|
if not capturing_sub:
|
1193
|
+
if multiple_downloads:
|
1194
|
+
if len(multiple_downloads) == 1:
|
1195
|
+
to_return["downloaded_filename"] = multiple_downloads[0][0]
|
1196
|
+
to_return["downloaded_file"] = multiple_downloads[0][1]
|
1197
|
+
else:
|
1198
|
+
# we have multiple downloads, making it a zip, make sure the filename is unique
|
1199
|
+
mem_zip = BytesIO()
|
1200
|
+
to_return["downloaded_filename"] = f'{self.uuid}_multiple_downloads.zip'
|
1201
|
+
with ZipFile(mem_zip, 'w') as z:
|
1202
|
+
for i, f_details in enumerate(multiple_downloads):
|
1203
|
+
filename, file_content = f_details
|
1204
|
+
z.writestr(f'{i}_{filename}', file_content)
|
1205
|
+
to_return["downloaded_file"] = mem_zip.getvalue()
|
1206
|
+
|
1202
1207
|
try:
|
1203
|
-
|
1204
|
-
|
1205
|
-
|
1206
|
-
|
1207
|
-
|
1208
|
-
|
1208
|
+
async with timeout(15):
|
1209
|
+
to_return['cookies'] = await self.context.cookies()
|
1210
|
+
except (TimeoutError, asyncio.TimeoutError):
|
1211
|
+
self.logger.warning("Unable to get cookies (timeout).")
|
1212
|
+
errors.append("Unable to get the cookies (timeout).")
|
1213
|
+
self.should_retry = True
|
1214
|
+
except Error as e:
|
1215
|
+
self.logger.warning(f"Unable to get cookies: {e}")
|
1216
|
+
errors.append(f'Unable to get the cookies: {e}')
|
1217
|
+
self.should_retry = True
|
1218
|
+
|
1219
|
+
try:
|
1220
|
+
async with timeout(15):
|
1221
|
+
to_return['storage'] = await self.context.storage_state(indexed_db=True)
|
1222
|
+
except (TimeoutError, asyncio.TimeoutError):
|
1223
|
+
self.logger.warning("Unable to get storage (timeout).")
|
1224
|
+
errors.append("Unable to get the storage (timeout).")
|
1225
|
+
self.should_retry = True
|
1226
|
+
except Error as e:
|
1227
|
+
self.logger.warning(f"Unable to get the storage: {e}")
|
1228
|
+
errors.append(f'Unable to get the storage: {e}')
|
1229
|
+
self.should_retry = True
|
1209
1230
|
# frames_tree = self.make_frame_tree(page.main_frame)
|
1210
1231
|
try:
|
1211
|
-
async with timeout(
|
1232
|
+
async with timeout(30):
|
1212
1233
|
page.remove_listener("requestfinished", store_request)
|
1213
1234
|
await page.close(reason="Closing the page because the capture finished.")
|
1214
1235
|
self.logger.debug('Page closed.')
|
@@ -1219,30 +1240,16 @@ class Capture():
|
|
1219
1240
|
self.logger.debug('Got HAR.')
|
1220
1241
|
except (TimeoutError, asyncio.TimeoutError):
|
1221
1242
|
self.logger.warning("Unable to close page and context at the end of the capture.")
|
1243
|
+
errors.append("Unable to close page and context at the end of the capture.")
|
1222
1244
|
self.should_retry = True
|
1223
1245
|
except Exception as e:
|
1224
1246
|
self.logger.warning(f"Other exception while finishing up the capture: {e}.")
|
1225
|
-
|
1226
|
-
to_return['error'] = f'Unable to generate HAR file: {e}'
|
1247
|
+
errors.append(f'Unable to generate HAR file: {e}')
|
1227
1248
|
self.logger.debug('Capture done')
|
1249
|
+
if errors:
|
1250
|
+
to_return['error'] = '\n'.join(errors)
|
1228
1251
|
return to_return
|
1229
1252
|
|
1230
|
-
async def _failsafe_get_cookies(self) -> list[Cookie] | None:
|
1231
|
-
try:
|
1232
|
-
async with timeout(15):
|
1233
|
-
return await self.context.cookies()
|
1234
|
-
except (TimeoutError, asyncio.TimeoutError):
|
1235
|
-
self.logger.warning("Unable to get cookies (timeout).")
|
1236
|
-
return None
|
1237
|
-
|
1238
|
-
async def _failsafe_get_storage(self) -> StorageState | None:
|
1239
|
-
try:
|
1240
|
-
async with timeout(15):
|
1241
|
-
return await self.context.storage_state(indexed_db=True)
|
1242
|
-
except (TimeoutError, asyncio.TimeoutError):
|
1243
|
-
self.logger.warning("Unable to get storage (timeout).")
|
1244
|
-
return None
|
1245
|
-
|
1246
1253
|
async def _failsafe_get_screenshot(self, page: Page) -> bytes:
|
1247
1254
|
self.logger.debug("Capturing a screenshot of the full page.")
|
1248
1255
|
try:
|
@@ -1293,7 +1300,7 @@ class Capture():
|
|
1293
1300
|
tries = 3
|
1294
1301
|
while tries:
|
1295
1302
|
try:
|
1296
|
-
async with timeout(
|
1303
|
+
async with timeout(15):
|
1297
1304
|
return await page.content()
|
1298
1305
|
except (Error, TimeoutError, asyncio.TimeoutError):
|
1299
1306
|
self.logger.debug('Unable to get page content, trying again.')
|
@@ -1,9 +1,9 @@
|
|
1
1
|
playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
|
2
|
-
playwrightcapture/capture.py,sha256=
|
2
|
+
playwrightcapture/capture.py,sha256=6gxcAsoy_pDXYwxXg3uNmHyVAM9R9mKuc1oSGrlC4m8,82882
|
3
3
|
playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
|
4
4
|
playwrightcapture/helpers.py,sha256=Xqs09zHhzAWnpBtQ0A9YAxg80P3Lj7aBj5M2WuEr0so,1843
|
5
5
|
playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
playwrightcapture-1.28.
|
7
|
-
playwrightcapture-1.28.
|
8
|
-
playwrightcapture-1.28.
|
9
|
-
playwrightcapture-1.28.
|
6
|
+
playwrightcapture-1.28.6.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
7
|
+
playwrightcapture-1.28.6.dist-info/METADATA,sha256=Bdn-A2yRG4A3i25_jNqCpzSDdutkAK6-QVnYq2rNWjE,3075
|
8
|
+
playwrightcapture-1.28.6.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
9
|
+
playwrightcapture-1.28.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|