PlaywrightCapture 1.24.2__tar.gz → 1.24.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {playwrightcapture-1.24.2 → playwrightcapture-1.24.4}/PKG-INFO +2 -2
- {playwrightcapture-1.24.2 → playwrightcapture-1.24.4}/playwrightcapture/capture.py +27 -6
- {playwrightcapture-1.24.2 → playwrightcapture-1.24.4}/pyproject.toml +2 -2
- {playwrightcapture-1.24.2 → playwrightcapture-1.24.4}/LICENSE +0 -0
- {playwrightcapture-1.24.2 → playwrightcapture-1.24.4}/README.md +0 -0
- {playwrightcapture-1.24.2 → playwrightcapture-1.24.4}/playwrightcapture/__init__.py +0 -0
- {playwrightcapture-1.24.2 → playwrightcapture-1.24.4}/playwrightcapture/exceptions.py +0 -0
- {playwrightcapture-1.24.2 → playwrightcapture-1.24.4}/playwrightcapture/helpers.py +0 -0
- {playwrightcapture-1.24.2 → playwrightcapture-1.24.4}/playwrightcapture/py.typed +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: PlaywrightCapture
|
3
|
-
Version: 1.24.2
|
3
|
+
Version: 1.24.4
|
4
4
|
Summary: A simple library to capture websites using playwright
|
5
5
|
Home-page: https://github.com/Lookyloo/PlaywrightCapture
|
6
6
|
License: BSD-3-Clause
|
@@ -29,7 +29,7 @@ Requires-Dist: puremagic (>=1.21,<2.0)
|
|
29
29
|
Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
|
30
30
|
Requires-Dist: pytz (>=2024.1,<2025.0) ; python_version < "3.9"
|
31
31
|
Requires-Dist: requests[socks] (>=2.31.0,<3.0.0) ; extra == "recaptcha"
|
32
|
-
Requires-Dist: setuptools (>=69.
|
32
|
+
Requires-Dist: setuptools (>=69.5.1,<70.0.0)
|
33
33
|
Requires-Dist: tzdata (>=2024.1,<2025.0)
|
34
34
|
Requires-Dist: w3lib (>=2.1.2,<3.0.0)
|
35
35
|
Project-URL: Repository, https://github.com/Lookyloo/PlaywrightCapture
|
@@ -681,7 +681,10 @@ class Capture():
|
|
681
681
|
self.should_retry = True
|
682
682
|
except Exception:
|
683
683
|
raise e
|
684
|
-
|
684
|
+
else:
|
685
|
+
if not self._exception_is_network_error(initial_error):
|
686
|
+
# TODO: Do something?
|
687
|
+
self.logger.warning(f'Unexpected error: {initial_error}')
|
685
688
|
raise initial_error
|
686
689
|
else:
|
687
690
|
await page.bring_to_front()
|
@@ -718,8 +721,8 @@ class Capture():
|
|
718
721
|
if allow_tracking:
|
719
722
|
await self._wait_for_random_timeout(page, 2)
|
720
723
|
# This event is required trigger the add_locator_handler
|
721
|
-
if await page.locator("body").is_visible():
|
722
|
-
await page.locator("body").click(button="right", timeout=2000)
|
724
|
+
if await page.locator("body").first.is_visible():
|
725
|
+
await page.locator("body").first.click(button="right", timeout=2000)
|
723
726
|
|
724
727
|
# move mouse
|
725
728
|
await page.mouse.move(x=random.uniform(300, 800), y=random.uniform(200, 500))
|
@@ -864,7 +867,9 @@ class Capture():
|
|
864
867
|
'Connection closed',
|
865
868
|
'Navigation interrupted by another one',
|
866
869
|
'Navigation failed because page was closed!',
|
867
|
-
'Protocol error (Page.bringToFront): Not attached to an active page'
|
870
|
+
'Protocol error (Page.bringToFront): Not attached to an active page',
|
871
|
+
'Peer failed to perform TLS handshake: The TLS connection was non-properly terminated.',
|
872
|
+
'Load cannot follow more than 20 redirections']:
|
868
873
|
# Other errors, let's give it another shot
|
869
874
|
self.logger.info(f'Issue with {url} (retrying): {e.message}')
|
870
875
|
self.should_retry = True
|
@@ -872,6 +877,9 @@ class Capture():
|
|
872
877
|
# The browser barfed, let's try again
|
873
878
|
self.logger.info(f'Browser barfed on {url} (retrying): {e.message}')
|
874
879
|
self.should_retry = True
|
880
|
+
elif e.name in ['net::ERR_INVALID_AUTH_CREDENTIALS']:
|
881
|
+
# No need to retry, the credentials are wrong/missing.
|
882
|
+
pass
|
875
883
|
else:
|
876
884
|
# Unexpected ones
|
877
885
|
self.logger.exception(f'Something went poorly with {url}: {e.message}')
|
@@ -932,8 +940,8 @@ class Capture():
|
|
932
940
|
max_wait = force_max_wait_in_sec
|
933
941
|
else:
|
934
942
|
max_wait = self._capture_timeout / self.__network_not_idle
|
935
|
-
max_wait *= 1000
|
936
943
|
self.logger.debug(f'Waiting for network idle, max wait: {max_wait}s')
|
944
|
+
max_wait *= 1000
|
937
945
|
# If we don't have networkidle relatively quick, it's probably because we're playing a video.
|
938
946
|
await page.wait_for_load_state('networkidle', timeout=max_wait)
|
939
947
|
except PlaywrightTimeoutError:
|
@@ -1081,6 +1089,10 @@ class Capture():
|
|
1081
1089
|
elif '; ' in name:
|
1082
1090
|
name, _ = name.split('; ', maxsplit=1)
|
1083
1091
|
# This is kinda dirty.
|
1092
|
+
|
1093
|
+
# The format changed in Playwright 1.43.0, the name of the method that failed is set before the exception itself.
|
1094
|
+
if ': ' in name:
|
1095
|
+
_, name = name.split(': ', maxsplit=1)
|
1084
1096
|
exception._name = name.strip()
|
1085
1097
|
|
1086
1098
|
def _exception_is_network_error(self, exception: Error) -> bool:
|
@@ -1099,8 +1111,11 @@ class Capture():
|
|
1099
1111
|
'net::ERR_CONNECTION_CLOSED',
|
1100
1112
|
'net::ERR_CONNECTION_REFUSED',
|
1101
1113
|
'net::ERR_CONNECTION_RESET',
|
1114
|
+
'net::ERR_CONNECTION_TIMED_OUT',
|
1102
1115
|
'net::ERR_EMPTY_RESPONSE',
|
1116
|
+
'net::ERR_HTTP_RESPONSE_CODE_FAILURE',
|
1103
1117
|
'net::ERR_HTTP2_PROTOCOL_ERROR',
|
1118
|
+
'net::ERR_INVALID_RESPONSE',
|
1104
1119
|
'net::ERR_NAME_NOT_RESOLVED',
|
1105
1120
|
'net::ERR_SOCKS_CONNECTION_FAILED',
|
1106
1121
|
'net::ERR_SSL_UNRECOGNIZED_NAME_ALERT',
|
@@ -1247,8 +1262,14 @@ class Capture():
|
|
1247
1262
|
# unable to identify the mimetype
|
1248
1263
|
self.logger.debug(f'Unable to identify the mimetype for favicon from {u}')
|
1249
1264
|
else:
|
1250
|
-
if mimetype.startswith('image'):
|
1265
|
+
if not mimetype:
|
1266
|
+
# empty, ignore
|
1267
|
+
pass
|
1268
|
+
elif mimetype.startswith('image'):
|
1251
1269
|
to_return.add(favicon)
|
1270
|
+
elif mimetype.startswith('text'):
|
1271
|
+
# Just ignore, it's probably a 404 page
|
1272
|
+
pass
|
1252
1273
|
else:
|
1253
1274
|
self.logger.warning(f'Unexpected mimetype for favicon from {u}: {mimetype}')
|
1254
1275
|
self.logger.debug(f'Done with favicon from {u}.')
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "PlaywrightCapture"
|
3
|
-
version = "1.24.2"
|
3
|
+
version = "1.24.4"
|
4
4
|
description = "A simple library to capture websites using playwright"
|
5
5
|
authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
|
6
6
|
license = "BSD-3-Clause"
|
@@ -29,7 +29,7 @@ SpeechRecognition = {version = "^3.10.3", optional = true}
|
|
29
29
|
pytz = {"version" = "^2024.1", python = "<3.9"}
|
30
30
|
tzdata = "^2024.1"
|
31
31
|
playwright-stealth = "^1.0.6"
|
32
|
-
setuptools = "^69.
|
32
|
+
setuptools = "^69.5.1"
|
33
33
|
puremagic = "^1.21"
|
34
34
|
|
35
35
|
[tool.poetry.extras]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|