PlaywrightCapture 1.25.9__py3-none-any.whl → 1.25.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- playwrightcapture/capture.py +48 -27
- {playwrightcapture-1.25.9.dist-info → playwrightcapture-1.25.10.dist-info}/METADATA +4 -4
- playwrightcapture-1.25.10.dist-info/RECORD +9 -0
- playwrightcapture-1.25.9.dist-info/RECORD +0 -9
- {playwrightcapture-1.25.9.dist-info → playwrightcapture-1.25.10.dist-info}/LICENSE +0 -0
- {playwrightcapture-1.25.9.dist-info → playwrightcapture-1.25.10.dist-info}/WHEEL +0 -0
playwrightcapture/capture.py
CHANGED
@@ -96,28 +96,40 @@ class PlaywrightCaptureLogAdapter(LoggerAdapter): # type: ignore[type-arg]
|
|
96
96
|
return msg, kwargs
|
97
97
|
|
98
98
|
|
99
|
+
# good test pages:
|
100
|
+
# https://bot.incolumitas.com/
|
101
|
+
# https://fingerprintjs.github.io/BotD/main/
|
102
|
+
|
99
103
|
@dataclass
|
100
104
|
class PCStealthConfig(StealthConfig): # type: ignore[misc]
|
101
105
|
|
102
106
|
@property
|
103
107
|
def enabled_scripts(self) -> Generator[str, None, None]:
|
104
|
-
self.webdriver = True
|
105
|
-
self.webgl_vendor = True
|
106
108
|
self.chrome_app = True
|
107
109
|
self.chrome_csi = True
|
108
|
-
self.chrome_load_times = True
|
109
110
|
self.chrome_runtime = True
|
111
|
+
self.chrome_load_times = True
|
112
|
+
self.navigator_plugins = True
|
113
|
+
self.hairline = True
|
110
114
|
self.iframe_content_window = True
|
111
115
|
self.media_codecs = True
|
116
|
+
|
117
|
+
# permissions are handled directly in playwright
|
118
|
+
self.navigator_permissions = False
|
119
|
+
# Platform is correct now
|
120
|
+
self.navigator_platform = False
|
121
|
+
# probably useless, but it will fallback to 4 regardless
|
112
122
|
self.navigator_hardware_concurrency = 4
|
123
|
+
# Webgl vendor is correct now
|
124
|
+
self.webgl_vendor = False
|
125
|
+
# Set by the viewport
|
126
|
+
self.outerdimensions = False
|
127
|
+
|
128
|
+
# Not working with Playwright 1.45+
|
113
129
|
self.navigator_languages = False # Causes issue
|
114
|
-
self.navigator_permissions = True
|
115
|
-
self.navigator_platform = True
|
116
|
-
self.navigator_plugins = True
|
117
130
|
self.navigator_user_agent = False # Causes issues
|
118
131
|
self.navigator_vendor = False # Causes issues
|
119
|
-
|
120
|
-
self.hairline = True
|
132
|
+
|
121
133
|
yield from super().enabled_scripts
|
122
134
|
|
123
135
|
|
@@ -184,7 +196,7 @@ class Capture():
|
|
184
196
|
self._viewport: ViewportSize | None = None
|
185
197
|
self._user_agent: str = ''
|
186
198
|
self._timezone_id: str = ''
|
187
|
-
self._locale: str = ''
|
199
|
+
self._locale: str = 'en-US'
|
188
200
|
self._color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None = None
|
189
201
|
|
190
202
|
def __prepare_proxy_playwright(self, proxy: str) -> ProxySettings:
|
@@ -462,21 +474,22 @@ class Capture():
|
|
462
474
|
# NOTE: Which perms are supported by which browsers varies
|
463
475
|
# See https://github.com/microsoft/playwright/issues/16577
|
464
476
|
chromium_permissions = [
|
465
|
-
'geolocation',
|
466
|
-
'midi',
|
467
|
-
'midi-sysex',
|
468
|
-
'notifications',
|
469
|
-
'camera',
|
470
|
-
'microphone',
|
471
|
-
'background-sync',
|
472
|
-
'ambient-light-sensor',
|
473
477
|
'accelerometer',
|
474
|
-
'gyroscope',
|
475
|
-
'magnetometer',
|
476
478
|
'accessibility-events',
|
479
|
+
'ambient-light-sensor',
|
480
|
+
'background-sync',
|
481
|
+
'camera',
|
477
482
|
'clipboard-read',
|
478
483
|
'clipboard-write',
|
479
|
-
'
|
484
|
+
'geolocation',
|
485
|
+
'gyroscope',
|
486
|
+
'magnetometer',
|
487
|
+
'microphone',
|
488
|
+
'midi-sysex',
|
489
|
+
'midi',
|
490
|
+
'notifications',
|
491
|
+
'payment-handler',
|
492
|
+
'storage-access'
|
480
493
|
]
|
481
494
|
|
482
495
|
firefox_permissions = ['geolocation', 'notifications']
|
@@ -720,7 +733,6 @@ class Capture():
|
|
720
733
|
await self.__dialog_clickthrough(page)
|
721
734
|
|
722
735
|
await stealth_async(page, PCStealthConfig())
|
723
|
-
# await stealth_async(page)
|
724
736
|
|
725
737
|
page.set_default_timeout((self._capture_timeout - 2) * 1000)
|
726
738
|
# trigger a callback on each request to store it in a dict indexed by URL to get it back from the favicon fetcher
|
@@ -799,6 +811,8 @@ class Capture():
|
|
799
811
|
self.logger.warning(f'Target closed while resolving captcha on {url}: {e}')
|
800
812
|
except Error as e:
|
801
813
|
self.logger.warning(f'Error while resolving captcha on {url}: {e}')
|
814
|
+
except (TimeoutError, asyncio.TimeoutError) as e:
|
815
|
+
self.logger.warning(f'[Timeout] Error while resolving captcha on {url}: {e}')
|
802
816
|
except Exception as e:
|
803
817
|
self.logger.exception(f'General error with captcha solving on {url}: {e}')
|
804
818
|
# ======
|
@@ -905,8 +919,13 @@ class Capture():
|
|
905
919
|
to_return['html'] = content
|
906
920
|
|
907
921
|
if 'html' in to_return and to_return['html'] is not None and with_favicon:
|
908
|
-
|
909
|
-
|
922
|
+
try:
|
923
|
+
to_return['potential_favicons'] = await self.get_favicons(page.url, to_return['html'])
|
924
|
+
got_favicons = True
|
925
|
+
except (TimeoutError, asyncio.TimeoutError) as e:
|
926
|
+
self.logger.warning(f'[Timeout] Unable to get favicons: {e}')
|
927
|
+
except Exception as e:
|
928
|
+
self.logger.warning(f'Unable to get favicons: {e}')
|
910
929
|
|
911
930
|
to_return['last_redirected_url'] = page.url
|
912
931
|
to_return['png'] = await self._failsafe_get_screenshot(page)
|
@@ -1209,7 +1228,8 @@ class Capture():
|
|
1209
1228
|
if self.proxy and self.proxy.get('server'):
|
1210
1229
|
connector = ProxyConnector.from_url(self.proxy['server'])
|
1211
1230
|
|
1212
|
-
|
1231
|
+
timeout = aiohttp.ClientTimeout(total=10)
|
1232
|
+
async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
|
1213
1233
|
while True:
|
1214
1234
|
try:
|
1215
1235
|
href = await main_frame.get_by_role("link", name="Alternatively, download audio as MP3").get_attribute("href")
|
@@ -1219,7 +1239,7 @@ class Capture():
|
|
1219
1239
|
if not href:
|
1220
1240
|
self.logger.warning('Unable to find download link for captcha.')
|
1221
1241
|
return False
|
1222
|
-
async with session.get(href,
|
1242
|
+
async with session.get(href, ssl=False) as response:
|
1223
1243
|
response.raise_for_status()
|
1224
1244
|
mp3_content = await response.read()
|
1225
1245
|
with NamedTemporaryFile() as mp3_file, NamedTemporaryFile() as wav_file:
|
@@ -1417,7 +1437,8 @@ class Capture():
|
|
1417
1437
|
return set()
|
1418
1438
|
to_fetch, to_return = extracted_favicons
|
1419
1439
|
to_fetch.add('/favicon.ico')
|
1420
|
-
|
1440
|
+
timeout = aiohttp.ClientTimeout(total=10)
|
1441
|
+
async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
|
1421
1442
|
session.headers['user-agent'] = self.user_agent
|
1422
1443
|
for u in to_fetch:
|
1423
1444
|
try:
|
@@ -1427,7 +1448,7 @@ class Capture():
|
|
1427
1448
|
if url_to_fetch in self._requests:
|
1428
1449
|
favicon = self._requests[url_to_fetch]
|
1429
1450
|
if not favicon:
|
1430
|
-
async with session.get(url_to_fetch,
|
1451
|
+
async with session.get(url_to_fetch, ssl=False) as favicon_response:
|
1431
1452
|
favicon_response.raise_for_status()
|
1432
1453
|
favicon = await favicon_response.read()
|
1433
1454
|
if favicon:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: PlaywrightCapture
|
3
|
-
Version: 1.25.9
|
3
|
+
Version: 1.25.10
|
4
4
|
Summary: A simple library to capture websites using playwright
|
5
5
|
Home-page: https://github.com/Lookyloo/PlaywrightCapture
|
6
6
|
License: BSD-3-Clause
|
@@ -21,8 +21,8 @@ Classifier: Topic :: Internet
|
|
21
21
|
Classifier: Topic :: Security
|
22
22
|
Provides-Extra: recaptcha
|
23
23
|
Requires-Dist: SpeechRecognition (>=3.10.4,<4.0.0) ; extra == "recaptcha"
|
24
|
-
Requires-Dist: aiohttp-socks (>=0.
|
25
|
-
Requires-Dist: aiohttp[speedups] (>=3.
|
24
|
+
Requires-Dist: aiohttp-socks (>=0.9,<0.10)
|
25
|
+
Requires-Dist: aiohttp[speedups] (>=3.10.1,<4.0.0)
|
26
26
|
Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
|
27
27
|
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
|
28
28
|
Requires-Dist: dateparser (>=1.2.0,<2.0.0)
|
@@ -31,7 +31,7 @@ Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
|
|
31
31
|
Requires-Dist: puremagic (>=1.26,<2.0)
|
32
32
|
Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
|
33
33
|
Requires-Dist: pytz (>=2024.1,<2025.0) ; python_version < "3.9"
|
34
|
-
Requires-Dist: setuptools (>=
|
34
|
+
Requires-Dist: setuptools (>=72.1.0,<73.0.0)
|
35
35
|
Requires-Dist: tzdata (>=2024.1,<2025.0)
|
36
36
|
Requires-Dist: w3lib (>=2.2.1,<3.0.0)
|
37
37
|
Project-URL: Repository, https://github.com/Lookyloo/PlaywrightCapture
|
@@ -0,0 +1,9 @@
|
|
1
|
+
playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
|
2
|
+
playwrightcapture/capture.py,sha256=AayQPmsjzcN2TIBi4uuwDmV0kqIVLDdAfEaNPUg8TXY,72606
|
3
|
+
playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
|
4
|
+
playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
|
5
|
+
playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
+
playwrightcapture-1.25.10.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
7
|
+
playwrightcapture-1.25.10.dist-info/METADATA,sha256=3nVVIzM7kcR62_XYA1mRUxfoflPZwfLS5B8KDPy3Hks,3172
|
8
|
+
playwrightcapture-1.25.10.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
9
|
+
playwrightcapture-1.25.10.dist-info/RECORD,,
|
@@ -1,9 +0,0 @@
|
|
1
|
-
playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
|
2
|
-
playwrightcapture/capture.py,sha256=uS8e87-7jl8F7TgfzhKhlV4pGf8n6twu9rVzzlqIhXM,71671
|
3
|
-
playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
|
4
|
-
playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
|
5
|
-
playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
playwrightcapture-1.25.9.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
7
|
-
playwrightcapture-1.25.9.dist-info/METADATA,sha256=7ds0ymzTNfkYLajoJTc-t1G4wOhPHRbxmg6CCZkMtUE,3173
|
8
|
-
playwrightcapture-1.25.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
9
|
-
playwrightcapture-1.25.9.dist-info/RECORD,,
|
File without changes
|
File without changes
|