PlaywrightCapture 1.25.8__py3-none-any.whl → 1.25.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- playwrightcapture/capture.py +17 -4
- {playwrightcapture-1.25.8.dist-info → playwrightcapture-1.25.9.dist-info}/METADATA +2 -2
- {playwrightcapture-1.25.8.dist-info → playwrightcapture-1.25.9.dist-info}/RECORD +5 -5
- {playwrightcapture-1.25.8.dist-info → playwrightcapture-1.25.9.dist-info}/LICENSE +0 -0
- {playwrightcapture-1.25.8.dist-info → playwrightcapture-1.25.9.dist-info}/WHEEL +0 -0
playwrightcapture/capture.py
CHANGED
@@ -18,7 +18,7 @@ from io import BytesIO
|
|
18
18
|
from logging import LoggerAdapter, Logger
|
19
19
|
from tempfile import NamedTemporaryFile
|
20
20
|
from typing import Any, TypedDict, Literal, TYPE_CHECKING, MutableMapping, Generator
|
21
|
-
from urllib.parse import urlparse, unquote, urljoin
|
21
|
+
from urllib.parse import urlparse, unquote, urljoin, urlsplit, urlunsplit
|
22
22
|
from zipfile import ZipFile
|
23
23
|
|
24
24
|
import aiohttp
|
@@ -164,7 +164,7 @@ class Capture():
|
|
164
164
|
self.proxy: ProxySettings = {}
|
165
165
|
if proxy:
|
166
166
|
if isinstance(proxy, str):
|
167
|
-
self.proxy =
|
167
|
+
self.proxy = self.__prepare_proxy_playwright(proxy)
|
168
168
|
elif isinstance(proxy, dict):
|
169
169
|
self.proxy = {'server': proxy['server'], 'bypass': proxy.get('bypass', ''),
|
170
170
|
'username': proxy.get('username', ''),
|
@@ -187,6 +187,19 @@ class Capture():
|
|
187
187
|
self._locale: str = ''
|
188
188
|
self._color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None = None
|
189
189
|
|
190
|
+
def __prepare_proxy_playwright(self, proxy: str) -> ProxySettings:
|
191
|
+
splitted = urlsplit(proxy)
|
192
|
+
if splitted.username and splitted.password:
|
193
|
+
return {'username': splitted.username, 'password': splitted.password,
|
194
|
+
'server': urlunsplit((splitted.scheme, f'{splitted.hostname}:{splitted.port}', splitted.path, splitted.query, splitted.fragment))}
|
195
|
+
return {'server': proxy}
|
196
|
+
|
197
|
+
def __prepare_proxy_aiohttp(self, proxy: ProxySettings) -> str:
|
198
|
+
if 'username' in proxy and 'password' in proxy:
|
199
|
+
splitted = urlsplit(proxy['server'])
|
200
|
+
return urlunsplit((splitted.scheme, f'{proxy["username"]}:{proxy["password"]}@{splitted.netloc}', splitted.path, splitted.query, splitted.fragment))
|
201
|
+
return proxy['server']
|
202
|
+
|
190
203
|
async def __aenter__(self) -> Capture:
|
191
204
|
'''Launch the browser'''
|
192
205
|
self._temp_harfile = NamedTemporaryFile(delete=False)
|
@@ -1395,9 +1408,9 @@ class Capture():
|
|
1395
1408
|
Method inspired by https://github.com/ail-project/ail-framework/blob/master/bin/lib/crawlers.py
|
1396
1409
|
"""
|
1397
1410
|
connector = None
|
1398
|
-
if self.proxy
|
1411
|
+
if self.proxy:
|
1399
1412
|
# NOTE 2024-05-17: switch to async to fetch, the lib uses socks5h by default
|
1400
|
-
connector = ProxyConnector.from_url(self.proxy
|
1413
|
+
connector = ProxyConnector.from_url(self.__prepare_proxy_aiohttp(self.proxy))
|
1401
1414
|
|
1402
1415
|
extracted_favicons = self.__extract_favicons(rendered_content)
|
1403
1416
|
if not extracted_favicons:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: PlaywrightCapture
|
3
|
-
Version: 1.25.8
|
3
|
+
Version: 1.25.9
|
4
4
|
Summary: A simple library to capture websites using playwright
|
5
5
|
Home-page: https://github.com/Lookyloo/PlaywrightCapture
|
6
6
|
License: BSD-3-Clause
|
@@ -26,7 +26,7 @@ Requires-Dist: aiohttp[speedups] (>=3.9.5,<4.0.0)
|
|
26
26
|
Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
|
27
27
|
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
|
28
28
|
Requires-Dist: dateparser (>=1.2.0,<2.0.0)
|
29
|
-
Requires-Dist: playwright (>=1.45.
|
29
|
+
Requires-Dist: playwright (>=1.45.1,<2.0.0)
|
30
30
|
Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
|
31
31
|
Requires-Dist: puremagic (>=1.26,<2.0)
|
32
32
|
Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
|
@@ -1,9 +1,9 @@
|
|
1
1
|
playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
|
2
|
-
playwrightcapture/capture.py,sha256=
|
2
|
+
playwrightcapture/capture.py,sha256=uS8e87-7jl8F7TgfzhKhlV4pGf8n6twu9rVzzlqIhXM,71671
|
3
3
|
playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
|
4
4
|
playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
|
5
5
|
playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
playwrightcapture-1.25.8.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
7
|
-
playwrightcapture-1.25.
|
8
|
-
playwrightcapture-1.25.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
9
|
-
playwrightcapture-1.25.8.dist-info/RECORD,,
|
6
|
+
playwrightcapture-1.25.9.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
7
|
+
playwrightcapture-1.25.9.dist-info/METADATA,sha256=7ds0ymzTNfkYLajoJTc-t1G4wOhPHRbxmg6CCZkMtUE,3173
|
8
|
+
playwrightcapture-1.25.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
9
|
+
playwrightcapture-1.25.9.dist-info/RECORD,,
|
File without changes
|
File without changes
|