PlaywrightCapture 1.25.7__tar.gz → 1.25.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: PlaywrightCapture
-Version: 1.25.7
+Version: 1.25.9
 Summary: A simple library to capture websites using playwright
 Home-page: https://github.com/Lookyloo/PlaywrightCapture
 License: BSD-3-Clause
@@ -26,7 +26,7 @@ Requires-Dist: aiohttp[speedups] (>=3.9.5,<4.0.0)
 Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
 Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
 Requires-Dist: dateparser (>=1.2.0,<2.0.0)
-Requires-Dist: playwright (>=1.45.0,<2.0.0)
+Requires-Dist: playwright (>=1.45.1,<2.0.0)
 Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
 Requires-Dist: puremagic (>=1.26,<2.0)
 Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
@@ -18,7 +18,7 @@ from io import BytesIO
 from logging import LoggerAdapter, Logger
 from tempfile import NamedTemporaryFile
 from typing import Any, TypedDict, Literal, TYPE_CHECKING, MutableMapping, Generator
-from urllib.parse import urlparse, unquote, urljoin
+from urllib.parse import urlparse, unquote, urljoin, urlsplit, urlunsplit
 from zipfile import ZipFile
 
 import aiohttp
@@ -164,7 +164,7 @@ class Capture():
         self.proxy: ProxySettings = {}
         if proxy:
             if isinstance(proxy, str):
-                self.proxy = {'server': proxy}
+                self.proxy = self.__prepare_proxy_playwright(proxy)
             elif isinstance(proxy, dict):
                 self.proxy = {'server': proxy['server'], 'bypass': proxy.get('bypass', ''),
                               'username': proxy.get('username', ''),
@@ -187,6 +187,19 @@ class Capture():
         self._locale: str = ''
         self._color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None = None
 
+    def __prepare_proxy_playwright(self, proxy: str) -> ProxySettings:
+        splitted = urlsplit(proxy)
+        if splitted.username and splitted.password:
+            return {'username': splitted.username, 'password': splitted.password,
+                    'server': urlunsplit((splitted.scheme, f'{splitted.hostname}:{splitted.port}', splitted.path, splitted.query, splitted.fragment))}
+        return {'server': proxy}
+
+    def __prepare_proxy_aiohttp(self, proxy: ProxySettings) -> str:
+        if 'username' in proxy and 'password' in proxy:
+            splitted = urlsplit(proxy['server'])
+            return urlunsplit((splitted.scheme, f'{proxy["username"]}:{proxy["password"]}@{splitted.netloc}', splitted.path, splitted.query, splitted.fragment))
+        return proxy['server']
+
     async def __aenter__(self) -> Capture:
         '''Launch the browser'''
         self._temp_harfile = NamedTemporaryFile(delete=False)
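The two new helpers convert between the two proxy representations: Playwright expects the credentials as separate username/password fields next to a bare server URL, while aiohttp-style connectors take a single URL with the credentials embedded in the netloc. Since the helpers above are private methods, the following is only a minimal standalone sketch of that round trip with urlsplit/urlunsplit, using a made-up SOCKS proxy URL.

# Minimal sketch mirroring the helpers above; the proxy URL is a made-up example.
from urllib.parse import urlsplit, urlunsplit

proxy = 'socks5://user:secret@127.0.0.1:9050'

# Playwright side: pull the credentials out of the URL into separate fields.
splitted = urlsplit(proxy)
playwright_proxy = {
    'username': splitted.username,
    'password': splitted.password,
    'server': urlunsplit((splitted.scheme, f'{splitted.hostname}:{splitted.port}',
                          splitted.path, splitted.query, splitted.fragment)),
}
# -> {'username': 'user', 'password': 'secret', 'server': 'socks5://127.0.0.1:9050'}

# aiohttp side: put the credentials back into the netloc to get a single URL.
server = urlsplit(playwright_proxy['server'])
aiohttp_url = urlunsplit((server.scheme,
                          f"{playwright_proxy['username']}:{playwright_proxy['password']}@{server.netloc}",
                          server.path, server.query, server.fragment))
# -> 'socks5://user:secret@127.0.0.1:9050'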
@@ -411,6 +424,9 @@ class Capture():
             if context_vp := device_context_settings.pop('viewport', self._default_viewport):
                 # Always true, but we also always want to pop it.
                 vp = self.viewport if self.viewport else context_vp
+        else:
+            ua = self.user_agent
+            vp = self.viewport
 
         self.context = await self.browser.new_context(
             record_har_path=self._temp_harfile.name,
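The new else branch falls back to the capture's own user agent and viewport when no device-specific context settings apply, before the context is created just below. A hedged sketch of that path using standard Playwright kwargs follows; the values are made-up stand-ins, and the real call also passes the HAR path and other settings.

# Sketch only: standard Playwright new_context() kwargs with made-up values,
# roughly what the ua / vp fallbacks end up as.
import asyncio
from playwright.async_api import async_playwright

async def main() -> None:
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        ua = 'Mozilla/5.0 (X11; Linux x86_64)'  # stand-in for self.user_agent
        vp = {'width': 1280, 'height': 720}     # stand-in for self.viewport
        context = await browser.new_context(user_agent=ua, viewport=vp)
        page = await context.new_page()
        await page.goto('https://example.com')
        await browser.close()

asyncio.run(main())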
@@ -1392,9 +1408,9 @@ class Capture():
         Method inspired by https://github.com/ail-project/ail-framework/blob/master/bin/lib/crawlers.py
         """
         connector = None
-        if self.proxy and self.proxy.get('server'):
+        if self.proxy:
             # NOTE 2024-05-17: switch to async to fetch, the lib uses socks5h by default
-            connector = ProxyConnector.from_url(self.proxy['server'])
+            connector = ProxyConnector.from_url(self.__prepare_proxy_aiohttp(self.proxy))
 
         extracted_favicons = self.__extract_favicons(rendered_content)
         if not extracted_favicons:
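The favicon fetch now builds its connector from the full proxy settings (credentials included) rather than only the bare server URL. Below is a sketch of the same pattern, assuming ProxyConnector is the one from the aiohttp-socks package (which the socks5h note suggests); the URL and credentials are made-up examples.

# Sketch assuming aiohttp-socks; URLs and credentials are made-up examples.
import asyncio
from typing import Optional

import aiohttp
from aiohttp_socks import ProxyConnector

async def fetch_favicon(url: str, proxy_url: Optional[str] = None) -> bytes:
    # ProxyConnector.from_url() accepts credentials embedded in the URL,
    # which is the form the __prepare_proxy_aiohttp() helper produces.
    connector = ProxyConnector.from_url(proxy_url) if proxy_url else None
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get(url) as response:
            return await response.read()

# asyncio.run(fetch_favicon('https://example.com/favicon.ico',
#                           'socks5://user:secret@127.0.0.1:9050'))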
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "PlaywrightCapture"
-version = "1.25.7"
+version = "1.25.9"
 description = "A simple library to capture websites using playwright"
 authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
 license = "BSD-3-Clause"
@@ -19,7 +19,7 @@ classifiers=[
 
 [tool.poetry.dependencies]
 python = "^3.8"
-playwright = "^1.45.0"
+playwright = "^1.45.1"
 dateparser = "^1.2.0"
 beautifulsoup4 = {version= "^4.12.3", extras = ["lxml", "charset_normalizer"]}
 w3lib = "^2.2.1"