PlaywrightCapture 1.22.7__tar.gz → 1.23.0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PlaywrightCapture
3
- Version: 1.22.7
3
+ Version: 1.23.0
4
4
  Summary: A simple library to capture websites using playwright
5
5
  Home-page: https://github.com/Lookyloo/PlaywrightCapture
6
6
  License: BSD-3-Clause
@@ -23,10 +23,10 @@ Provides-Extra: recaptcha
23
23
  Requires-Dist: SpeechRecognition (>=3.10.1,<4.0.0) ; extra == "recaptcha"
24
24
  Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
25
25
  Requires-Dist: dateparser (>=1.2.0,<2.0.0)
26
- Requires-Dist: playwright (>=1.41.0,<2.0.0)
26
+ Requires-Dist: playwright (>=1.41.1,<2.0.0)
27
27
  Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
28
28
  Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
29
- Requires-Dist: pytz (>=2023.3.post1,<2024.0) ; python_version < "3.9"
29
+ Requires-Dist: pytz (>=2024.1,<2025.0) ; python_version < "3.9"
30
30
  Requires-Dist: requests[socks] (>=2.31.0,<3.0.0) ; extra == "recaptcha"
31
31
  Requires-Dist: setuptools (>=69.0.3,<70.0.0)
32
32
  Requires-Dist: tzdata (>=2023.4,<2024.0)
@@ -26,7 +26,7 @@ from bs4 import BeautifulSoup
26
26
  from charset_normalizer import from_bytes
27
27
  from playwright.async_api import async_playwright, Frame, Error, Page, Download
28
28
  from playwright.async_api import TimeoutError as PlaywrightTimeoutError
29
- from playwright_stealth import stealth_async # type: ignore
29
+ from playwright_stealth import stealth_async # type: ignore[import-untyped]
30
30
  from w3lib.html import strip_html5_whitespace
31
31
  from w3lib.url import canonicalize_url, safe_url_string
32
32
 
@@ -46,8 +46,8 @@ if TYPE_CHECKING:
46
46
  BROWSER = Literal['chromium', 'firefox', 'webkit']
47
47
 
48
48
  try:
49
- import pydub # type: ignore
50
- from speech_recognition import Recognizer, AudioFile # type: ignore
49
+ import pydub # type: ignore[import-untyped]
50
+ from speech_recognition import Recognizer, AudioFile # type: ignore[import-untyped]
51
51
  CAN_SOLVE_CAPTCHA = True
52
52
  except ImportError:
53
53
  CAN_SOLVE_CAPTCHA = False
@@ -122,9 +122,9 @@ class Capture():
122
122
  self._user_agent: str = ''
123
123
  self._timezone_id: str = ''
124
124
  self._locale: str = ''
125
- self._color_scheme: str = ''
125
+ self._color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None = None
126
126
 
127
- async def __aenter__(self) -> 'Capture':
127
+ async def __aenter__(self) -> Capture:
128
128
  '''Launch the browser'''
129
129
  self._temp_harfile = NamedTemporaryFile(delete=False)
130
130
 
@@ -138,10 +138,9 @@ class Capture():
138
138
  elif self.browser_name not in self._browsers:
139
139
  raise UnknownPlaywrightBrowser(f'Incorrect browser name {self.browser_name}, must be in {", ".join(self._browsers)}')
140
140
 
141
- launch_settings = {}
142
- if self.proxy:
143
- launch_settings['proxy'] = self.proxy
144
- self.browser = await self.playwright[self.browser_name].launch(**launch_settings) # type: ignore
141
+ self.browser = await self.playwright[self.browser_name].launch(
142
+ proxy=self.proxy if self.proxy else None
143
+ )
145
144
  return self
146
145
 
147
146
  async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
@@ -329,59 +328,37 @@ class Capture():
329
328
  self._user_agent = user_agent
330
329
 
331
330
  @property
332
- def color_scheme(self) -> str:
331
+ def color_scheme(self) -> Literal['dark', 'light', 'no-preference', 'null'] | None:
333
332
  return self._color_scheme
334
333
 
335
334
  @color_scheme.setter
336
- def color_scheme(self, color_scheme: str | None) -> None:
335
+ def color_scheme(self, color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None) -> None:
337
336
  if not color_scheme:
338
337
  return
339
- schemes = ['light', 'dark', 'no-preference']
340
- if color_scheme in ['light', 'dark', 'no-preference']:
338
+ schemes = ['light', 'dark', 'no-preference', 'null']
339
+ if color_scheme in schemes:
341
340
  self._color_scheme = color_scheme
342
341
  else:
343
342
  raise InvalidPlaywrightParameter(f'Invalid color scheme ({color_scheme}), must be in {", ".join(schemes)}.')
344
343
 
345
344
  async def initialize_context(self) -> None:
346
- default_context_settings = {
347
- 'record_har_path': self._temp_harfile.name,
348
- 'ignore_https_errors': True
349
- }
350
-
345
+ device_context_settings = {}
351
346
  if self.device_name:
352
- default_context_settings.update(self.playwright.devices[self.device_name])
353
-
354
- if self.http_credentials:
355
- default_context_settings['http_credentials'] = self.http_credentials
356
-
357
- if self.user_agent:
358
- # User defined UA, can overwrite device UA
359
- default_context_settings['user_agent'] = self.user_agent
360
-
361
- if self.locale:
362
- default_context_settings['locale'] = self.locale
363
-
364
- if self.timezone_id:
365
- default_context_settings['timezone_id'] = self.timezone_id
366
-
367
- if self.color_scheme:
368
- default_context_settings['color_scheme'] = self.color_scheme
369
-
370
- if self.viewport:
371
- # User defined viewport, can overwrite device viewport
372
- default_context_settings['viewport'] = self.viewport
373
- elif 'viewport' not in default_context_settings:
374
- # No viewport given, fallback to default
375
- default_context_settings['viewport'] = self._default_viewport
376
-
377
- if self.browser_name == 'firefox' and default_context_settings.get('is_mobile'):
378
- # NOTE: Not supported, see https://github.com/microsoft/playwright-python/issues/1509
379
- default_context_settings.pop('is_mobile')
380
-
381
- # FIXME: video for debug
382
- # default_context_settings['record_video_dir'] = './videos/'
383
-
384
- self.context = await self.browser.new_context(**default_context_settings) # type: ignore
347
+ device_context_settings = self.playwright.devices[self.device_name]
348
+
349
+ self.context = await self.browser.new_context(
350
+ record_har_path=self._temp_harfile.name,
351
+ ignore_https_errors=True,
352
+ http_credentials=self.http_credentials if self.http_credentials else None,
353
+ user_agent=self.user_agent if self.user_agent else device_context_settings.pop('user_agent', None),
354
+ locale=self.locale if self.locale else None,
355
+ timezone_id=self.timezone_id if self.timezone_id else None,
356
+ color_scheme=self.color_scheme if self.color_scheme else None,
357
+ viewport=self.viewport if self.viewport else device_context_settings.pop('viewport', self._default_viewport),
358
+ # For debug only
359
+ # record_video_dir='./videos/',
360
+ **device_context_settings
361
+ )
385
362
  self.context.set_default_timeout(self._capture_timeout * 1000)
386
363
 
387
364
  if self.cookies:
@@ -650,7 +627,7 @@ class Capture():
650
627
  rendered_hostname_only=rendered_hostname_only,
651
628
  max_depth_capture_time=max_capture_time),
652
629
  timeout=max_capture_time + 1) # just adding a bit of padding so playwright has the chance to raise the exception first
653
- to_return['children'].append(child_capture) # type: ignore
630
+ to_return['children'].append(child_capture) # type: ignore[union-attr]
654
631
  except (TimeoutError, asyncio.exceptions.TimeoutError):
655
632
  self.logger.info(f'Timeout error, took more than {max_capture_time}s. Unable to capture {url}.')
656
633
  except Exception as e:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "PlaywrightCapture"
3
- version = "1.22.7"
3
+ version = "1.23.0"
4
4
  description = "A simple library to capture websites using playwright"
5
5
  authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
6
6
  license = "BSD-3-Clause"
@@ -19,14 +19,14 @@ classifiers=[
19
19
 
20
20
  [tool.poetry.dependencies]
21
21
  python = "^3.8"
22
- playwright = "^1.41.0"
22
+ playwright = "^1.41.1"
23
23
  dateparser = "^1.2.0"
24
24
  beautifulsoup4 = {version= "^4.12.3", extras = ["lxml", "charset_normalizer"]}
25
25
  w3lib = "^2.1.2"
26
26
  requests = {extras = ["socks"], version = "^2.31.0"}
27
27
  pydub = {version = "^0.25.1", optional = true}
28
28
  SpeechRecognition = {version = "^3.10.1", optional = true}
29
- pytz = {"version" = "^2023.3.post1", python = "<3.9"}
29
+ pytz = {"version" = "^2024.1", python = "<3.9"}
30
30
  tzdata = "^2023.4"
31
31
  playwright-stealth = "^1.0.6"
32
32
  setuptools = "^69.0.3"
@@ -42,8 +42,8 @@ types-beautifulsoup4 = "^4.12.0.20240106"
42
42
  pytest = "^7.4.4"
43
43
  mypy = "^1.8.0"
44
44
  types-dateparser = "^1.1.4.20240106"
45
- types-requests = "^2.31.0.20240106"
46
- types-pytz = "^2023.3.1.1"
45
+ types-requests = "^2.31.0.20240125"
46
+ types-pytz = "^2023.4.0.20240130"
47
47
 
48
48
 
49
49
  [build-system]