PlaywrightCapture 1.22.7__py3-none-any.whl → 1.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- playwrightcapture/capture.py +29 -52
- {playwrightcapture-1.22.7.dist-info → playwrightcapture-1.23.0.dist-info}/METADATA +3 -3
- {playwrightcapture-1.22.7.dist-info → playwrightcapture-1.23.0.dist-info}/RECORD +5 -5
- {playwrightcapture-1.22.7.dist-info → playwrightcapture-1.23.0.dist-info}/LICENSE +0 -0
- {playwrightcapture-1.22.7.dist-info → playwrightcapture-1.23.0.dist-info}/WHEEL +0 -0
playwrightcapture/capture.py
CHANGED
@@ -26,7 +26,7 @@ from bs4 import BeautifulSoup
|
|
26
26
|
from charset_normalizer import from_bytes
|
27
27
|
from playwright.async_api import async_playwright, Frame, Error, Page, Download
|
28
28
|
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
|
29
|
-
from playwright_stealth import stealth_async # type: ignore
|
29
|
+
from playwright_stealth import stealth_async # type: ignore[import-untyped]
|
30
30
|
from w3lib.html import strip_html5_whitespace
|
31
31
|
from w3lib.url import canonicalize_url, safe_url_string
|
32
32
|
|
@@ -46,8 +46,8 @@ if TYPE_CHECKING:
|
|
46
46
|
BROWSER = Literal['chromium', 'firefox', 'webkit']
|
47
47
|
|
48
48
|
try:
|
49
|
-
import pydub # type: ignore
|
50
|
-
from speech_recognition import Recognizer, AudioFile # type: ignore
|
49
|
+
import pydub # type: ignore[import-untyped]
|
50
|
+
from speech_recognition import Recognizer, AudioFile # type: ignore[import-untyped]
|
51
51
|
CAN_SOLVE_CAPTCHA = True
|
52
52
|
except ImportError:
|
53
53
|
CAN_SOLVE_CAPTCHA = False
|
@@ -122,9 +122,9 @@ class Capture():
|
|
122
122
|
self._user_agent: str = ''
|
123
123
|
self._timezone_id: str = ''
|
124
124
|
self._locale: str = ''
|
125
|
-
self._color_scheme:
|
125
|
+
self._color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None = None
|
126
126
|
|
127
|
-
async def __aenter__(self) ->
|
127
|
+
async def __aenter__(self) -> Capture:
|
128
128
|
'''Launch the browser'''
|
129
129
|
self._temp_harfile = NamedTemporaryFile(delete=False)
|
130
130
|
|
@@ -138,10 +138,9 @@ class Capture():
|
|
138
138
|
elif self.browser_name not in self._browsers:
|
139
139
|
raise UnknownPlaywrightBrowser(f'Incorrect browser name {self.browser_name}, must be in {", ".join(self._browsers)}')
|
140
140
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
self.browser = await self.playwright[self.browser_name].launch(**launch_settings) # type: ignore
|
141
|
+
self.browser = await self.playwright[self.browser_name].launch(
|
142
|
+
proxy=self.proxy if self.proxy else None
|
143
|
+
)
|
145
144
|
return self
|
146
145
|
|
147
146
|
async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
|
@@ -329,59 +328,37 @@ class Capture():
|
|
329
328
|
self._user_agent = user_agent
|
330
329
|
|
331
330
|
@property
|
332
|
-
def color_scheme(self) ->
|
331
|
+
def color_scheme(self) -> Literal['dark', 'light', 'no-preference', 'null'] | None:
|
333
332
|
return self._color_scheme
|
334
333
|
|
335
334
|
@color_scheme.setter
|
336
|
-
def color_scheme(self, color_scheme:
|
335
|
+
def color_scheme(self, color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None) -> None:
|
337
336
|
if not color_scheme:
|
338
337
|
return
|
339
|
-
schemes = ['light', 'dark', 'no-preference']
|
340
|
-
if color_scheme in
|
338
|
+
schemes = ['light', 'dark', 'no-preference', 'null']
|
339
|
+
if color_scheme in schemes:
|
341
340
|
self._color_scheme = color_scheme
|
342
341
|
else:
|
343
342
|
raise InvalidPlaywrightParameter(f'Invalid color scheme ({color_scheme}), must be in {", ".join(schemes)}.')
|
344
343
|
|
345
344
|
async def initialize_context(self) -> None:
|
346
|
-
|
347
|
-
'record_har_path': self._temp_harfile.name,
|
348
|
-
'ignore_https_errors': True
|
349
|
-
}
|
350
|
-
|
345
|
+
device_context_settings = {}
|
351
346
|
if self.device_name:
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
if self.color_scheme:
|
368
|
-
default_context_settings['color_scheme'] = self.color_scheme
|
369
|
-
|
370
|
-
if self.viewport:
|
371
|
-
# User defined viewport, can overwrite device viewport
|
372
|
-
default_context_settings['viewport'] = self.viewport
|
373
|
-
elif 'viewport' not in default_context_settings:
|
374
|
-
# No viewport given, fallback to default
|
375
|
-
default_context_settings['viewport'] = self._default_viewport
|
376
|
-
|
377
|
-
if self.browser_name == 'firefox' and default_context_settings.get('is_mobile'):
|
378
|
-
# NOTE: Not supported, see https://github.com/microsoft/playwright-python/issues/1509
|
379
|
-
default_context_settings.pop('is_mobile')
|
380
|
-
|
381
|
-
# FIXME: video for debug
|
382
|
-
# default_context_settings['record_video_dir'] = './videos/'
|
383
|
-
|
384
|
-
self.context = await self.browser.new_context(**default_context_settings) # type: ignore
|
347
|
+
device_context_settings = self.playwright.devices[self.device_name]
|
348
|
+
|
349
|
+
self.context = await self.browser.new_context(
|
350
|
+
record_har_path=self._temp_harfile.name,
|
351
|
+
ignore_https_errors=True,
|
352
|
+
http_credentials=self.http_credentials if self.http_credentials else None,
|
353
|
+
user_agent=self.user_agent if self.user_agent else device_context_settings.pop('user_agent', None),
|
354
|
+
locale=self.locale if self.locale else None,
|
355
|
+
timezone_id=self.timezone_id if self.timezone_id else None,
|
356
|
+
color_scheme=self.color_scheme if self.color_scheme else None,
|
357
|
+
viewport=self.viewport if self.viewport else device_context_settings.pop('viewport', self._default_viewport),
|
358
|
+
# For debug only
|
359
|
+
# record_video_dir='./videos/',
|
360
|
+
**device_context_settings
|
361
|
+
)
|
385
362
|
self.context.set_default_timeout(self._capture_timeout * 1000)
|
386
363
|
|
387
364
|
if self.cookies:
|
@@ -650,7 +627,7 @@ class Capture():
|
|
650
627
|
rendered_hostname_only=rendered_hostname_only,
|
651
628
|
max_depth_capture_time=max_capture_time),
|
652
629
|
timeout=max_capture_time + 1) # just adding a bit of padding so playwright has the chance to raise the exception first
|
653
|
-
to_return['children'].append(child_capture) # type: ignore
|
630
|
+
to_return['children'].append(child_capture) # type: ignore[union-attr]
|
654
631
|
except (TimeoutError, asyncio.exceptions.TimeoutError):
|
655
632
|
self.logger.info(f'Timeout error, took more than {max_capture_time}s. Unable to capture {url}.')
|
656
633
|
except Exception as e:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: PlaywrightCapture
|
3
|
-
Version: 1.22.7
|
3
|
+
Version: 1.23.0
|
4
4
|
Summary: A simple library to capture websites using playwright
|
5
5
|
Home-page: https://github.com/Lookyloo/PlaywrightCapture
|
6
6
|
License: BSD-3-Clause
|
@@ -23,10 +23,10 @@ Provides-Extra: recaptcha
|
|
23
23
|
Requires-Dist: SpeechRecognition (>=3.10.1,<4.0.0) ; extra == "recaptcha"
|
24
24
|
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
|
25
25
|
Requires-Dist: dateparser (>=1.2.0,<2.0.0)
|
26
|
-
Requires-Dist: playwright (>=1.41.
|
26
|
+
Requires-Dist: playwright (>=1.41.1,<2.0.0)
|
27
27
|
Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
|
28
28
|
Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
|
29
|
-
Requires-Dist: pytz (>=
|
29
|
+
Requires-Dist: pytz (>=2024.1,<2025.0) ; python_version < "3.9"
|
30
30
|
Requires-Dist: requests[socks] (>=2.31.0,<3.0.0) ; extra == "recaptcha"
|
31
31
|
Requires-Dist: setuptools (>=69.0.3,<70.0.0)
|
32
32
|
Requires-Dist: tzdata (>=2023.4,<2024.0)
|
@@ -1,9 +1,9 @@
|
|
1
1
|
playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
|
2
|
-
playwrightcapture/capture.py,sha256=
|
2
|
+
playwrightcapture/capture.py,sha256=ZsRkTqa-E5fgiqsV636Av3vfNO-ZKipN1-wTd3-AZoI,48492
|
3
3
|
playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
|
4
4
|
playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
|
5
5
|
playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
playwrightcapture-1.
|
7
|
-
playwrightcapture-1.
|
8
|
-
playwrightcapture-1.
|
9
|
-
playwrightcapture-1.
|
6
|
+
playwrightcapture-1.23.0.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
7
|
+
playwrightcapture-1.23.0.dist-info/METADATA,sha256=CcUuAThLDySnGSM_YvGohD9wCuI3okxz75asmA951gY,3008
|
8
|
+
playwrightcapture-1.23.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
9
|
+
playwrightcapture-1.23.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|