PlaywrightCapture 1.31.2__py3-none-any.whl → 1.31.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- playwrightcapture/capture.py +15 -4
- {playwrightcapture-1.31.2.dist-info → playwrightcapture-1.31.4.dist-info}/METADATA +3 -3
- {playwrightcapture-1.31.2.dist-info → playwrightcapture-1.31.4.dist-info}/RECORD +5 -5
- {playwrightcapture-1.31.2.dist-info → playwrightcapture-1.31.4.dist-info}/LICENSE +0 -0
- {playwrightcapture-1.31.2.dist-info → playwrightcapture-1.31.4.dist-info}/WHEEL +0 -0
playwrightcapture/capture.py
CHANGED
@@ -118,7 +118,8 @@ class Capture():
|
|
118
118
|
proxy: str | dict[str, str] | None=None,
|
119
119
|
socks5_dns_resolver: str | list[str] | None=None,
|
120
120
|
general_timeout_in_sec: int | None=None, loglevel: str | int='INFO',
|
121
|
-
uuid: str | None=None, headless: bool=True
|
121
|
+
uuid: str | None=None, headless: bool=True,
|
122
|
+
*, init_script: str | None=None):
|
122
123
|
"""Captures a page with Playwright.
|
123
124
|
|
124
125
|
:param browser: The browser to use for the capture.
|
@@ -129,6 +130,7 @@ class Capture():
|
|
129
130
|
:param loglevel: Python loglevel
|
130
131
|
:param uuid: The UUID of the capture.
|
131
132
|
:param headless: Whether to run the browser in headless mode. WARNING: requires to run in a graphical environment.
|
133
|
+
:param init_script: An optional JavaScript that will be executed on each page - See https://playwright.dev/python/docs/api/class-browsercontext#browser-context-add-init-script
|
132
134
|
"""
|
133
135
|
master_logger = logging.getLogger('playwrightcapture')
|
134
136
|
master_logger.setLevel(loglevel)
|
@@ -179,6 +181,8 @@ class Capture():
|
|
179
181
|
self._color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None = None
|
180
182
|
self._java_script_enabled = True
|
181
183
|
|
184
|
+
self._init_script = init_script
|
185
|
+
|
182
186
|
def __prepare_proxy_playwright(self, proxy: str) -> ProxySettings:
|
183
187
|
splitted = urlsplit(proxy)
|
184
188
|
if splitted.username and splitted.password:
|
@@ -460,6 +464,9 @@ class Capture():
|
|
460
464
|
)
|
461
465
|
self.context.set_default_timeout(self._capture_timeout * 1000)
|
462
466
|
|
467
|
+
if self._init_script:
|
468
|
+
await self.context.add_init_script(script=self._init_script)
|
469
|
+
|
463
470
|
# very quick and dirty get a platform from the UA so it's not always Win32
|
464
471
|
# This this is deprecated and not very important.
|
465
472
|
# Ref: https://developer.mozilla.org/en-US/docs/Web/API/Navigator/platform
|
@@ -823,7 +830,7 @@ class Capture():
|
|
823
830
|
# Same technique as: https://github.com/NikolaiT/uncaptcha3
|
824
831
|
if CAN_SOLVE_CAPTCHA:
|
825
832
|
try:
|
826
|
-
if (await page.locator("//iframe[@title='reCAPTCHA']").first.is_visible(
|
833
|
+
if (await page.locator("//iframe[@title='reCAPTCHA']").first.is_visible()
|
827
834
|
and await page.locator("//iframe[@title='reCAPTCHA']").first.is_enabled(timeout=2000)):
|
828
835
|
self.logger.info('Found a captcha')
|
829
836
|
await self._recaptcha_solver(page)
|
@@ -1354,7 +1361,11 @@ class Capture():
|
|
1354
1361
|
return href
|
1355
1362
|
|
1356
1363
|
urls: set[str] = set()
|
1357
|
-
|
1364
|
+
try:
|
1365
|
+
soup = BeautifulSoup(rendered_html, "lxml")
|
1366
|
+
except Exception as e:
|
1367
|
+
self.logger.info(f'Unable to parse HTML: {e}')
|
1368
|
+
soup = BeautifulSoup(rendered_html, "html.parser")
|
1358
1369
|
|
1359
1370
|
rendered_hostname = urlparse(rendered_url).hostname
|
1360
1371
|
# The simple ones: the links.
|
@@ -1387,7 +1398,7 @@ class Capture():
|
|
1387
1398
|
if not recaptcha_init_frame:
|
1388
1399
|
return False
|
1389
1400
|
try:
|
1390
|
-
if await recaptcha_init_frame.get_by_role("checkbox", name="I'm not a robot").is_visible(
|
1401
|
+
if await recaptcha_init_frame.get_by_role("checkbox", name="I'm not a robot").is_visible():
|
1391
1402
|
await recaptcha_init_frame.get_by_role("checkbox", name="I'm not a robot").click()
|
1392
1403
|
else:
|
1393
1404
|
self.logger.info('Checkbox not visible.')
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: PlaywrightCapture
|
3
|
-
Version: 1.31.2
|
3
|
+
Version: 1.31.4
|
4
4
|
Summary: A simple library to capture websites using playwright
|
5
5
|
License: BSD-3-Clause
|
6
6
|
Author: Raphaël Vinot
|
@@ -20,14 +20,14 @@ Classifier: Topic :: Security
|
|
20
20
|
Provides-Extra: recaptcha
|
21
21
|
Requires-Dist: SpeechRecognition (>=3.14.3) ; extra == "recaptcha"
|
22
22
|
Requires-Dist: aiohttp-socks (>=0.10.1)
|
23
|
-
Requires-Dist: aiohttp[speedups] (>=3.12.
|
23
|
+
Requires-Dist: aiohttp[speedups] (>=3.12.14)
|
24
24
|
Requires-Dist: async-timeout (>=5.0.1) ; python_version < "3.11"
|
25
25
|
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.13.4)
|
26
26
|
Requires-Dist: dateparser (>=1.2.2)
|
27
27
|
Requires-Dist: dnspython (>=2.7.0,<3.0.0)
|
28
28
|
Requires-Dist: playwright (>=1.53.0)
|
29
29
|
Requires-Dist: playwright-stealth (>=2)
|
30
|
-
Requires-Dist: puremagic (>=1.
|
30
|
+
Requires-Dist: puremagic (>=1.30)
|
31
31
|
Requires-Dist: pydub (>=0.25.1) ; (python_version < "3.10") and (extra == "recaptcha")
|
32
32
|
Requires-Dist: pydub-ng (>=0.2.0) ; (python_version >= "3.10") and (extra == "recaptcha")
|
33
33
|
Requires-Dist: python-socks (>=2.7.1,<3.0.0)
|
@@ -1,10 +1,10 @@
|
|
1
1
|
playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
|
2
|
-
playwrightcapture/capture.py,sha256=
|
2
|
+
playwrightcapture/capture.py,sha256=qWDwQt7pKOr3fQUhidY_U9bO8cIIOSiqSaTNxUdkT3M,87111
|
3
3
|
playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
|
4
4
|
playwrightcapture/helpers.py,sha256=Xqs09zHhzAWnpBtQ0A9YAxg80P3Lj7aBj5M2WuEr0so,1843
|
5
5
|
playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
playwrightcapture/socks5dnslookup.py,sha256=ZpOf8tgsRQZi-WDcn9JbbG1bKz9DSfK_jz1l53UI1Ho,4058
|
7
|
-
playwrightcapture-1.31.2.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
8
|
-
playwrightcapture-1.31.2.dist-info/METADATA,sha256=
|
9
|
-
playwrightcapture-1.31.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
10
|
-
playwrightcapture-1.31.2.dist-info/RECORD,,
|
7
|
+
playwrightcapture-1.31.4.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
8
|
+
playwrightcapture-1.31.4.dist-info/METADATA,sha256=y6QEbPdB1201g3Z81no658PsoD5OoxdyT81LjDCk3uA,3285
|
9
|
+
playwrightcapture-1.31.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
10
|
+
playwrightcapture-1.31.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|