PlaywrightCapture 1.25.10__py3-none-any.whl → 1.25.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- playwrightcapture/capture.py +31 -0
- {playwrightcapture-1.25.10.dist-info → playwrightcapture-1.25.11.dist-info}/METADATA +4 -4
- {playwrightcapture-1.25.10.dist-info → playwrightcapture-1.25.11.dist-info}/RECORD +5 -5
- {playwrightcapture-1.25.10.dist-info → playwrightcapture-1.25.11.dist-info}/LICENSE +0 -0
- {playwrightcapture-1.25.10.dist-info → playwrightcapture-1.25.11.dist-info}/WHEEL +0 -0
playwrightcapture/capture.py
CHANGED
@@ -601,6 +601,8 @@ class Capture():
|
|
601
601
|
elif await page.get_by_test_id("uc-accept-all-button").is_visible():
|
602
602
|
self.logger.info('Consent window found, clicking through.')
|
603
603
|
await page.get_by_test_id("uc-accept-all-button").click(timeout=2000)
|
604
|
+
elif await page.locator('#axeptio_btn_acceptAll').is_visible():
|
605
|
+
await page.locator('#axeptio_btn_acceptAll').click(timeout=2000)
|
604
606
|
else:
|
605
607
|
self.logger.info('Consent window found (dialog), but no button to click through.')
|
606
608
|
await page.add_locator_handler(
|
@@ -646,6 +648,24 @@ class Capture():
|
|
646
648
|
)
|
647
649
|
self.logger.info('Piwik handler added')
|
648
650
|
|
651
|
+
async def __frame_consent(self, frame: Frame) -> bool:
|
652
|
+
"""Search & Click content in iframes. Cannot easily use the locator handler for this without having many many handlers.
|
653
|
+
And the iframes don't have a title or a role to easily identify them so we just try with generic locators that vary by language."""
|
654
|
+
got_button: bool = False
|
655
|
+
if await frame.get_by_label("Alle akzeptieren").is_visible():
|
656
|
+
got_button = True
|
657
|
+
await frame.get_by_label("Alle akzeptieren").click(timeout=2000)
|
658
|
+
elif await frame.get_by_label("Accept & continue").is_visible():
|
659
|
+
got_button = True
|
660
|
+
await frame.get_by_label("Accept & continue").click(timeout=2000)
|
661
|
+
elif await frame.get_by_label("Accepter et continuer").is_visible():
|
662
|
+
got_button = True
|
663
|
+
await frame.get_by_label("Accepter et continuer").click(timeout=2000)
|
664
|
+
elif await frame.get_by_label("Accepteer").is_visible():
|
665
|
+
got_button = True
|
666
|
+
await frame.get_by_label("Accepteer").click(timeout=2000)
|
667
|
+
return got_button
|
668
|
+
|
649
669
|
async def capture_page(self, url: str, *, max_depth_capture_time: int,
|
650
670
|
referer: str | None=None,
|
651
671
|
page: Page | None=None, depth: int=0,
|
@@ -911,8 +931,19 @@ class Capture():
|
|
911
931
|
# self.logger.warning('Unable to move time forward.')
|
912
932
|
|
913
933
|
self.logger.debug('Done with instrumentation, waiting for network idle.')
|
934
|
+
if allow_tracking:
|
935
|
+
self.logger.debug('Check iFrames for button')
|
936
|
+
for frame in page.frames:
|
937
|
+
frame_title = await frame.title()
|
938
|
+
self.logger.debug(f'Check button on {frame_title}')
|
939
|
+
if await self.__frame_consent(frame):
|
940
|
+
self.logger.debug(f'Got button on {frame_title}')
|
941
|
+
await self._wait_for_random_timeout(page, 10) # Wait 10 sec after click
|
942
|
+
self.logger.debug('Done with iFrames.')
|
943
|
+
|
914
944
|
await self._wait_for_random_timeout(page, 5) # Wait 5 sec after instrumentation
|
915
945
|
await self._safe_wait(page)
|
946
|
+
|
916
947
|
self.logger.debug('Done with instrumentation, done with waiting.')
|
917
948
|
|
918
949
|
if content := await self._failsafe_get_content(page):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: PlaywrightCapture
|
3
|
-
Version: 1.25.10
|
3
|
+
Version: 1.25.11
|
4
4
|
Summary: A simple library to capture websites using playwright
|
5
5
|
Home-page: https://github.com/Lookyloo/PlaywrightCapture
|
6
6
|
License: BSD-3-Clause
|
@@ -22,13 +22,13 @@ Classifier: Topic :: Security
|
|
22
22
|
Provides-Extra: recaptcha
|
23
23
|
Requires-Dist: SpeechRecognition (>=3.10.4,<4.0.0) ; extra == "recaptcha"
|
24
24
|
Requires-Dist: aiohttp-socks (>=0.9,<0.10)
|
25
|
-
Requires-Dist: aiohttp[speedups] (>=3.10.
|
25
|
+
Requires-Dist: aiohttp[speedups] (>=3.10.3,<4.0.0)
|
26
26
|
Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
|
27
27
|
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
|
28
28
|
Requires-Dist: dateparser (>=1.2.0,<2.0.0)
|
29
|
-
Requires-Dist: playwright (>=1.
|
29
|
+
Requires-Dist: playwright (>=1.46.0,<2.0.0)
|
30
30
|
Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
|
31
|
-
Requires-Dist: puremagic (>=1.
|
31
|
+
Requires-Dist: puremagic (>=1.27,<2.0)
|
32
32
|
Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
|
33
33
|
Requires-Dist: pytz (>=2024.1,<2025.0) ; python_version < "3.9"
|
34
34
|
Requires-Dist: setuptools (>=72.1.0,<73.0.0)
|
@@ -1,9 +1,9 @@
|
|
1
1
|
playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
|
2
|
-
playwrightcapture/capture.py,sha256=
|
2
|
+
playwrightcapture/capture.py,sha256=Rmo_EVRlR9btsgE2H99OtGPRZwIe8RVq-JCc2GzUWiI,74446
|
3
3
|
playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
|
4
4
|
playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
|
5
5
|
playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
playwrightcapture-1.25.10.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
7
|
-
playwrightcapture-1.25.
|
8
|
-
playwrightcapture-1.25.10.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
9
|
-
playwrightcapture-1.25.10.dist-info/RECORD,,
|
6
|
+
playwrightcapture-1.25.11.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
7
|
+
playwrightcapture-1.25.11.dist-info/METADATA,sha256=nGuO6TAlz2lKM15HiIgZJ4iERLBO_AXNBBpgqo8nfhM,3172
|
8
|
+
playwrightcapture-1.25.11.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
9
|
+
playwrightcapture-1.25.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|