PlaywrightCapture 1.25.3__py3-none-any.whl → 1.25.4__py3-none-any.whl
This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- playwrightcapture/capture.py +37 -27
- {playwrightcapture-1.25.3.dist-info → playwrightcapture-1.25.4.dist-info}/METADATA +1 -1
- {playwrightcapture-1.25.3.dist-info → playwrightcapture-1.25.4.dist-info}/RECORD +5 -5
- {playwrightcapture-1.25.3.dist-info → playwrightcapture-1.25.4.dist-info}/LICENSE +0 -0
- {playwrightcapture-1.25.3.dist-info → playwrightcapture-1.25.4.dist-info}/WHEEL +0 -0
playwrightcapture/capture.py
CHANGED
@@ -682,8 +682,7 @@ class Capture():
|
|
682
682
|
capturing_sub = False
|
683
683
|
try:
|
684
684
|
page = await self.context.new_page()
|
685
|
-
await page.clock.install()
|
686
|
-
page.on("dialog", lambda dialog: dialog.accept())
|
685
|
+
# await page.clock.install()
|
687
686
|
except Error as e:
|
688
687
|
self.logger.warning(f'The context is in a broken state: {e}')
|
689
688
|
self.should_retry = True
|
@@ -707,6 +706,7 @@ class Capture():
|
|
707
706
|
page.set_default_timeout((self._capture_timeout - 2) * 1000)
|
708
707
|
# trigger a callback on each request to store it in a dict indexed by URL to get it back from the favicon fetcher
|
709
708
|
page.on("requestfinished", store_request)
|
709
|
+
page.on("dialog", lambda dialog: dialog.accept())
|
710
710
|
|
711
711
|
try:
|
712
712
|
# Parse the URL. If there is a fragment, we need to scroll to it manually
|
@@ -762,31 +762,31 @@ class Capture():
|
|
762
762
|
await self._wait_for_random_timeout(page, 5) # Wait 5 sec after document loaded
|
763
763
|
self.logger.debug('Start instrumentation.')
|
764
764
|
|
765
|
-
# ==== recaptcha
|
766
|
-
# Same technique as: https://github.com/NikolaiT/uncaptcha3
|
767
|
-
if CAN_SOLVE_CAPTCHA:
|
768
|
-
try:
|
769
|
-
if (await page.locator("//iframe[@title='reCAPTCHA']").first.is_visible(timeout=3000)
|
770
|
-
and await page.locator("//iframe[@title='reCAPTCHA']").first.is_enabled(timeout=2000)):
|
771
|
-
self.logger.info('Found a captcha')
|
772
|
-
await self._recaptcha_solver(page)
|
773
|
-
except PlaywrightTimeoutError as e:
|
774
|
-
self.logger.info(f'Captcha on {url} is not ready: {e}')
|
775
|
-
except TargetClosedError as e:
|
776
|
-
self.logger.warning(f'Target closed while resolving captcha on {url}: {e}')
|
777
|
-
except Error as e:
|
778
|
-
self.logger.warning(f'Error while resolving captcha on {url}: {e}')
|
779
|
-
except Exception as e:
|
780
|
-
self.logger.exception(f'General error with captcha solving on {url}: {e}')
|
781
|
-
# ======
|
782
|
-
# NOTE: testing
|
783
|
-
# await self.__cloudflare_bypass_attempt(page)
|
784
|
-
self.logger.debug('Done with captcha.')
|
785
|
-
|
786
765
|
# check if we have anything on the page. If we don't, the page is not working properly.
|
787
766
|
if await self._failsafe_get_content(page):
|
788
767
|
self.logger.debug('Got rendered content')
|
789
768
|
|
769
|
+
# ==== recaptcha
|
770
|
+
# Same technique as: https://github.com/NikolaiT/uncaptcha3
|
771
|
+
if CAN_SOLVE_CAPTCHA:
|
772
|
+
try:
|
773
|
+
if (await page.locator("//iframe[@title='reCAPTCHA']").first.is_visible(timeout=3000)
|
774
|
+
and await page.locator("//iframe[@title='reCAPTCHA']").first.is_enabled(timeout=2000)):
|
775
|
+
self.logger.info('Found a captcha')
|
776
|
+
await self._recaptcha_solver(page)
|
777
|
+
except PlaywrightTimeoutError as e:
|
778
|
+
self.logger.info(f'Captcha on {url} is not ready: {e}')
|
779
|
+
except TargetClosedError as e:
|
780
|
+
self.logger.warning(f'Target closed while resolving captcha on {url}: {e}')
|
781
|
+
except Error as e:
|
782
|
+
self.logger.warning(f'Error while resolving captcha on {url}: {e}')
|
783
|
+
except Exception as e:
|
784
|
+
self.logger.exception(f'General error with captcha solving on {url}: {e}')
|
785
|
+
# ======
|
786
|
+
# NOTE: testing
|
787
|
+
# await self.__cloudflare_bypass_attempt(page)
|
788
|
+
self.logger.debug('Done with captcha.')
|
789
|
+
|
790
790
|
# move mouse
|
791
791
|
await page.mouse.move(x=random.uniform(300, 800), y=random.uniform(200, 500))
|
792
792
|
self.logger.debug('Moved mouse.')
|
@@ -866,8 +866,12 @@ class Capture():
|
|
866
866
|
to_return["downloaded_file"] = mem_zip.getvalue()
|
867
867
|
|
868
868
|
# fast forward 30s
|
869
|
-
|
870
|
-
|
869
|
+
# try:
|
870
|
+
# async with timeout(3):
|
871
|
+
# await page.clock.run_for("47")
|
872
|
+
# self.logger.debug('Moved time forward.')
|
873
|
+
# except TimeoutError:
|
874
|
+
# self.logger.warning('Unable to move time forward.')
|
871
875
|
|
872
876
|
self.logger.debug('Done with instrumentation, waiting for network idle.')
|
873
877
|
await self._wait_for_random_timeout(page, 5) # Wait 5 sec after instrumentation
|
@@ -1078,8 +1082,9 @@ class Capture():
|
|
1078
1082
|
tries = 3
|
1079
1083
|
while tries:
|
1080
1084
|
try:
|
1081
|
-
|
1082
|
-
|
1085
|
+
async with timeout(30):
|
1086
|
+
return await page.content()
|
1087
|
+
except (Error, TimeoutError):
|
1083
1088
|
self.logger.debug('Unable to get page content, trying again.')
|
1084
1089
|
tries -= 1
|
1085
1090
|
await self._wait_for_random_timeout(page, 1)
|
@@ -1225,6 +1230,11 @@ class Capture():
|
|
1225
1230
|
if ': ' in name:
|
1226
1231
|
_, name = name.split(': ', maxsplit=1)
|
1227
1232
|
exception._name = name.strip()
|
1233
|
+
else:
|
1234
|
+
# The format changed in Playwright 1.43.0, the name of the method that failed is set before the exception itself.
|
1235
|
+
if ': ' in exception.message:
|
1236
|
+
_, name = exception.message.split(': ', maxsplit=1)
|
1237
|
+
exception._name = name.strip()
|
1228
1238
|
|
1229
1239
|
def _exception_is_network_error(self, exception: Error) -> bool:
|
1230
1240
|
if exception.name in [
|
@@ -1,9 +1,9 @@
|
|
1
1
|
playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
|
2
|
-
playwrightcapture/capture.py,sha256=
|
2
|
+
playwrightcapture/capture.py,sha256=ANNPmaTgAIDihdqRDXkuc4LBjZeqcA7EAQpr7zXEpww,70047
|
3
3
|
playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
|
4
4
|
playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
|
5
5
|
playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
playwrightcapture-1.25.3.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
7
|
-
playwrightcapture-1.25.
|
8
|
-
playwrightcapture-1.25.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
9
|
-
playwrightcapture-1.25.3.dist-info/RECORD,,
|
6
|
+
playwrightcapture-1.25.4.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
|
7
|
+
playwrightcapture-1.25.4.dist-info/METADATA,sha256=gsWFbtAU24Ag1VpP65y6M3tOl5wTy2dWWVNf5AmOETU,3173
|
8
|
+
playwrightcapture-1.25.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
9
|
+
playwrightcapture-1.25.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|