PlaywrightCapture 1.23.12__tar.gz → 1.23.13__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PlaywrightCapture
3
- Version: 1.23.12
3
+ Version: 1.23.13
4
4
  Summary: A simple library to capture websites using playwright
5
5
  Home-page: https://github.com/Lookyloo/PlaywrightCapture
6
6
  License: BSD-3-Clause
@@ -122,7 +122,7 @@ class Capture():
122
122
  'password': proxy.get('password', '')}
123
123
 
124
124
  self.should_retry: bool = False
125
- self.__network_not_idle: int = 1
125
+ self.__network_not_idle: int = 2 # makes sure we do not wait for network idle the max amount of time the capture is allowed to take
126
126
  self._cookies: list[SetCookieParam] = []
127
127
  self._http_credentials: HttpCredentials = {}
128
128
  self._geolocation: Geolocation = {}
@@ -556,6 +556,8 @@ class Capture():
556
556
  return
557
557
  try:
558
558
  if response := await request.response():
559
+ if got_favicons:
560
+ return
559
561
  if response.ok:
560
562
  try:
561
563
  if body := await response.body():
@@ -648,8 +650,8 @@ class Capture():
648
650
  # Same technique as: https://github.com/NikolaiT/uncaptcha3
649
651
  if CAN_SOLVE_CAPTCHA:
650
652
  try:
651
- if (await page.locator("//iframe[@title='reCAPTCHA']").first.is_visible(timeout=5000)
652
- and await page.locator("//iframe[@title='reCAPTCHA']").first.is_enabled(timeout=5000)):
653
+ if (await page.locator("//iframe[@title='reCAPTCHA']").first.is_visible(timeout=3000)
654
+ and await page.locator("//iframe[@title='reCAPTCHA']").first.is_enabled(timeout=2000)):
653
655
  self.logger.info('Found a captcha')
654
656
  await self._recaptcha_solver(page)
655
657
  except PlaywrightTimeoutError as e:
@@ -678,7 +680,7 @@ class Capture():
678
680
  # We got a fragment, make sure we go to it and scroll only a little bit.
679
681
  fragment = unquote(parsed_url.fragment)
680
682
  try:
681
- await page.locator(f'id={fragment}').first.scroll_into_view_if_needed(timeout=5000)
683
+ await page.locator(f'id={fragment}').first.scroll_into_view_if_needed(timeout=3000)
682
684
  await self._safe_wait(page)
683
685
  await page.mouse.wheel(delta_y=random.uniform(150, 300), delta_x=0)
684
686
  self.logger.debug('Jumped to fragment.')
@@ -724,6 +726,7 @@ class Capture():
724
726
  to_return['potential_favicons'] = self.get_favicons(page.url, to_return['html'])
725
727
  got_favicons = True
726
728
 
729
+ await self._safe_wait(page)
727
730
  to_return['png'] = await self._failsafe_get_screenshot(page)
728
731
 
729
732
  if self.wait_for_download > 0:
@@ -857,7 +860,7 @@ class Capture():
857
860
 
858
861
  async def _failsafe_get_screenshot(self, page: Page) -> bytes:
859
862
  try:
860
- return await page.screenshot(full_page=True, timeout=10000)
863
+ return await page.screenshot(full_page=True, timeout=5000)
861
864
  except Error as e:
862
865
  self.logger.info(f"Capturing a screenshot of the full page failed, trying to scale it down: {e}")
863
866
 
@@ -875,7 +878,7 @@ class Capture():
875
878
  async def _safe_wait(self, page: Page) -> None:
876
879
  try:
877
880
  # If we don't have networkidle relatively quick, it's probably because we're playing a video.
878
- await page.wait_for_load_state('networkidle', timeout=10000 / self.__network_not_idle)
881
+ await page.wait_for_load_state('networkidle', timeout=self._capture_timeout / self.__network_not_idle)
879
882
  except PlaywrightTimeoutError:
880
883
  # Network never idle, keep going
881
884
  self.__network_not_idle += 1
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "PlaywrightCapture"
3
- version = "1.23.12"
3
+ version = "1.23.13"
4
4
  description = "A simple library to capture websites using playwright"
5
5
  authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
6
6
  license = "BSD-3-Clause"