PlaywrightCapture 1.27.0__tar.gz → 1.27.2__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PlaywrightCapture
3
- Version: 1.27.0
3
+ Version: 1.27.2
4
4
  Summary: A simple library to capture websites using playwright
5
5
  Home-page: https://github.com/Lookyloo/PlaywrightCapture
6
6
  License: BSD-3-Clause
@@ -22,15 +22,15 @@ Classifier: Topic :: Security
22
22
  Provides-Extra: recaptcha
23
23
  Requires-Dist: SpeechRecognition (>=3.11.0) ; extra == "recaptcha"
24
24
  Requires-Dist: aiohttp-socks (>=0.9,<0.10)
25
- Requires-Dist: aiohttp[speedups] (>=3.10.10,<4.0.0)
25
+ Requires-Dist: aiohttp[speedups] (>=3.11.7,<4.0.0)
26
26
  Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
27
27
  Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
28
28
  Requires-Dist: dateparser (>=1.2.0,<2.0.0)
29
- Requires-Dist: playwright (>=1.48.0,<2.0.0)
29
+ Requires-Dist: playwright (>=1.49.0,<2.0.0)
30
30
  Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
31
31
  Requires-Dist: puremagic (>=1.28,<2.0)
32
32
  Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
33
- Requires-Dist: setuptools (>=75.3.0,<76.0.0)
33
+ Requires-Dist: setuptools (>=75.6.0,<76.0.0)
34
34
  Requires-Dist: tzdata (>=2024.2,<2025.0)
35
35
  Requires-Dist: w3lib (>=2.2.1,<3.0.0)
36
36
  Project-URL: Repository, https://github.com/Lookyloo/PlaywrightCapture
@@ -732,7 +732,7 @@ class Capture():
732
732
  await frame.get_by_label(label).click(timeout=2000)
733
733
  break
734
734
  except (TimeoutError, asyncio.TimeoutError) as e:
735
- self.logger.warning(f'Frame consent timeout: {e}')
735
+ self.logger.warning(f'Consent timeout (label {label}) : {e}')
736
736
 
737
737
  try:
738
738
  async with timeout(5):
@@ -742,9 +742,9 @@ class Capture():
742
742
  await frame.get_by_role("button", name=label).click(timeout=2000)
743
743
  break
744
744
  except (TimeoutError, asyncio.TimeoutError) as e:
745
- self.logger.warning(f'Frame consent timeout: {e}')
745
+ self.logger.warning(f'Frame consent timeout (button {label}): {e}')
746
746
  except Exception as e:
747
- self.logger.info(f'Issue with frame consent: {e}')
747
+ self.logger.info(f'Issue with consent validation: {e}')
748
748
  return got_button
749
749
 
750
750
  async def _move_time_forward(self, page: Page, time: int) -> None:
@@ -825,12 +825,18 @@ class Capture():
825
825
  capturing_sub = False
826
826
  try:
827
827
  page = await self.context.new_page()
828
- await page.clock.install()
829
828
  except Error as e:
830
- self.logger.warning(f'The context is in a broken state: {e}')
829
+ self.logger.warning(f'Unable to create new page, the context is in a broken state: {e}')
831
830
  self.should_retry = True
832
831
  return to_return
833
832
 
833
+ try:
834
+ await page.clock.install()
835
+ clock_set = True
836
+ except Error as e:
837
+ self.logger.warning(f'Unable to install the clock: {e}')
838
+ clock_set = False
839
+
834
840
  if allow_tracking:
835
841
  # Add authorization clickthroughs
836
842
  await self.__dialog_didomi_clickthrough(page)
@@ -898,8 +904,7 @@ class Capture():
898
904
  await page.bring_to_front()
899
905
  self.logger.debug('Page moved to front.')
900
906
  except Error as e:
901
- self.logger.warning('Page in a broken state.')
902
- raise e
907
+ self.logger.warning(f'Unable to bring the page to the front: {e}.')
903
908
 
904
909
  # page instrumentation
905
910
  await self._wait_for_random_timeout(page, 5) # Wait 5 sec after document loaded
@@ -969,7 +974,8 @@ class Capture():
969
974
  self.logger.debug('Got button on main frame')
970
975
  await self._wait_for_random_timeout(page, 10) # Wait 10 sec after click
971
976
 
972
- await self._move_time_forward(page, 10)
977
+ if clock_set:
978
+ await self._move_time_forward(page, 10)
973
979
 
974
980
  if parsed_url.fragment:
975
981
  # We got a fragment, make sure we go to it and scroll only a little bit.
@@ -1037,8 +1043,9 @@ class Capture():
1037
1043
  z.writestr(f'{i}_{filename}', file_content)
1038
1044
  to_return["downloaded_file"] = mem_zip.getvalue()
1039
1045
 
1040
- # fast forward ~30s
1041
- await self._move_time_forward(page, 30)
1046
+ if clock_set:
1047
+ # fast forward ~30s
1048
+ await self._move_time_forward(page, 30)
1042
1049
 
1043
1050
  self.logger.debug('Done with instrumentation, waiting for network idle.')
1044
1051
  await self._wait_for_random_timeout(page, 5) # Wait 5 sec after instrumentation
@@ -1460,6 +1467,7 @@ class Capture():
1460
1467
  'net::ERR_CONNECTION_TIMED_OUT',
1461
1468
  'net::ERR_HTTP_RESPONSE_CODE_FAILURE',
1462
1469
  'net::ERR_HTTP2_PROTOCOL_ERROR',
1470
+ 'net::ERR_INVALID_HTTP_RESPONSE',
1463
1471
  'net::ERR_INVALID_REDIRECT',
1464
1472
  'net::ERR_NAME_NOT_RESOLVED',
1465
1473
  'net::ERR_NETWORK_ACCESS_DENIED',
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "PlaywrightCapture"
3
- version = "1.27.0"
3
+ version = "1.27.2"
4
4
  description = "A simple library to capture websites using playwright"
5
5
  authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
6
6
  license = "BSD-3-Clause"
@@ -19,7 +19,7 @@ classifiers=[
19
19
 
20
20
  [tool.poetry.dependencies]
21
21
  python = "^3.9"
22
- playwright = "^1.48.0"
22
+ playwright = "^1.49.0"
23
23
  dateparser = "^1.2.0"
24
24
  beautifulsoup4 = {version= "^4.12.3", extras = ["lxml", "charset_normalizer"]}
25
25
  w3lib = "^2.2.1"
@@ -27,10 +27,10 @@ pydub = {version = "^0.25.1", optional = true}
27
27
  SpeechRecognition = {version = ">=3.11.0", optional = true}
28
28
  tzdata = "^2024.2"
29
29
  playwright-stealth = "^1.0.6"
30
- setuptools = "^75.3.0"
30
+ setuptools = "^75.6.0"
31
31
  puremagic = "^1.28"
32
32
  async-timeout = {version = "^4.0.3", python = "<3.11"}
33
- aiohttp = {version = "^3.10.10", extras = ["speedups"]}
33
+ aiohttp = {version = "^3.11.7", extras = ["speedups"]}
34
34
  aiohttp-socks = "^0.9"
35
35
 
36
36
  [tool.poetry.extras]