PlaywrightCapture 1.26.3__py3-none-any.whl → 1.27.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -17,7 +17,8 @@ from dataclasses import dataclass
17
17
  from io import BytesIO
18
18
  from logging import LoggerAdapter, Logger
19
19
  from tempfile import NamedTemporaryFile
20
- from typing import Any, TypedDict, Literal, TYPE_CHECKING, MutableMapping, Generator
20
+ from typing import Any, TypedDict, Literal, TYPE_CHECKING
21
+ from collections.abc import MutableMapping, Generator
21
22
  from urllib.parse import urlparse, unquote, urljoin, urlsplit, urlunsplit
22
23
  from zipfile import ZipFile
23
24
 
@@ -38,11 +39,8 @@ from w3lib.url import canonicalize_url, safe_url_string
38
39
 
39
40
  from .exceptions import UnknownPlaywrightBrowser, UnknownPlaywrightDevice, InvalidPlaywrightParameter
40
41
 
41
- if sys.version_info < (3, 9):
42
- from pytz import all_timezones_set
43
- else:
44
- from zoneinfo import available_timezones
45
- all_timezones_set = available_timezones()
42
+ from zoneinfo import available_timezones
43
+ all_timezones_set = available_timezones()
46
44
 
47
45
  if sys.version_info < (3, 11):
48
46
  from async_timeout import timeout
@@ -104,7 +102,7 @@ class PlaywrightCaptureLogAdapter(LoggerAdapter): # type: ignore[type-arg]
104
102
  class PCStealthConfig(StealthConfig): # type: ignore[misc]
105
103
 
106
104
  @property
107
- def enabled_scripts(self) -> Generator[str, None, None]:
105
+ def enabled_scripts(self) -> Generator[str]:
108
106
  self.chrome_app = True
109
107
  self.chrome_csi = True
110
108
  self.chrome_runtime = True
@@ -734,7 +732,7 @@ class Capture():
734
732
  await frame.get_by_label(label).click(timeout=2000)
735
733
  break
736
734
  except (TimeoutError, asyncio.TimeoutError) as e:
737
- self.logger.warning(f'Frame consent timeout: {e}')
735
+ self.logger.warning(f'Consent timeout (label {label}) : {e}')
738
736
 
739
737
  try:
740
738
  async with timeout(5):
@@ -744,9 +742,9 @@ class Capture():
744
742
  await frame.get_by_role("button", name=label).click(timeout=2000)
745
743
  break
746
744
  except (TimeoutError, asyncio.TimeoutError) as e:
747
- self.logger.warning(f'Frame consent timeout: {e}')
745
+ self.logger.warning(f'Frame consent timeout (button {label}): {e}')
748
746
  except Exception as e:
749
- self.logger.info(f'Issue with frame consent: {e}')
747
+ self.logger.info(f'Issue with consent validation: {e}')
750
748
  return got_button
751
749
 
752
750
  async def _move_time_forward(self, page: Page, time: int) -> None:
@@ -827,12 +825,18 @@ class Capture():
827
825
  capturing_sub = False
828
826
  try:
829
827
  page = await self.context.new_page()
830
- await page.clock.install()
831
828
  except Error as e:
832
- self.logger.warning(f'The context is in a broken state: {e}')
829
+ self.logger.warning(f'Unable to create new page, the context is in a broken state: {e}')
833
830
  self.should_retry = True
834
831
  return to_return
835
832
 
833
+ try:
834
+ await page.clock.install()
835
+ clock_set = True
836
+ except Error as e:
837
+ self.logger.warning(f'Unable to install the clock: {e}')
838
+ clock_set = False
839
+
836
840
  if allow_tracking:
837
841
  # Add authorization clickthroughs
838
842
  await self.__dialog_didomi_clickthrough(page)
@@ -900,8 +904,7 @@ class Capture():
900
904
  await page.bring_to_front()
901
905
  self.logger.debug('Page moved to front.')
902
906
  except Error as e:
903
- self.logger.warning('Page in a broken state.')
904
- raise e
907
+ self.logger.warning(f'Unable to bring the page to the front: {e}.')
905
908
 
906
909
  # page instrumentation
907
910
  await self._wait_for_random_timeout(page, 5) # Wait 5 sec after document loaded
@@ -971,7 +974,8 @@ class Capture():
971
974
  self.logger.debug('Got button on main frame')
972
975
  await self._wait_for_random_timeout(page, 10) # Wait 10 sec after click
973
976
 
974
- await self._move_time_forward(page, 10)
977
+ if clock_set:
978
+ await self._move_time_forward(page, 10)
975
979
 
976
980
  if parsed_url.fragment:
977
981
  # We got a fragment, make sure we go to it and scroll only a little bit.
@@ -1039,8 +1043,9 @@ class Capture():
1039
1043
  z.writestr(f'{i}_{filename}', file_content)
1040
1044
  to_return["downloaded_file"] = mem_zip.getvalue()
1041
1045
 
1042
- # fast forward ~30s
1043
- await self._move_time_forward(page, 30)
1046
+ if clock_set:
1047
+ # fast forward ~30s
1048
+ await self._move_time_forward(page, 30)
1044
1049
 
1045
1050
  self.logger.debug('Done with instrumentation, waiting for network idle.')
1046
1051
  await self._wait_for_random_timeout(page, 5) # Wait 5 sec after instrumentation
@@ -1462,6 +1467,7 @@ class Capture():
1462
1467
  'net::ERR_CONNECTION_TIMED_OUT',
1463
1468
  'net::ERR_HTTP_RESPONSE_CODE_FAILURE',
1464
1469
  'net::ERR_HTTP2_PROTOCOL_ERROR',
1470
+ 'net::ERR_INVALID_HTTP_RESPONSE',
1465
1471
  'net::ERR_INVALID_REDIRECT',
1466
1472
  'net::ERR_NAME_NOT_RESOLVED',
1467
1473
  'net::ERR_NETWORK_ACCESS_DENIED',
@@ -1,18 +1,17 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PlaywrightCapture
3
- Version: 1.26.3
3
+ Version: 1.27.1
4
4
  Summary: A simple library to capture websites using playwright
5
5
  Home-page: https://github.com/Lookyloo/PlaywrightCapture
6
6
  License: BSD-3-Clause
7
7
  Author: Raphaël Vinot
8
8
  Author-email: raphael.vinot@circl.lu
9
- Requires-Python: >=3.8,<4.0
9
+ Requires-Python: >=3.9,<4.0
10
10
  Classifier: Environment :: Console
11
11
  Classifier: Intended Audience :: Science/Research
12
12
  Classifier: Intended Audience :: Telecommunications Industry
13
13
  Classifier: License :: OSI Approved :: BSD License
14
14
  Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.8
16
15
  Classifier: Programming Language :: Python :: 3.9
17
16
  Classifier: Programming Language :: Python :: 3.10
18
17
  Classifier: Programming Language :: Python :: 3.11
@@ -21,11 +20,9 @@ Classifier: Programming Language :: Python :: 3.13
21
20
  Classifier: Topic :: Internet
22
21
  Classifier: Topic :: Security
23
22
  Provides-Extra: recaptcha
24
- Requires-Dist: SpeechRecognition (<3.11) ; (python_version < "3.9") and (extra == "recaptcha")
25
- Requires-Dist: SpeechRecognition (>=3.11) ; (python_version >= "3.9") and (extra == "recaptcha")
23
+ Requires-Dist: SpeechRecognition (>=3.11.0) ; extra == "recaptcha"
26
24
  Requires-Dist: aiohttp-socks (>=0.9,<0.10)
27
- Requires-Dist: aiohttp[speedups] (<3.11) ; python_version < "3.9"
28
- Requires-Dist: aiohttp[speedups] (>=3.10.10,<4.0.0) ; python_version >= "3.9"
25
+ Requires-Dist: aiohttp[speedups] (>=3.10.10,<4.0.0)
29
26
  Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
30
27
  Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
31
28
  Requires-Dist: dateparser (>=1.2.0,<2.0.0)
@@ -33,8 +30,7 @@ Requires-Dist: playwright (>=1.48.0,<2.0.0)
33
30
  Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
34
31
  Requires-Dist: puremagic (>=1.28,<2.0)
35
32
  Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
36
- Requires-Dist: pytz (>=2024.2,<2025.0) ; python_version < "3.9"
37
- Requires-Dist: setuptools (>=75.2.0,<76.0.0)
33
+ Requires-Dist: setuptools (>=75.3.0,<76.0.0)
38
34
  Requires-Dist: tzdata (>=2024.2,<2025.0)
39
35
  Requires-Dist: w3lib (>=2.2.1,<3.0.0)
40
36
  Project-URL: Repository, https://github.com/Lookyloo/PlaywrightCapture
@@ -1,9 +1,9 @@
1
1
  playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
2
- playwrightcapture/capture.py,sha256=hUslnglyM37dnf7tuUSsPXAl1r3hTzbpiio6L8KQGcs,80175
2
+ playwrightcapture/capture.py,sha256=Tq0ijQdMU4DOF_d_INPasVm84tkW2MNXOuRVZtOgep4,80475
3
3
  playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
4
4
  playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
5
5
  playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- playwrightcapture-1.26.3.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
7
- playwrightcapture-1.26.3.dist-info/METADATA,sha256=AbPtgWXp9mL734pi894RRPEvj8h5a7nyjbOfaCz16NY,3433
8
- playwrightcapture-1.26.3.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
9
- playwrightcapture-1.26.3.dist-info/RECORD,,
6
+ playwrightcapture-1.27.1.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
7
+ playwrightcapture-1.27.1.dist-info/METADATA,sha256=gSnwJd2Ra0uyxdaNX-2zbCR6zaUY3le2SS4XP41rJwk,3102
8
+ playwrightcapture-1.27.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
9
+ playwrightcapture-1.27.1.dist-info/RECORD,,