PlaywrightCapture 1.25.1__tar.gz → 1.25.3__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PlaywrightCapture
3
- Version: 1.25.1
3
+ Version: 1.25.3
4
4
  Summary: A simple library to capture websites using playwright
5
5
  Home-page: https://github.com/Lookyloo/PlaywrightCapture
6
6
  License: BSD-3-Clause
@@ -28,7 +28,7 @@ Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
28
28
  Requires-Dist: dateparser (>=1.2.0,<2.0.0)
29
29
  Requires-Dist: playwright (>=1.45.0,<2.0.0)
30
30
  Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
31
- Requires-Dist: puremagic (>=1.25,<2.0)
31
+ Requires-Dist: puremagic (>=1.26,<2.0)
32
32
  Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
33
33
  Requires-Dist: pytz (>=2024.1,<2025.0) ; python_version < "3.9"
34
34
  Requires-Dist: setuptools (>=70.3.0,<71.0.0)
@@ -32,7 +32,7 @@ from playwright._impl._errors import TargetClosedError
32
32
  from playwright.async_api import async_playwright, Frame, Error, Page, Download, Request
33
33
  from playwright.async_api import TimeoutError as PlaywrightTimeoutError
34
34
  from playwright_stealth import stealth_async, StealthConfig # type: ignore[import-untyped]
35
- from puremagic import PureError, from_string # type: ignore[import-untyped]
35
+ from puremagic import PureError, from_string
36
36
  from w3lib.html import strip_html5_whitespace
37
37
  from w3lib.url import canonicalize_url, safe_url_string
38
38
 
@@ -683,6 +683,7 @@ class Capture():
683
683
  try:
684
684
  page = await self.context.new_page()
685
685
  await page.clock.install()
686
+ page.on("dialog", lambda dialog: dialog.accept())
686
687
  except Error as e:
687
688
  self.logger.warning(f'The context is in a broken state: {e}')
688
689
  self.should_retry = True
@@ -805,12 +806,6 @@ class Capture():
805
806
  except Exception as e:
806
807
  self.logger.warning(f'Could not find body: {e}')
807
808
 
808
- # fast forward 30s
809
- await page.clock.run_for(10000)
810
- await page.clock.resume()
811
- await self._wait_for_random_timeout(page, 5) # Wait 5 sec
812
- self.logger.warning('Moved time forward.')
813
-
814
809
  if parsed_url.fragment:
815
810
  # We got a fragment, make sure we go to it and scroll only a little bit.
816
811
  fragment = unquote(parsed_url.fragment)
@@ -870,6 +865,10 @@ class Capture():
870
865
  z.writestr(f'{i}_{filename}', file_content)
871
866
  to_return["downloaded_file"] = mem_zip.getvalue()
872
867
 
868
+ # fast forward 30s
869
+ await page.clock.run_for("30")
870
+ self.logger.debug('Moved time forward.')
871
+
873
872
  self.logger.debug('Done with instrumentation, waiting for network idle.')
874
873
  await self._wait_for_random_timeout(page, 5) # Wait 5 sec after instrumentation
875
874
  await self._safe_wait(page)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "PlaywrightCapture"
3
- version = "1.25.1"
3
+ version = "1.25.3"
4
4
  description = "A simple library to capture websites using playwright"
5
5
  authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
6
6
  license = "BSD-3-Clause"
@@ -29,7 +29,7 @@ pytz = {"version" = "^2024.1", python = "<3.9"}
29
29
  tzdata = "^2024.1"
30
30
  playwright-stealth = "^1.0.6"
31
31
  setuptools = "^70.3.0"
32
- puremagic = "^1.25"
32
+ puremagic = "^1.26"
33
33
  async-timeout = {version = "^4.0.3", python = "<3.11"}
34
34
  aiohttp = {extras = ["speedups"], version = "^3.9.5"}
35
35
  aiohttp-socks = "^0.8.4"