PlaywrightCapture 1.31.3__py3-none-any.whl → 1.31.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -118,7 +118,8 @@ class Capture():
118
118
  proxy: str | dict[str, str] | None=None,
119
119
  socks5_dns_resolver: str | list[str] | None=None,
120
120
  general_timeout_in_sec: int | None=None, loglevel: str | int='INFO',
121
- uuid: str | None=None, headless: bool=True):
121
+ uuid: str | None=None, headless: bool=True,
122
+ *, init_script: str | None=None):
122
123
  """Captures a page with Playwright.
123
124
 
124
125
  :param browser: The browser to use for the capture.
@@ -129,6 +130,7 @@ class Capture():
129
130
  :param loglevel: Python loglevel
130
131
  :param uuid: The UUID of the capture.
131
132
  :param headless: Whether to run the browser in headless mode. WARNING: disabling headless mode requires running in a graphical environment.
133
+ :param init_script: An optional JavaScript snippet that will be executed on each page - see https://playwright.dev/python/docs/api/class-browsercontext#browser-context-add-init-script
132
134
  """
133
135
  master_logger = logging.getLogger('playwrightcapture')
134
136
  master_logger.setLevel(loglevel)
@@ -179,6 +181,8 @@ class Capture():
179
181
  self._color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None = None
180
182
  self._java_script_enabled = True
181
183
 
184
+ self._init_script = init_script
185
+
182
186
  def __prepare_proxy_playwright(self, proxy: str) -> ProxySettings:
183
187
  splitted = urlsplit(proxy)
184
188
  if splitted.username and splitted.password:
@@ -460,6 +464,9 @@ class Capture():
460
464
  )
461
465
  self.context.set_default_timeout(self._capture_timeout * 1000)
462
466
 
467
+ if self._init_script:
468
+ await self.context.add_init_script(script=self._init_script)
469
+
463
470
  # Very quick and dirty: get a platform from the UA so it's not always Win32
464
471
  # This is deprecated and not very important.
465
472
  # Ref: https://developer.mozilla.org/en-US/docs/Web/API/Navigator/platform
@@ -1354,7 +1361,11 @@ class Capture():
1354
1361
  return href
1355
1362
 
1356
1363
  urls: set[str] = set()
1357
- soup = BeautifulSoup(rendered_html, "lxml")
1364
+ try:
1365
+ soup = BeautifulSoup(rendered_html, "lxml")
1366
+ except Exception as e:
1367
+ self.logger.info(f'Unable to parse HTML: {e}')
1368
+ soup = BeautifulSoup(rendered_html, "html.parser")
1358
1369
 
1359
1370
  rendered_hostname = urlparse(rendered_url).hostname
1360
1371
  # The simple ones: the links.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: PlaywrightCapture
3
- Version: 1.31.3
3
+ Version: 1.31.4
4
4
  Summary: A simple library to capture websites using playwright
5
5
  License: BSD-3-Clause
6
6
  Author: Raphaël Vinot
@@ -20,14 +20,14 @@ Classifier: Topic :: Security
20
20
  Provides-Extra: recaptcha
21
21
  Requires-Dist: SpeechRecognition (>=3.14.3) ; extra == "recaptcha"
22
22
  Requires-Dist: aiohttp-socks (>=0.10.1)
23
- Requires-Dist: aiohttp[speedups] (>=3.12.13)
23
+ Requires-Dist: aiohttp[speedups] (>=3.12.14)
24
24
  Requires-Dist: async-timeout (>=5.0.1) ; python_version < "3.11"
25
25
  Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.13.4)
26
26
  Requires-Dist: dateparser (>=1.2.2)
27
27
  Requires-Dist: dnspython (>=2.7.0,<3.0.0)
28
28
  Requires-Dist: playwright (>=1.53.0)
29
29
  Requires-Dist: playwright-stealth (>=2)
30
- Requires-Dist: puremagic (>=1.29)
30
+ Requires-Dist: puremagic (>=1.30)
31
31
  Requires-Dist: pydub (>=0.25.1) ; (python_version < "3.10") and (extra == "recaptcha")
32
32
  Requires-Dist: pydub-ng (>=0.2.0) ; (python_version >= "3.10") and (extra == "recaptcha")
33
33
  Requires-Dist: python-socks (>=2.7.1,<3.0.0)
@@ -1,10 +1,10 @@
1
1
  playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
2
- playwrightcapture/capture.py,sha256=p5N16ymdpSXDu738YGtn_KjYChzcqtkqWMyTNQfbhKU,86562
2
+ playwrightcapture/capture.py,sha256=qWDwQt7pKOr3fQUhidY_U9bO8cIIOSiqSaTNxUdkT3M,87111
3
3
  playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
4
4
  playwrightcapture/helpers.py,sha256=Xqs09zHhzAWnpBtQ0A9YAxg80P3Lj7aBj5M2WuEr0so,1843
5
5
  playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  playwrightcapture/socks5dnslookup.py,sha256=ZpOf8tgsRQZi-WDcn9JbbG1bKz9DSfK_jz1l53UI1Ho,4058
7
- playwrightcapture-1.31.3.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
8
- playwrightcapture-1.31.3.dist-info/METADATA,sha256=p6R6YfXdVpPfpNMBIljl4v_Lu30N6ZdZ-RC3UGa83OY,3285
9
- playwrightcapture-1.31.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
10
- playwrightcapture-1.31.3.dist-info/RECORD,,
7
+ playwrightcapture-1.31.4.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
8
+ playwrightcapture-1.31.4.dist-info/METADATA,sha256=y6QEbPdB1201g3Z81no658PsoD5OoxdyT81LjDCk3uA,3285
9
+ playwrightcapture-1.31.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
10
+ playwrightcapture-1.31.4.dist-info/RECORD,,