PlaywrightCapture 1.31.2__py3-none-any.whl → 1.31.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -118,7 +118,8 @@ class Capture():
118
118
  proxy: str | dict[str, str] | None=None,
119
119
  socks5_dns_resolver: str | list[str] | None=None,
120
120
  general_timeout_in_sec: int | None=None, loglevel: str | int='INFO',
121
- uuid: str | None=None, headless: bool=True):
121
+ uuid: str | None=None, headless: bool=True,
122
+ *, init_script: str | None=None):
122
123
  """Captures a page with Playwright.
123
124
 
124
125
  :param browser: The browser to use for the capture.
@@ -129,6 +130,7 @@ class Capture():
129
130
  :param loglevel: Python loglevel
130
131
  :param uuid: The UUID of the capture.
131
132
  :param headless: Whether to run the browser in headless mode. WARNING: disabling headless mode requires running in a graphical environment.
133
+ :param init_script: An optional JavaScript snippet that will be executed on each page - See https://playwright.dev/python/docs/api/class-browsercontext#browser-context-add-init-script
132
134
  """
133
135
  master_logger = logging.getLogger('playwrightcapture')
134
136
  master_logger.setLevel(loglevel)
@@ -179,6 +181,8 @@ class Capture():
179
181
  self._color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None = None
180
182
  self._java_script_enabled = True
181
183
 
184
+ self._init_script = init_script
185
+
182
186
  def __prepare_proxy_playwright(self, proxy: str) -> ProxySettings:
183
187
  splitted = urlsplit(proxy)
184
188
  if splitted.username and splitted.password:
@@ -460,6 +464,9 @@ class Capture():
460
464
  )
461
465
  self.context.set_default_timeout(self._capture_timeout * 1000)
462
466
 
467
+ if self._init_script:
468
+ await self.context.add_init_script(script=self._init_script)
469
+
463
470
  # very quick and dirty get a platform from the UA so it's not always Win32
464
471
  # This is deprecated and not very important.
465
472
  # Ref: https://developer.mozilla.org/en-US/docs/Web/API/Navigator/platform
@@ -823,7 +830,7 @@ class Capture():
823
830
  # Same technique as: https://github.com/NikolaiT/uncaptcha3
824
831
  if CAN_SOLVE_CAPTCHA:
825
832
  try:
826
- if (await page.locator("//iframe[@title='reCAPTCHA']").first.is_visible(timeout=3000)
833
+ if (await page.locator("//iframe[@title='reCAPTCHA']").first.is_visible()
827
834
  and await page.locator("//iframe[@title='reCAPTCHA']").first.is_enabled(timeout=2000)):
828
835
  self.logger.info('Found a captcha')
829
836
  await self._recaptcha_solver(page)
@@ -1354,7 +1361,11 @@ class Capture():
1354
1361
  return href
1355
1362
 
1356
1363
  urls: set[str] = set()
1357
- soup = BeautifulSoup(rendered_html, "lxml")
1364
+ try:
1365
+ soup = BeautifulSoup(rendered_html, "lxml")
1366
+ except Exception as e:
1367
+ self.logger.info(f'Unable to parse HTML: {e}')
1368
+ soup = BeautifulSoup(rendered_html, "html.parser")
1358
1369
 
1359
1370
  rendered_hostname = urlparse(rendered_url).hostname
1360
1371
  # The simple ones: the links.
@@ -1387,7 +1398,7 @@ class Capture():
1387
1398
  if not recaptcha_init_frame:
1388
1399
  return False
1389
1400
  try:
1390
- if await recaptcha_init_frame.get_by_role("checkbox", name="I'm not a robot").is_visible(timeout=5000):
1401
+ if await recaptcha_init_frame.get_by_role("checkbox", name="I'm not a robot").is_visible():
1391
1402
  await recaptcha_init_frame.get_by_role("checkbox", name="I'm not a robot").click()
1392
1403
  else:
1393
1404
  self.logger.info('Checkbox not visible.')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: PlaywrightCapture
3
- Version: 1.31.2
3
+ Version: 1.31.4
4
4
  Summary: A simple library to capture websites using playwright
5
5
  License: BSD-3-Clause
6
6
  Author: Raphaël Vinot
@@ -20,14 +20,14 @@ Classifier: Topic :: Security
20
20
  Provides-Extra: recaptcha
21
21
  Requires-Dist: SpeechRecognition (>=3.14.3) ; extra == "recaptcha"
22
22
  Requires-Dist: aiohttp-socks (>=0.10.1)
23
- Requires-Dist: aiohttp[speedups] (>=3.12.13)
23
+ Requires-Dist: aiohttp[speedups] (>=3.12.14)
24
24
  Requires-Dist: async-timeout (>=5.0.1) ; python_version < "3.11"
25
25
  Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.13.4)
26
26
  Requires-Dist: dateparser (>=1.2.2)
27
27
  Requires-Dist: dnspython (>=2.7.0,<3.0.0)
28
28
  Requires-Dist: playwright (>=1.53.0)
29
29
  Requires-Dist: playwright-stealth (>=2)
30
- Requires-Dist: puremagic (>=1.29)
30
+ Requires-Dist: puremagic (>=1.30)
31
31
  Requires-Dist: pydub (>=0.25.1) ; (python_version < "3.10") and (extra == "recaptcha")
32
32
  Requires-Dist: pydub-ng (>=0.2.0) ; (python_version >= "3.10") and (extra == "recaptcha")
33
33
  Requires-Dist: python-socks (>=2.7.1,<3.0.0)
@@ -1,10 +1,10 @@
1
1
  playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
2
- playwrightcapture/capture.py,sha256=lXcBWWEWRmQem6Rs_yj51PB74JFlRL4k1kggFCMxFTA,86586
2
+ playwrightcapture/capture.py,sha256=qWDwQt7pKOr3fQUhidY_U9bO8cIIOSiqSaTNxUdkT3M,87111
3
3
  playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
4
4
  playwrightcapture/helpers.py,sha256=Xqs09zHhzAWnpBtQ0A9YAxg80P3Lj7aBj5M2WuEr0so,1843
5
5
  playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  playwrightcapture/socks5dnslookup.py,sha256=ZpOf8tgsRQZi-WDcn9JbbG1bKz9DSfK_jz1l53UI1Ho,4058
7
- playwrightcapture-1.31.2.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
8
- playwrightcapture-1.31.2.dist-info/METADATA,sha256=6JDSFOTJaWk1oER6gis-AbR-vKAaX3vXwOPa9sV-bFA,3285
9
- playwrightcapture-1.31.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
10
- playwrightcapture-1.31.2.dist-info/RECORD,,
7
+ playwrightcapture-1.31.4.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
8
+ playwrightcapture-1.31.4.dist-info/METADATA,sha256=y6QEbPdB1201g3Z81no658PsoD5OoxdyT81LjDCk3uA,3285
9
+ playwrightcapture-1.31.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
10
+ playwrightcapture-1.31.4.dist-info/RECORD,,