PlaywrightCapture 1.25.9__py3-none-any.whl → 1.25.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -96,28 +96,40 @@ class PlaywrightCaptureLogAdapter(LoggerAdapter): # type: ignore[type-arg]
96
96
  return msg, kwargs
97
97
 
98
98
 
99
+ # good test pages:
100
+ # https://bot.incolumitas.com/
101
+ # https://fingerprintjs.github.io/BotD/main/
102
+
99
103
  @dataclass
100
104
  class PCStealthConfig(StealthConfig): # type: ignore[misc]
101
105
 
102
106
  @property
103
107
  def enabled_scripts(self) -> Generator[str, None, None]:
104
- self.webdriver = True
105
- self.webgl_vendor = True
106
108
  self.chrome_app = True
107
109
  self.chrome_csi = True
108
- self.chrome_load_times = True
109
110
  self.chrome_runtime = True
111
+ self.chrome_load_times = True
112
+ self.navigator_plugins = True
113
+ self.hairline = True
110
114
  self.iframe_content_window = True
111
115
  self.media_codecs = True
116
+
117
+ # permissions are handled directly in playwright
118
+ self.navigator_permissions = False
119
+ # Platform is correct now
120
+ self.navigator_platform = False
121
+ # probably useless, but it will fall back to 4 regardless
112
122
  self.navigator_hardware_concurrency = 4
123
+ # Webgl vendor is correct now
124
+ self.webgl_vendor = False
125
+ # Set by the viewport
126
+ self.outerdimensions = False
127
+
128
+ # Not working with Playwright 1.45+
113
129
  self.navigator_languages = False # Causes issue
114
- self.navigator_permissions = True
115
- self.navigator_platform = True
116
- self.navigator_plugins = True
117
130
  self.navigator_user_agent = False # Causes issues
118
131
  self.navigator_vendor = False # Causes issues
119
- self.outerdimensions = True
120
- self.hairline = True
132
+
121
133
  yield from super().enabled_scripts
122
134
 
123
135
 
@@ -184,7 +196,7 @@ class Capture():
184
196
  self._viewport: ViewportSize | None = None
185
197
  self._user_agent: str = ''
186
198
  self._timezone_id: str = ''
187
- self._locale: str = ''
199
+ self._locale: str = 'en-US'
188
200
  self._color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None = None
189
201
 
190
202
  def __prepare_proxy_playwright(self, proxy: str) -> ProxySettings:
@@ -462,21 +474,22 @@ class Capture():
462
474
  # NOTE: Which perms are supported by which browsers varies
463
475
  # See https://github.com/microsoft/playwright/issues/16577
464
476
  chromium_permissions = [
465
- 'geolocation',
466
- 'midi',
467
- 'midi-sysex',
468
- 'notifications',
469
- 'camera',
470
- 'microphone',
471
- 'background-sync',
472
- 'ambient-light-sensor',
473
477
  'accelerometer',
474
- 'gyroscope',
475
- 'magnetometer',
476
478
  'accessibility-events',
479
+ 'ambient-light-sensor',
480
+ 'background-sync',
481
+ 'camera',
477
482
  'clipboard-read',
478
483
  'clipboard-write',
479
- 'payment-handler'
484
+ 'geolocation',
485
+ 'gyroscope',
486
+ 'magnetometer',
487
+ 'microphone',
488
+ 'midi-sysex',
489
+ 'midi',
490
+ 'notifications',
491
+ 'payment-handler',
492
+ 'storage-access'
480
493
  ]
481
494
 
482
495
  firefox_permissions = ['geolocation', 'notifications']
@@ -720,7 +733,6 @@ class Capture():
720
733
  await self.__dialog_clickthrough(page)
721
734
 
722
735
  await stealth_async(page, PCStealthConfig())
723
- # await stealth_async(page)
724
736
 
725
737
  page.set_default_timeout((self._capture_timeout - 2) * 1000)
726
738
  # trigger a callback on each request to store it in a dict indexed by URL to get it back from the favicon fetcher
@@ -799,6 +811,8 @@ class Capture():
799
811
  self.logger.warning(f'Target closed while resolving captcha on {url}: {e}')
800
812
  except Error as e:
801
813
  self.logger.warning(f'Error while resolving captcha on {url}: {e}')
814
+ except (TimeoutError, asyncio.TimeoutError) as e:
815
+ self.logger.warning(f'[Timeout] Error while resolving captcha on {url}: {e}')
802
816
  except Exception as e:
803
817
  self.logger.exception(f'General error with captcha solving on {url}: {e}')
804
818
  # ======
@@ -905,8 +919,13 @@ class Capture():
905
919
  to_return['html'] = content
906
920
 
907
921
  if 'html' in to_return and to_return['html'] is not None and with_favicon:
908
- to_return['potential_favicons'] = await self.get_favicons(page.url, to_return['html'])
909
- got_favicons = True
922
+ try:
923
+ to_return['potential_favicons'] = await self.get_favicons(page.url, to_return['html'])
924
+ got_favicons = True
925
+ except (TimeoutError, asyncio.TimeoutError) as e:
926
+ self.logger.warning(f'[Timeout] Unable to get favicons: {e}')
927
+ except Exception as e:
928
+ self.logger.warning(f'Unable to get favicons: {e}')
910
929
 
911
930
  to_return['last_redirected_url'] = page.url
912
931
  to_return['png'] = await self._failsafe_get_screenshot(page)
@@ -1209,7 +1228,8 @@ class Capture():
1209
1228
  if self.proxy and self.proxy.get('server'):
1210
1229
  connector = ProxyConnector.from_url(self.proxy['server'])
1211
1230
 
1212
- async with aiohttp.ClientSession(connector=connector) as session:
1231
+ timeout = aiohttp.ClientTimeout(total=10)
1232
+ async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
1213
1233
  while True:
1214
1234
  try:
1215
1235
  href = await main_frame.get_by_role("link", name="Alternatively, download audio as MP3").get_attribute("href")
@@ -1219,7 +1239,7 @@ class Capture():
1219
1239
  if not href:
1220
1240
  self.logger.warning('Unable to find download link for captcha.')
1221
1241
  return False
1222
- async with session.get(href, timeout=10, ssl=False) as response:
1242
+ async with session.get(href, ssl=False) as response:
1223
1243
  response.raise_for_status()
1224
1244
  mp3_content = await response.read()
1225
1245
  with NamedTemporaryFile() as mp3_file, NamedTemporaryFile() as wav_file:
@@ -1417,7 +1437,8 @@ class Capture():
1417
1437
  return set()
1418
1438
  to_fetch, to_return = extracted_favicons
1419
1439
  to_fetch.add('/favicon.ico')
1420
- async with aiohttp.ClientSession(connector=connector) as session:
1440
+ timeout = aiohttp.ClientTimeout(total=10)
1441
+ async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
1421
1442
  session.headers['user-agent'] = self.user_agent
1422
1443
  for u in to_fetch:
1423
1444
  try:
@@ -1427,7 +1448,7 @@ class Capture():
1427
1448
  if url_to_fetch in self._requests:
1428
1449
  favicon = self._requests[url_to_fetch]
1429
1450
  if not favicon:
1430
- async with session.get(url_to_fetch, timeout=5, ssl=False) as favicon_response:
1451
+ async with session.get(url_to_fetch, ssl=False) as favicon_response:
1431
1452
  favicon_response.raise_for_status()
1432
1453
  favicon = await favicon_response.read()
1433
1454
  if favicon:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PlaywrightCapture
3
- Version: 1.25.9
3
+ Version: 1.25.10
4
4
  Summary: A simple library to capture websites using playwright
5
5
  Home-page: https://github.com/Lookyloo/PlaywrightCapture
6
6
  License: BSD-3-Clause
@@ -21,8 +21,8 @@ Classifier: Topic :: Internet
21
21
  Classifier: Topic :: Security
22
22
  Provides-Extra: recaptcha
23
23
  Requires-Dist: SpeechRecognition (>=3.10.4,<4.0.0) ; extra == "recaptcha"
24
- Requires-Dist: aiohttp-socks (>=0.8.4,<0.9.0)
25
- Requires-Dist: aiohttp[speedups] (>=3.9.5,<4.0.0)
24
+ Requires-Dist: aiohttp-socks (>=0.9,<0.10)
25
+ Requires-Dist: aiohttp[speedups] (>=3.10.1,<4.0.0)
26
26
  Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
27
27
  Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
28
28
  Requires-Dist: dateparser (>=1.2.0,<2.0.0)
@@ -31,7 +31,7 @@ Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
31
31
  Requires-Dist: puremagic (>=1.26,<2.0)
32
32
  Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
33
33
  Requires-Dist: pytz (>=2024.1,<2025.0) ; python_version < "3.9"
34
- Requires-Dist: setuptools (>=71.1.0,<72.0.0)
34
+ Requires-Dist: setuptools (>=72.1.0,<73.0.0)
35
35
  Requires-Dist: tzdata (>=2024.1,<2025.0)
36
36
  Requires-Dist: w3lib (>=2.2.1,<3.0.0)
37
37
  Project-URL: Repository, https://github.com/Lookyloo/PlaywrightCapture
@@ -0,0 +1,9 @@
1
+ playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
2
+ playwrightcapture/capture.py,sha256=AayQPmsjzcN2TIBi4uuwDmV0kqIVLDdAfEaNPUg8TXY,72606
3
+ playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
4
+ playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
5
+ playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ playwrightcapture-1.25.10.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
7
+ playwrightcapture-1.25.10.dist-info/METADATA,sha256=3nVVIzM7kcR62_XYA1mRUxfoflPZwfLS5B8KDPy3Hks,3172
8
+ playwrightcapture-1.25.10.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
9
+ playwrightcapture-1.25.10.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
2
- playwrightcapture/capture.py,sha256=uS8e87-7jl8F7TgfzhKhlV4pGf8n6twu9rVzzlqIhXM,71671
3
- playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
4
- playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
5
- playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- playwrightcapture-1.25.9.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
7
- playwrightcapture-1.25.9.dist-info/METADATA,sha256=7ds0ymzTNfkYLajoJTc-t1G4wOhPHRbxmg6CCZkMtUE,3173
8
- playwrightcapture-1.25.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
9
- playwrightcapture-1.25.9.dist-info/RECORD,,