PlaywrightCapture 1.27.4__py3-none-any.whl → 1.27.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,21 +18,20 @@ from io import BytesIO
18
18
  from logging import LoggerAdapter, Logger
19
19
  from tempfile import NamedTemporaryFile
20
20
  from typing import Any, TypedDict, Literal, TYPE_CHECKING
21
- from collections.abc import MutableMapping, Generator
21
+ from collections.abc import MutableMapping, Iterator
22
22
  from urllib.parse import urlparse, unquote, urljoin, urlsplit, urlunsplit
23
23
  from zipfile import ZipFile
24
24
 
25
25
  import aiohttp
26
26
  import dateparser
27
- import urllib3
28
27
 
29
- from aiohttp_socks import ProxyConnector # type: ignore[import-untyped]
28
+ from aiohttp_socks import ProxyConnector
30
29
  from bs4 import BeautifulSoup
31
30
  from charset_normalizer import from_bytes
32
31
  from playwright._impl._errors import TargetClosedError
33
32
  from playwright.async_api import async_playwright, Frame, Error, Page, Download, Request
34
33
  from playwright.async_api import TimeoutError as PlaywrightTimeoutError
35
- from playwright_stealth import stealth_async, StealthConfig # type: ignore[import-untyped]
34
+ from playwright_stealth import stealth_async, StealthConfig # type: ignore[attr-defined]
36
35
  from puremagic import PureError, from_string
37
36
  from w3lib.html import strip_html5_whitespace
38
37
  from w3lib.url import canonicalize_url, safe_url_string
@@ -55,15 +54,12 @@ if TYPE_CHECKING:
55
54
  BROWSER = Literal['chromium', 'firefox', 'webkit']
56
55
 
57
56
  try:
58
- import pydub # type: ignore[import-untyped]
59
- from speech_recognition import Recognizer, AudioFile # type: ignore[import-untyped]
57
+ from pydub import AudioSegment # type: ignore[attr-defined]
58
+ from speech_recognition import Recognizer, AudioFile
60
59
  CAN_SOLVE_CAPTCHA = True
61
60
  except ImportError:
62
61
  CAN_SOLVE_CAPTCHA = False
63
62
 
64
- # Do not show TLS warnings from urllib3 when fetching a favicon
65
- urllib3.disable_warnings()
66
-
67
63
 
68
64
  class CaptureResponse(TypedDict, total=False):
69
65
 
@@ -99,10 +95,10 @@ class PlaywrightCaptureLogAdapter(LoggerAdapter): # type: ignore[type-arg]
99
95
  # https://fingerprintjs.github.io/BotD/main/
100
96
 
101
97
  @dataclass
102
- class PCStealthConfig(StealthConfig): # type: ignore[misc]
98
+ class PCStealthConfig(StealthConfig):
103
99
 
104
100
  @property
105
- def enabled_scripts(self) -> Generator[str]:
101
+ def enabled_scripts(self) -> Iterator[str]:
106
102
  self.chrome_app = True
107
103
  self.chrome_csi = True
108
104
  self.chrome_runtime = True
@@ -227,6 +223,7 @@ class Capture():
227
223
 
228
224
  self.browser = await self.playwright[self.browser_name].launch(
229
225
  proxy=self.proxy if self.proxy else None,
226
+ channel="chromium" if self.browser_name == "chromium" else None,
230
227
  # headless=False
231
228
  )
232
229
 
@@ -698,6 +695,8 @@ class Capture():
698
695
  "Accept all",
699
696
  "Accept",
700
697
  "Agree and close",
698
+ "I agree",
699
+ "Agree",
701
700
  # Dutch
702
701
  "Accepteer",
703
702
  # Spanish
@@ -1358,12 +1357,12 @@ class Capture():
1358
1357
  mp3_content = await response.read()
1359
1358
  with NamedTemporaryFile() as mp3_file, NamedTemporaryFile() as wav_file:
1360
1359
  mp3_file.write(mp3_content)
1361
- pydub.AudioSegment.from_mp3(mp3_file.name).export(wav_file.name, format="wav")
1362
- recognizer = Recognizer()
1363
- recaptcha_audio = AudioFile(wav_file.name)
1360
+ AudioSegment.from_mp3(mp3_file.name).export(wav_file.name, format="wav") # type: ignore[no-untyped-call]
1361
+ recognizer = Recognizer() # type: ignore[no-untyped-call]
1362
+ recaptcha_audio = AudioFile(wav_file.name) # type: ignore[no-untyped-call]
1364
1363
  with recaptcha_audio as source:
1365
- audio = recognizer.record(source)
1366
- text = recognizer.recognize_google(audio)
1364
+ audio = recognizer.record(source) # type: ignore[no-untyped-call]
1365
+ text = recognizer.recognize_google(audio) # type: ignore[attr-defined]
1367
1366
  await main_frame.get_by_role("textbox", name="Enter what you hear").fill(text)
1368
1367
  await main_frame.get_by_role("button", name="Verify").click()
1369
1368
  await self._safe_wait(page, 5)
@@ -1416,6 +1415,8 @@ class Capture():
1416
1415
  'Error receiving data: Connection reset by peer',
1417
1416
  'Internal SOCKSv5 proxy server error.',
1418
1417
  'Host unreachable through SOCKSv5 server.',
1418
+ # JS stuff
1419
+ 'TurnstileError: [Cloudflare Turnstile] Error: 300030.',
1419
1420
  # The browser barfed
1420
1421
  'Target page, context or browser has been closed',
1421
1422
  ]:
@@ -1,13 +1,11 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: PlaywrightCapture
3
- Version: 1.27.4
3
+ Version: 1.27.6
4
4
  Summary: A simple library to capture websites using playwright
5
- Home-page: https://github.com/Lookyloo/PlaywrightCapture
6
5
  License: BSD-3-Clause
7
6
  Author: Raphaël Vinot
8
7
  Author-email: raphael.vinot@circl.lu
9
- Requires-Python: >=3.9,<4.0
10
- Classifier: Environment :: Console
8
+ Requires-Python: >=3.9
11
9
  Classifier: Intended Audience :: Science/Research
12
10
  Classifier: Intended Audience :: Telecommunications Industry
13
11
  Classifier: License :: OSI Approved :: BSD License
@@ -20,20 +18,19 @@ Classifier: Programming Language :: Python :: 3.13
20
18
  Classifier: Topic :: Internet
21
19
  Classifier: Topic :: Security
22
20
  Provides-Extra: recaptcha
23
- Requires-Dist: SpeechRecognition (>=3.11.0) ; extra == "recaptcha"
24
- Requires-Dist: aiohttp-socks (>=0.9,<0.10)
25
- Requires-Dist: aiohttp[speedups] (>=3.11.9,<4.0.0)
26
- Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
27
- Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
28
- Requires-Dist: dateparser (>=1.2.0,<2.0.0)
29
- Requires-Dist: playwright (>=1.49.0,<2.0.0)
30
- Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
31
- Requires-Dist: puremagic (>=1.28,<2.0)
32
- Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
33
- Requires-Dist: setuptools (>=75.6.0,<76.0.0)
34
- Requires-Dist: tzdata (>=2024.2,<2025.0)
35
- Requires-Dist: w3lib (>=2.2.1,<3.0.0)
36
- Project-URL: Repository, https://github.com/Lookyloo/PlaywrightCapture
21
+ Requires-Dist: SpeechRecognition (>=3.14.1) ; extra == "recaptcha"
22
+ Requires-Dist: aiohttp-socks (>=0.10.1)
23
+ Requires-Dist: aiohttp[speedups] (>=3.11.11)
24
+ Requires-Dist: async-timeout (>=5.0.1) ; python_version < "3.11"
25
+ Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3)
26
+ Requires-Dist: dateparser (>=1.2.0)
27
+ Requires-Dist: playwright (>=1.49.1)
28
+ Requires-Dist: playwright-stealth (>=1.0.6)
29
+ Requires-Dist: puremagic (>=1.28)
30
+ Requires-Dist: pydub (>=0.25.1) ; extra == "recaptcha"
31
+ Requires-Dist: setuptools (>=75.8.0)
32
+ Requires-Dist: tzdata (>=2025.1)
33
+ Requires-Dist: w3lib (>=2.2.1)
37
34
  Description-Content-Type: text/markdown
38
35
 
39
36
  # Playwright Capture
@@ -0,0 +1,9 @@
1
+ playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
2
+ playwrightcapture/capture.py,sha256=7Dzb909bGdLD5vxdx2U-nD5Pikf6gWbpJL5eIfG7734,80856
3
+ playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
4
+ playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
5
+ playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ playwrightcapture-1.27.6.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
7
+ playwrightcapture-1.27.6.dist-info/METADATA,sha256=uebLrhWHQoJdm3qhPPyA6juEnayipPlcG9UxwuCnhrc,2853
8
+ playwrightcapture-1.27.6.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
9
+ playwrightcapture-1.27.6.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.1
2
+ Generator: poetry-core 2.0.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,9 +0,0 @@
1
- playwrightcapture/__init__.py,sha256=F90Y8wYS13tDjgsfjuFrCfmzQfdnH44G-ovuilJfLEE,511
2
- playwrightcapture/capture.py,sha256=DpUPVmxpGp2QpRUGjRggBroC2m7Tv0odC5K5Gx5rvrc,80668
3
- playwrightcapture/exceptions.py,sha256=LhGJQCGHzEu7Sx2Dfl28OFeDg1OmrwufFjAWXlxQnEA,366
4
- playwrightcapture/helpers.py,sha256=SXQLEuxMs8-bcWykMiUVosHzzxBKuS-QC0gBV3OmKmo,1764
5
- playwrightcapture/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- playwrightcapture-1.27.4.dist-info/LICENSE,sha256=uwFc39fTLacBUG-XTuxX6IQKTKhg4z14gWOLt3ex4Ho,1775
7
- playwrightcapture-1.27.4.dist-info/METADATA,sha256=Lr2jNkBJFonB9EqVAZlBbcQO4xGx7lwGVvGYHq52upY,3101
8
- playwrightcapture-1.27.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
9
- playwrightcapture-1.27.4.dist-info/RECORD,,