PlaywrightCapture 1.27.4__tar.gz → 1.27.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {playwrightcapture-1.27.4 → playwrightcapture-1.27.6}/PKG-INFO +16 -19
- {playwrightcapture-1.27.4 → playwrightcapture-1.27.6}/playwrightcapture/capture.py +17 -16
- playwrightcapture-1.27.6/pyproject.toml +55 -0
- playwrightcapture-1.27.4/pyproject.toml +0 -52
- {playwrightcapture-1.27.4 → playwrightcapture-1.27.6}/LICENSE +0 -0
- {playwrightcapture-1.27.4 → playwrightcapture-1.27.6}/README.md +0 -0
- {playwrightcapture-1.27.4 → playwrightcapture-1.27.6}/playwrightcapture/__init__.py +0 -0
- {playwrightcapture-1.27.4 → playwrightcapture-1.27.6}/playwrightcapture/exceptions.py +0 -0
- {playwrightcapture-1.27.4 → playwrightcapture-1.27.6}/playwrightcapture/helpers.py +0 -0
- {playwrightcapture-1.27.4 → playwrightcapture-1.27.6}/playwrightcapture/py.typed +0 -0
@@ -1,13 +1,11 @@
|
|
1
|
-
Metadata-Version: 2.1
|
1
|
+
Metadata-Version: 2.3
|
2
2
|
Name: PlaywrightCapture
|
3
|
-
Version: 1.27.4
|
3
|
+
Version: 1.27.6
|
4
4
|
Summary: A simple library to capture websites using playwright
|
5
|
-
Home-page: https://github.com/Lookyloo/PlaywrightCapture
|
6
5
|
License: BSD-3-Clause
|
7
6
|
Author: Raphaël Vinot
|
8
7
|
Author-email: raphael.vinot@circl.lu
|
9
|
-
Requires-Python: >=3.9
|
10
|
-
Classifier: Environment :: Console
|
8
|
+
Requires-Python: >=3.9
|
11
9
|
Classifier: Intended Audience :: Science/Research
|
12
10
|
Classifier: Intended Audience :: Telecommunications Industry
|
13
11
|
Classifier: License :: OSI Approved :: BSD License
|
@@ -20,20 +18,19 @@ Classifier: Programming Language :: Python :: 3.13
|
|
20
18
|
Classifier: Topic :: Internet
|
21
19
|
Classifier: Topic :: Security
|
22
20
|
Provides-Extra: recaptcha
|
23
|
-
Requires-Dist: SpeechRecognition (>=3.11.0) ; extra == "recaptcha"
|
24
|
-
Requires-Dist: aiohttp-socks (>=0.9,<0.10)
|
25
|
-
Requires-Dist: aiohttp[speedups] (>=3.11.9,<4.0.0)
|
26
|
-
Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
|
27
|
-
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3,<5.0.0)
|
28
|
-
Requires-Dist: dateparser (>=1.2.0,<2.0.0)
|
29
|
-
Requires-Dist: playwright (>=1.49.0,<2.0.0)
|
30
|
-
Requires-Dist: playwright-stealth (>=1.0.6,<2.0.0)
|
31
|
-
Requires-Dist: puremagic (>=1.28,<2.0)
|
32
|
-
Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "recaptcha"
|
33
|
-
Requires-Dist: setuptools (>=75.6.0,<76.0.0)
|
34
|
-
Requires-Dist: tzdata (>=2024.2,<2025.0)
|
35
|
-
Requires-Dist: w3lib (>=2.2.1,<3.0.0)
|
36
|
-
Project-URL: Repository, https://github.com/Lookyloo/PlaywrightCapture
|
21
|
+
Requires-Dist: SpeechRecognition (>=3.14.1) ; extra == "recaptcha"
|
22
|
+
Requires-Dist: aiohttp-socks (>=0.10.1)
|
23
|
+
Requires-Dist: aiohttp[speedups] (>=3.11.11)
|
24
|
+
Requires-Dist: async-timeout (>=5.0.1) ; python_version < "3.11"
|
25
|
+
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3)
|
26
|
+
Requires-Dist: dateparser (>=1.2.0)
|
27
|
+
Requires-Dist: playwright (>=1.49.1)
|
28
|
+
Requires-Dist: playwright-stealth (>=1.0.6)
|
29
|
+
Requires-Dist: puremagic (>=1.28)
|
30
|
+
Requires-Dist: pydub (>=0.25.1) ; extra == "recaptcha"
|
31
|
+
Requires-Dist: setuptools (>=75.8.0)
|
32
|
+
Requires-Dist: tzdata (>=2025.1)
|
33
|
+
Requires-Dist: w3lib (>=2.2.1)
|
37
34
|
Description-Content-Type: text/markdown
|
38
35
|
|
39
36
|
# Playwright Capture
|
@@ -18,21 +18,20 @@ from io import BytesIO
|
|
18
18
|
from logging import LoggerAdapter, Logger
|
19
19
|
from tempfile import NamedTemporaryFile
|
20
20
|
from typing import Any, TypedDict, Literal, TYPE_CHECKING
|
21
|
-
from collections.abc import MutableMapping,
|
21
|
+
from collections.abc import MutableMapping, Iterator
|
22
22
|
from urllib.parse import urlparse, unquote, urljoin, urlsplit, urlunsplit
|
23
23
|
from zipfile import ZipFile
|
24
24
|
|
25
25
|
import aiohttp
|
26
26
|
import dateparser
|
27
|
-
import urllib3
|
28
27
|
|
29
|
-
from aiohttp_socks import ProxyConnector
|
28
|
+
from aiohttp_socks import ProxyConnector
|
30
29
|
from bs4 import BeautifulSoup
|
31
30
|
from charset_normalizer import from_bytes
|
32
31
|
from playwright._impl._errors import TargetClosedError
|
33
32
|
from playwright.async_api import async_playwright, Frame, Error, Page, Download, Request
|
34
33
|
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
|
35
|
-
from playwright_stealth import stealth_async, StealthConfig # type: ignore[
|
34
|
+
from playwright_stealth import stealth_async, StealthConfig # type: ignore[attr-defined]
|
36
35
|
from puremagic import PureError, from_string
|
37
36
|
from w3lib.html import strip_html5_whitespace
|
38
37
|
from w3lib.url import canonicalize_url, safe_url_string
|
@@ -55,15 +54,12 @@ if TYPE_CHECKING:
|
|
55
54
|
BROWSER = Literal['chromium', 'firefox', 'webkit']
|
56
55
|
|
57
56
|
try:
|
58
|
-
import
|
59
|
-
from speech_recognition import Recognizer, AudioFile
|
57
|
+
from pydub import AudioSegment # type: ignore[attr-defined]
|
58
|
+
from speech_recognition import Recognizer, AudioFile
|
60
59
|
CAN_SOLVE_CAPTCHA = True
|
61
60
|
except ImportError:
|
62
61
|
CAN_SOLVE_CAPTCHA = False
|
63
62
|
|
64
|
-
# Do not show TLS warnings from urllib3 when fetching a favicon
|
65
|
-
urllib3.disable_warnings()
|
66
|
-
|
67
63
|
|
68
64
|
class CaptureResponse(TypedDict, total=False):
|
69
65
|
|
@@ -99,10 +95,10 @@ class PlaywrightCaptureLogAdapter(LoggerAdapter): # type: ignore[type-arg]
|
|
99
95
|
# https://fingerprintjs.github.io/BotD/main/
|
100
96
|
|
101
97
|
@dataclass
|
102
|
-
class PCStealthConfig(StealthConfig):
|
98
|
+
class PCStealthConfig(StealthConfig):
|
103
99
|
|
104
100
|
@property
|
105
|
-
def enabled_scripts(self) ->
|
101
|
+
def enabled_scripts(self) -> Iterator[str]:
|
106
102
|
self.chrome_app = True
|
107
103
|
self.chrome_csi = True
|
108
104
|
self.chrome_runtime = True
|
@@ -227,6 +223,7 @@ class Capture():
|
|
227
223
|
|
228
224
|
self.browser = await self.playwright[self.browser_name].launch(
|
229
225
|
proxy=self.proxy if self.proxy else None,
|
226
|
+
channel="chromium" if self.browser_name == "chromium" else None,
|
230
227
|
# headless=False
|
231
228
|
)
|
232
229
|
|
@@ -698,6 +695,8 @@ class Capture():
|
|
698
695
|
"Accept all",
|
699
696
|
"Accept",
|
700
697
|
"Agree and close",
|
698
|
+
"I agree",
|
699
|
+
"Agree",
|
701
700
|
# Dutch
|
702
701
|
"Accepteer",
|
703
702
|
# Spanish
|
@@ -1358,12 +1357,12 @@ class Capture():
|
|
1358
1357
|
mp3_content = await response.read()
|
1359
1358
|
with NamedTemporaryFile() as mp3_file, NamedTemporaryFile() as wav_file:
|
1360
1359
|
mp3_file.write(mp3_content)
|
1361
|
-
|
1362
|
-
recognizer = Recognizer()
|
1363
|
-
recaptcha_audio = AudioFile(wav_file.name)
|
1360
|
+
AudioSegment.from_mp3(mp3_file.name).export(wav_file.name, format="wav") # type: ignore[no-untyped-call]
|
1361
|
+
recognizer = Recognizer() # type: ignore[no-untyped-call]
|
1362
|
+
recaptcha_audio = AudioFile(wav_file.name) # type: ignore[no-untyped-call]
|
1364
1363
|
with recaptcha_audio as source:
|
1365
|
-
audio = recognizer.record(source)
|
1366
|
-
text = recognizer.recognize_google(audio)
|
1364
|
+
audio = recognizer.record(source) # type: ignore[no-untyped-call]
|
1365
|
+
text = recognizer.recognize_google(audio) # type: ignore[attr-defined]
|
1367
1366
|
await main_frame.get_by_role("textbox", name="Enter what you hear").fill(text)
|
1368
1367
|
await main_frame.get_by_role("button", name="Verify").click()
|
1369
1368
|
await self._safe_wait(page, 5)
|
@@ -1416,6 +1415,8 @@ class Capture():
|
|
1416
1415
|
'Error receiving data: Connection reset by peer',
|
1417
1416
|
'Internal SOCKSv5 proxy server error.',
|
1418
1417
|
'Host unreachable through SOCKSv5 server.',
|
1418
|
+
# JS stuff
|
1419
|
+
'TurnstileError: [Cloudflare Turnstile] Error: 300030.',
|
1419
1420
|
# The browser barfed
|
1420
1421
|
'Target page, context or browser has been closed',
|
1421
1422
|
]:
|
@@ -0,0 +1,55 @@
|
|
1
|
+
[project]
|
2
|
+
name = "PlaywrightCapture"
|
3
|
+
version = "1.27.6"
|
4
|
+
description = "A simple library to capture websites using playwright"
|
5
|
+
authors = [
|
6
|
+
{name="Raphaël Vinot", email= "raphael.vinot@circl.lu"}
|
7
|
+
]
|
8
|
+
license = "BSD-3-Clause"
|
9
|
+
repository = "https://github.com/Lookyloo/PlaywrightCapture"
|
10
|
+
readme = "README.md"
|
11
|
+
requires-python = ">=3.9"
|
12
|
+
|
13
|
+
dynamic = [ "classifiers" ]
|
14
|
+
|
15
|
+
dependencies = [
|
16
|
+
"playwright (>=1.49.1)",
|
17
|
+
"dateparser (>=1.2.0)",
|
18
|
+
"beautifulsoup4 [lxml,charset_normalizer] (>=4.12.3)",
|
19
|
+
"w3lib (>=2.2.1)",
|
20
|
+
"tzdata (>=2025.1)",
|
21
|
+
"playwright-stealth (>=1.0.6)",
|
22
|
+
"setuptools (>=75.8.0)",
|
23
|
+
"puremagic (>=1.28)",
|
24
|
+
"async-timeout (>=5.0.1) ; python_version < \"3.11\"",
|
25
|
+
"aiohttp [speedups] (>=3.11.11)",
|
26
|
+
"aiohttp-socks (>=0.10.1)"
|
27
|
+
]
|
28
|
+
|
29
|
+
|
30
|
+
[tool.poetry]
|
31
|
+
classifiers=[
|
32
|
+
'Intended Audience :: Science/Research',
|
33
|
+
'Intended Audience :: Telecommunications Industry',
|
34
|
+
'Topic :: Security',
|
35
|
+
'Topic :: Internet',
|
36
|
+
]
|
37
|
+
|
38
|
+
|
39
|
+
[project.optional-dependencies]
|
40
|
+
recaptcha = [
|
41
|
+
"pydub (>=0.25.1)",
|
42
|
+
"SpeechRecognition (>=3.14.1)"
|
43
|
+
]
|
44
|
+
|
45
|
+
[tool.poetry.group.dev.dependencies]
|
46
|
+
types-beautifulsoup4 = "^4.12.0.20241020"
|
47
|
+
pytest = "^8.3.4"
|
48
|
+
mypy = "^1.14.1"
|
49
|
+
types-dateparser = "^1.2.0.20240420"
|
50
|
+
types-pytz = "^2024.2.0.20241221"
|
51
|
+
|
52
|
+
|
53
|
+
[build-system]
|
54
|
+
requires = ["poetry-core>=2.0"]
|
55
|
+
build-backend = "poetry.core.masonry.api"
|
@@ -1,52 +0,0 @@
|
|
1
|
-
[tool.poetry]
|
2
|
-
name = "PlaywrightCapture"
|
3
|
-
version = "1.27.4"
|
4
|
-
description = "A simple library to capture websites using playwright"
|
5
|
-
authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
|
6
|
-
license = "BSD-3-Clause"
|
7
|
-
repository = "https://github.com/Lookyloo/PlaywrightCapture"
|
8
|
-
readme = "README.md"
|
9
|
-
|
10
|
-
classifiers=[
|
11
|
-
'License :: OSI Approved :: BSD License',
|
12
|
-
'Environment :: Console',
|
13
|
-
'Intended Audience :: Science/Research',
|
14
|
-
'Intended Audience :: Telecommunications Industry',
|
15
|
-
'Programming Language :: Python :: 3',
|
16
|
-
'Topic :: Security',
|
17
|
-
'Topic :: Internet',
|
18
|
-
]
|
19
|
-
|
20
|
-
[tool.poetry.dependencies]
|
21
|
-
python = "^3.9"
|
22
|
-
playwright = "^1.49.0"
|
23
|
-
dateparser = "^1.2.0"
|
24
|
-
beautifulsoup4 = {version= "^4.12.3", extras = ["lxml", "charset_normalizer"]}
|
25
|
-
w3lib = "^2.2.1"
|
26
|
-
pydub = {version = "^0.25.1", optional = true}
|
27
|
-
SpeechRecognition = {version = ">=3.11.0", optional = true}
|
28
|
-
tzdata = "^2024.2"
|
29
|
-
playwright-stealth = "^1.0.6"
|
30
|
-
setuptools = "^75.6.0"
|
31
|
-
puremagic = "^1.28"
|
32
|
-
async-timeout = {version = "^4.0.3", python = "<3.11"}
|
33
|
-
aiohttp = {version = "^3.11.9", extras = ["speedups"]}
|
34
|
-
aiohttp-socks = "^0.9"
|
35
|
-
|
36
|
-
[tool.poetry.extras]
|
37
|
-
recaptcha = ["pydub", "SpeechRecognition"]
|
38
|
-
|
39
|
-
[tool.poetry.group.dev]
|
40
|
-
optional = true
|
41
|
-
|
42
|
-
[tool.poetry.group.dev.dependencies]
|
43
|
-
types-beautifulsoup4 = "^4.12.0.20241020"
|
44
|
-
pytest = "^8.3.4"
|
45
|
-
mypy = "^1.13.0"
|
46
|
-
types-dateparser = "^1.2.0.20240420"
|
47
|
-
types-pytz = "^2024.2.0.20241003"
|
48
|
-
|
49
|
-
|
50
|
-
[build-system]
|
51
|
-
requires = ["poetry-core"]
|
52
|
-
build-backend = "poetry.core.masonry.api"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|