cert-host-scraper 0.3.1__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cert-host-scraper might be problematic. Click here for more details.
- {cert-host-scraper-0.3.1 → cert_host_scraper-0.5.0}/LICENSE +0 -0
- {cert-host-scraper-0.3.1 → cert_host_scraper-0.5.0}/PKG-INFO +17 -5
- {cert-host-scraper-0.3.1 → cert_host_scraper-0.5.0}/README.md +13 -3
- cert_host_scraper-0.5.0/cert_host_scraper/__init__.py +5 -0
- {cert-host-scraper-0.3.1 → cert_host_scraper-0.5.0}/cert_host_scraper/cli.py +21 -5
- cert-host-scraper-0.3.1/cert_host_scraper/__init__.py → cert_host_scraper-0.5.0/cert_host_scraper/scraper.py +6 -0
- cert_host_scraper-0.5.0/cert_host_scraper/utils.py +6 -0
- {cert-host-scraper-0.3.1 → cert_host_scraper-0.5.0}/pyproject.toml +4 -3
- {cert-host-scraper-0.3.1 → cert_host_scraper-0.5.0}/setup.py +6 -5
|
File without changes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cert-host-scraper
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary:
|
|
5
5
|
Home-page: https://github.com/inverse/cert-host-scraper
|
|
6
6
|
License: MIT
|
|
@@ -10,11 +10,13 @@ Requires-Python: >=3.10,<4.0
|
|
|
10
10
|
Classifier: License :: OSI Approved :: MIT License
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
14
|
Requires-Dist: aiohttp[speedups] (>=3.8.1,<4.0.0)
|
|
14
15
|
Requires-Dist: beautifulsoup4 (>=4.10.0,<5.0.0)
|
|
15
16
|
Requires-Dist: click (>=8.0.3,<9.0.0)
|
|
16
17
|
Requires-Dist: requests (>=2.27.1,<3.0.0)
|
|
17
|
-
Requires-Dist: rich (>=11
|
|
18
|
+
Requires-Dist: rich (>=11,<14)
|
|
19
|
+
Requires-Dist: single-source (>=0.3.0,<0.4.0)
|
|
18
20
|
Project-URL: Repository, https://github.com/inverse/cert-host-scraper
|
|
19
21
|
Description-Content-Type: text/markdown
|
|
20
22
|
|
|
@@ -26,12 +28,12 @@ Description-Content-Type: text/markdown
|
|
|
26
28
|
[](LICENSE)
|
|
27
29
|
[](https://github.com/psf/black)
|
|
28
30
|
|
|
29
|
-
Query the certificate transparency log
|
|
31
|
+
Query the certificate transparency log from [crt.sh](https://crt.sh) by a given a keyword and returns the status code of the matched results. Optionally filtering the results by status code.
|
|
30
32
|
|
|
31
33
|
## Usage
|
|
32
34
|
|
|
33
35
|
```bash
|
|
34
|
-
cert-host-scraper search your-domain.com [--status-code 200]
|
|
36
|
+
cert-host-scraper search your-domain.com [--status-code 200] [--clean/--no-clean]
|
|
35
37
|
```
|
|
36
38
|
|
|
37
39
|
## Installation
|
|
@@ -48,7 +50,17 @@ With pip:
|
|
|
48
50
|
pip install cert-host-scraper
|
|
49
51
|
```
|
|
50
52
|
|
|
51
|
-
## License
|
|
53
|
+
## Development
|
|
54
|
+
|
|
55
|
+
Requires [poetry][0] and Python 3.10.
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
poetry install
|
|
59
|
+
poetry run python -m cert_host_scraper.cli
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## License
|
|
52
63
|
|
|
53
64
|
MIT
|
|
65
|
+
[0]: https://python-poetry.org
|
|
54
66
|
|
|
@@ -6,12 +6,12 @@
|
|
|
6
6
|
[](LICENSE)
|
|
7
7
|
[](https://github.com/psf/black)
|
|
8
8
|
|
|
9
|
-
Query the certificate transparency log
|
|
9
|
+
Query the certificate transparency log from [crt.sh](https://crt.sh) by a given a keyword and returns the status code of the matched results. Optionally filtering the results by status code.
|
|
10
10
|
|
|
11
11
|
## Usage
|
|
12
12
|
|
|
13
13
|
```bash
|
|
14
|
-
cert-host-scraper search your-domain.com [--status-code 200]
|
|
14
|
+
cert-host-scraper search your-domain.com [--status-code 200] [--clean/--no-clean]
|
|
15
15
|
```
|
|
16
16
|
|
|
17
17
|
## Installation
|
|
@@ -28,6 +28,16 @@ With pip:
|
|
|
28
28
|
pip install cert-host-scraper
|
|
29
29
|
```
|
|
30
30
|
|
|
31
|
-
## License
|
|
31
|
+
## Development
|
|
32
|
+
|
|
33
|
+
Requires [poetry][0] and Python 3.10.
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
poetry install
|
|
37
|
+
poetry run python -m cert_host_scraper.cli
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## License
|
|
32
41
|
|
|
33
42
|
MIT
|
|
43
|
+
[0]: https://python-poetry.org
|
|
@@ -7,20 +7,27 @@ from rich.console import Console
|
|
|
7
7
|
from rich.progress import track
|
|
8
8
|
from rich.table import Table
|
|
9
9
|
|
|
10
|
-
from cert_host_scraper import
|
|
10
|
+
from cert_host_scraper import __version__
|
|
11
|
+
from cert_host_scraper.scraper import Options, Result, fetch_urls, validate_url
|
|
12
|
+
from cert_host_scraper.utils import strip_url
|
|
13
|
+
|
|
14
|
+
NO_STATUS_CODE_FILTER = 0
|
|
11
15
|
|
|
12
16
|
|
|
13
17
|
def validate_status_code(
|
|
14
18
|
_ctx: click.core.Context, _param: click.core.Option, value: str
|
|
15
19
|
):
|
|
16
20
|
try:
|
|
17
|
-
int(value)
|
|
21
|
+
return int(value)
|
|
18
22
|
except ValueError:
|
|
19
23
|
raise click.BadParameter("must be an integer")
|
|
24
|
+
except TypeError:
|
|
25
|
+
return NO_STATUS_CODE_FILTER
|
|
20
26
|
|
|
21
27
|
|
|
22
28
|
@click.group()
|
|
23
29
|
@click.option("--debug", is_flag=True, help="Whether to enable debug level output")
|
|
30
|
+
@click.version_option(__version__, message="%(version)s")
|
|
24
31
|
def cli(debug: bool):
|
|
25
32
|
log_level = logging.DEBUG if debug else logging.INFO
|
|
26
33
|
logging.basicConfig(level=log_level)
|
|
@@ -37,10 +44,19 @@ def cli(debug: bool):
|
|
|
37
44
|
@click.option(
|
|
38
45
|
"--clean/--no-clean", is_flag=True, help="Clean wildcard results", default=True
|
|
39
46
|
)
|
|
40
|
-
|
|
47
|
+
@click.option(
|
|
48
|
+
"--strip/--no-strip",
|
|
49
|
+
is_flag=True,
|
|
50
|
+
help="Remove protocol and leading www from search",
|
|
51
|
+
default=True,
|
|
52
|
+
)
|
|
53
|
+
def search(search: str, status_code: int, timeout: int, clean: bool, strip: bool):
|
|
41
54
|
"""
|
|
42
55
|
Search the certificate transparency log.
|
|
43
56
|
"""
|
|
57
|
+
if strip:
|
|
58
|
+
search = strip_url(search)
|
|
59
|
+
|
|
44
60
|
click.echo(f"Searching for {search}")
|
|
45
61
|
options = Options(timeout, clean)
|
|
46
62
|
results = []
|
|
@@ -54,8 +70,8 @@ def search(search: str, status_code: int, timeout: int, clean: bool):
|
|
|
54
70
|
sys.exit(1)
|
|
55
71
|
|
|
56
72
|
result = Result(results)
|
|
57
|
-
if status_code:
|
|
58
|
-
display = result.filter_by_status_code(
|
|
73
|
+
if status_code != NO_STATUS_CODE_FILTER:
|
|
74
|
+
display = result.filter_by_status_code(status_code)
|
|
59
75
|
else:
|
|
60
76
|
display = result.scraped
|
|
61
77
|
|
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import re
|
|
2
3
|
from dataclasses import dataclass
|
|
3
4
|
from typing import List
|
|
4
5
|
|
|
5
6
|
import requests
|
|
7
|
+
import urllib3
|
|
6
8
|
from bs4 import BeautifulSoup
|
|
7
9
|
|
|
8
10
|
logger = logging.getLogger(__name__)
|
|
9
11
|
|
|
10
12
|
|
|
13
|
+
urllib3.disable_warnings()
|
|
14
|
+
|
|
15
|
+
|
|
11
16
|
@dataclass
|
|
12
17
|
class Options:
|
|
13
18
|
timeout: int
|
|
@@ -37,6 +42,7 @@ def fetch_site_information(url: str, timeout: int) -> int:
|
|
|
37
42
|
|
|
38
43
|
|
|
39
44
|
def fetch_site(search: str) -> str:
|
|
45
|
+
|
|
40
46
|
url = f"https://crt.sh/?q={search}"
|
|
41
47
|
result = requests.get(url)
|
|
42
48
|
result.raise_for_status()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "cert-host-scraper"
|
|
3
|
-
version = "0.3.1"
|
|
3
|
+
version = "0.5.0"
|
|
4
4
|
description = ""
|
|
5
5
|
authors = ["Malachi Soord <inverse.chi@gmail.com>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -19,10 +19,11 @@ requests = "^2.27.1"
|
|
|
19
19
|
beautifulsoup4 = "^4.10.0"
|
|
20
20
|
click = "^8.0.3"
|
|
21
21
|
aiohttp = {extras = ["speedups"], version = "^3.8.1"}
|
|
22
|
-
rich = "
|
|
22
|
+
rich = ">=11,<14"
|
|
23
|
+
single-source = "^0.3.0"
|
|
23
24
|
|
|
24
25
|
[tool.poetry.dev-dependencies]
|
|
25
|
-
pytest = "^7.
|
|
26
|
+
pytest = "^7.2.0"
|
|
26
27
|
pytest-socket = "^0.5.1"
|
|
27
28
|
|
|
28
29
|
[build-system]
|
|
@@ -12,20 +12,21 @@ install_requires = \
|
|
|
12
12
|
'beautifulsoup4>=4.10.0,<5.0.0',
|
|
13
13
|
'click>=8.0.3,<9.0.0',
|
|
14
14
|
'requests>=2.27.1,<3.0.0',
|
|
15
|
-
'rich>=11
|
|
15
|
+
'rich>=11,<14',
|
|
16
|
+
'single-source>=0.3.0,<0.4.0']
|
|
16
17
|
|
|
17
18
|
entry_points = \
|
|
18
19
|
{'console_scripts': ['cert-host-scraper = cert_host_scraper.cli:cli']}
|
|
19
20
|
|
|
20
21
|
setup_kwargs = {
|
|
21
22
|
'name': 'cert-host-scraper',
|
|
22
|
-
'version': '0.3.1',
|
|
23
|
+
'version': '0.5.0',
|
|
23
24
|
'description': '',
|
|
24
|
-
'long_description': '# Cert Host Scraper\n\n\n[](https://badge.fury.io/py/cert-host-scraper)\n\n[](LICENSE)\n[](https://github.com/psf/black)\n\nQuery the certificate transparency log
|
|
25
|
+
'long_description': '# Cert Host Scraper\n\n\n[](https://badge.fury.io/py/cert-host-scraper)\n\n[](LICENSE)\n[](https://github.com/psf/black)\n\nQuery the certificate transparency log from [crt.sh](https://crt.sh) by a given a keyword and returns the status code of the matched results. Optionally filtering the results by status code.\n\n## Usage\n\n```bash\ncert-host-scraper search your-domain.com [--status-code 200] [--clean/--no-clean]\n```\n\n## Installation\n\nWith pipx:\n\n```bash\npipx install cert-host-scraper\n```\n\nWith pip:\n\n```bash\npip install cert-host-scraper\n```\n\n## Development\n\nRequires [poetry][0] and Python 3.10.\n\n```\npoetry install\npoetry run python -m cert_host_scraper.cli\n```\n\n## License\n\nMIT\n[0]: https://python-poetry.org\n',
|
|
25
26
|
'author': 'Malachi Soord',
|
|
26
27
|
'author_email': 'inverse.chi@gmail.com',
|
|
27
|
-
'maintainer': None,
|
|
28
|
-
'maintainer_email': None,
|
|
28
|
+
'maintainer': 'None',
|
|
29
|
+
'maintainer_email': 'None',
|
|
29
30
|
'url': 'https://github.com/inverse/cert-host-scraper',
|
|
30
31
|
'packages': packages,
|
|
31
32
|
'package_data': package_data,
|