cert-host-scraper 0.4.1__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cert-host-scraper might be problematic. Click here for more details.
- {cert-host-scraper-0.4.1 → cert_host_scraper-0.5.0}/LICENSE +0 -0
- {cert-host-scraper-0.4.1 → cert_host_scraper-0.5.0}/PKG-INFO +16 -5
- {cert-host-scraper-0.4.1 → cert_host_scraper-0.5.0}/README.md +13 -3
- cert_host_scraper-0.5.0/cert_host_scraper/__init__.py +5 -0
- {cert-host-scraper-0.4.1 → cert_host_scraper-0.5.0}/cert_host_scraper/cli.py +13 -6
- cert-host-scraper-0.4.1/cert_host_scraper/__init__.py → cert_host_scraper-0.5.0/cert_host_scraper/scraper.py +6 -0
- cert_host_scraper-0.5.0/cert_host_scraper/utils.py +6 -0
- {cert-host-scraper-0.4.1 → cert_host_scraper-0.5.0}/pyproject.toml +3 -3
- {cert-host-scraper-0.4.1 → cert_host_scraper-0.5.0}/setup.py +5 -5
|
File without changes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cert-host-scraper
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary:
|
|
5
5
|
Home-page: https://github.com/inverse/cert-host-scraper
|
|
6
6
|
License: MIT
|
|
@@ -10,11 +10,12 @@ Requires-Python: >=3.10,<4.0
|
|
|
10
10
|
Classifier: License :: OSI Approved :: MIT License
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
14
|
Requires-Dist: aiohttp[speedups] (>=3.8.1,<4.0.0)
|
|
14
15
|
Requires-Dist: beautifulsoup4 (>=4.10.0,<5.0.0)
|
|
15
16
|
Requires-Dist: click (>=8.0.3,<9.0.0)
|
|
16
17
|
Requires-Dist: requests (>=2.27.1,<3.0.0)
|
|
17
|
-
Requires-Dist: rich (>=11
|
|
18
|
+
Requires-Dist: rich (>=11,<14)
|
|
18
19
|
Requires-Dist: single-source (>=0.3.0,<0.4.0)
|
|
19
20
|
Project-URL: Repository, https://github.com/inverse/cert-host-scraper
|
|
20
21
|
Description-Content-Type: text/markdown
|
|
@@ -27,12 +28,12 @@ Description-Content-Type: text/markdown
|
|
|
27
28
|
[](LICENSE)
|
|
28
29
|
[](https://github.com/psf/black)
|
|
29
30
|
|
|
30
|
-
Query the certificate transparency log
|
|
31
|
+
Query the certificate transparency log from [crt.sh](https://crt.sh) by a given a keyword and returns the status code of the matched results. Optionally filtering the results by status code.
|
|
31
32
|
|
|
32
33
|
## Usage
|
|
33
34
|
|
|
34
35
|
```bash
|
|
35
|
-
cert-host-scraper search your-domain.com [--status-code 200]
|
|
36
|
+
cert-host-scraper search your-domain.com [--status-code 200] [--clean/--no-clean]
|
|
36
37
|
```
|
|
37
38
|
|
|
38
39
|
## Installation
|
|
@@ -49,7 +50,17 @@ With pip:
|
|
|
49
50
|
pip install cert-host-scraper
|
|
50
51
|
```
|
|
51
52
|
|
|
52
|
-
## License
|
|
53
|
+
## Development
|
|
54
|
+
|
|
55
|
+
Requires [poetry][0] and Python 3.10.
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
poetry install
|
|
59
|
+
poetry run python -m cert_host_scraper.cli
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## License
|
|
53
63
|
|
|
54
64
|
MIT
|
|
65
|
+
[0]: https://python-poetry.org
|
|
55
66
|
|
|
@@ -6,12 +6,12 @@
|
|
|
6
6
|
[](LICENSE)
|
|
7
7
|
[](https://github.com/psf/black)
|
|
8
8
|
|
|
9
|
-
Query the certificate transparency log
|
|
9
|
+
Query the certificate transparency log from [crt.sh](https://crt.sh) by a given a keyword and returns the status code of the matched results. Optionally filtering the results by status code.
|
|
10
10
|
|
|
11
11
|
## Usage
|
|
12
12
|
|
|
13
13
|
```bash
|
|
14
|
-
cert-host-scraper search your-domain.com [--status-code 200]
|
|
14
|
+
cert-host-scraper search your-domain.com [--status-code 200] [--clean/--no-clean]
|
|
15
15
|
```
|
|
16
16
|
|
|
17
17
|
## Installation
|
|
@@ -28,6 +28,16 @@ With pip:
|
|
|
28
28
|
pip install cert-host-scraper
|
|
29
29
|
```
|
|
30
30
|
|
|
31
|
-
## License
|
|
31
|
+
## Development
|
|
32
|
+
|
|
33
|
+
Requires [poetry][0] and Python 3.10.
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
poetry install
|
|
37
|
+
poetry run python -m cert_host_scraper.cli
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## License
|
|
32
41
|
|
|
33
42
|
MIT
|
|
43
|
+
[0]: https://python-poetry.org
|
|
@@ -1,17 +1,15 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import sys
|
|
3
|
-
from pathlib import Path
|
|
4
3
|
|
|
5
4
|
import click
|
|
6
5
|
from requests import RequestException
|
|
7
6
|
from rich.console import Console
|
|
8
7
|
from rich.progress import track
|
|
9
8
|
from rich.table import Table
|
|
10
|
-
from single_source import get_version
|
|
11
9
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
from cert_host_scraper import
|
|
10
|
+
from cert_host_scraper import __version__
|
|
11
|
+
from cert_host_scraper.scraper import Options, Result, fetch_urls, validate_url
|
|
12
|
+
from cert_host_scraper.utils import strip_url
|
|
15
13
|
|
|
16
14
|
NO_STATUS_CODE_FILTER = 0
|
|
17
15
|
|
|
@@ -46,10 +44,19 @@ def cli(debug: bool):
|
|
|
46
44
|
@click.option(
|
|
47
45
|
"--clean/--no-clean", is_flag=True, help="Clean wildcard results", default=True
|
|
48
46
|
)
|
|
49
|
-
|
|
47
|
+
@click.option(
|
|
48
|
+
"--strip/--no-strip",
|
|
49
|
+
is_flag=True,
|
|
50
|
+
help="Remove protocol and leading www from search",
|
|
51
|
+
default=True,
|
|
52
|
+
)
|
|
53
|
+
def search(search: str, status_code: int, timeout: int, clean: bool, strip: bool):
|
|
50
54
|
"""
|
|
51
55
|
Search the certificate transparency log.
|
|
52
56
|
"""
|
|
57
|
+
if strip:
|
|
58
|
+
search = strip_url(search)
|
|
59
|
+
|
|
53
60
|
click.echo(f"Searching for {search}")
|
|
54
61
|
options = Options(timeout, clean)
|
|
55
62
|
results = []
|
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import re
|
|
2
3
|
from dataclasses import dataclass
|
|
3
4
|
from typing import List
|
|
4
5
|
|
|
5
6
|
import requests
|
|
7
|
+
import urllib3
|
|
6
8
|
from bs4 import BeautifulSoup
|
|
7
9
|
|
|
8
10
|
logger = logging.getLogger(__name__)
|
|
9
11
|
|
|
10
12
|
|
|
13
|
+
urllib3.disable_warnings()
|
|
14
|
+
|
|
15
|
+
|
|
11
16
|
@dataclass
|
|
12
17
|
class Options:
|
|
13
18
|
timeout: int
|
|
@@ -37,6 +42,7 @@ def fetch_site_information(url: str, timeout: int) -> int:
|
|
|
37
42
|
|
|
38
43
|
|
|
39
44
|
def fetch_site(search: str) -> str:
|
|
45
|
+
|
|
40
46
|
url = f"https://crt.sh/?q={search}"
|
|
41
47
|
result = requests.get(url)
|
|
42
48
|
result.raise_for_status()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "cert-host-scraper"
|
|
3
|
-
version = "0.4.1"
|
|
3
|
+
version = "0.5.0"
|
|
4
4
|
description = ""
|
|
5
5
|
authors = ["Malachi Soord <inverse.chi@gmail.com>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -19,11 +19,11 @@ requests = "^2.27.1"
|
|
|
19
19
|
beautifulsoup4 = "^4.10.0"
|
|
20
20
|
click = "^8.0.3"
|
|
21
21
|
aiohttp = {extras = ["speedups"], version = "^3.8.1"}
|
|
22
|
-
rich = "
|
|
22
|
+
rich = ">=11,<14"
|
|
23
23
|
single-source = "^0.3.0"
|
|
24
24
|
|
|
25
25
|
[tool.poetry.dev-dependencies]
|
|
26
|
-
pytest = "^7.
|
|
26
|
+
pytest = "^7.2.0"
|
|
27
27
|
pytest-socket = "^0.5.1"
|
|
28
28
|
|
|
29
29
|
[build-system]
|
|
@@ -12,7 +12,7 @@ install_requires = \
|
|
|
12
12
|
'beautifulsoup4>=4.10.0,<5.0.0',
|
|
13
13
|
'click>=8.0.3,<9.0.0',
|
|
14
14
|
'requests>=2.27.1,<3.0.0',
|
|
15
|
-
'rich>=11
|
|
15
|
+
'rich>=11,<14',
|
|
16
16
|
'single-source>=0.3.0,<0.4.0']
|
|
17
17
|
|
|
18
18
|
entry_points = \
|
|
@@ -20,13 +20,13 @@ entry_points = \
|
|
|
20
20
|
|
|
21
21
|
setup_kwargs = {
|
|
22
22
|
'name': 'cert-host-scraper',
|
|
23
|
-
'version': '0.4.1',
|
|
23
|
+
'version': '0.5.0',
|
|
24
24
|
'description': '',
|
|
25
|
-
'long_description': '# Cert Host Scraper\n\n\n[](https://badge.fury.io/py/cert-host-scraper)\n\n[](LICENSE)\n[](https://github.com/psf/black)\n\nQuery the certificate transparency log
|
|
25
|
+
'long_description': '# Cert Host Scraper\n\n\n[](https://badge.fury.io/py/cert-host-scraper)\n\n[](LICENSE)\n[](https://github.com/psf/black)\n\nQuery the certificate transparency log from [crt.sh](https://crt.sh) by a given a keyword and returns the status code of the matched results. Optionally filtering the results by status code.\n\n## Usage\n\n```bash\ncert-host-scraper search your-domain.com [--status-code 200] [--clean/--no-clean]\n```\n\n## Installation\n\nWith pipx:\n\n```bash\npipx install cert-host-scraper\n```\n\nWith pip:\n\n```bash\npip install cert-host-scraper\n```\n\n## Development\n\nRequires [poetry][0] and Python 3.10.\n\n```\npoetry install\npoetry run python -m cert_host_scraper.cli\n```\n\n## License\n\nMIT\n[0]: https://python-poetry.org\n',
|
|
26
26
|
'author': 'Malachi Soord',
|
|
27
27
|
'author_email': 'inverse.chi@gmail.com',
|
|
28
|
-
'maintainer': None,
|
|
29
|
-
'maintainer_email': None,
|
|
28
|
+
'maintainer': 'None',
|
|
29
|
+
'maintainer_email': 'None',
|
|
30
30
|
'url': 'https://github.com/inverse/cert-host-scraper',
|
|
31
31
|
'packages': packages,
|
|
32
32
|
'package_data': package_data,
|