cert-host-scraper 0.9.4__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cert-host-scraper might be problematic. Click here for more details.
- cert_host_scraper/cli.py +45 -20
- {cert_host_scraper-0.9.4.dist-info → cert_host_scraper-0.10.1.dist-info}/METADATA +14 -13
- cert_host_scraper-0.10.1.dist-info/RECORD +9 -0
- {cert_host_scraper-0.9.4.dist-info → cert_host_scraper-0.10.1.dist-info}/WHEEL +1 -1
- cert_host_scraper-0.9.4.dist-info/RECORD +0 -9
- {cert_host_scraper-0.9.4.dist-info → cert_host_scraper-0.10.1.dist-info}/LICENSE +0 -0
- {cert_host_scraper-0.9.4.dist-info → cert_host_scraper-0.10.1.dist-info}/entry_points.txt +0 -0
cert_host_scraper/cli.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import json
|
|
2
3
|
import logging
|
|
3
4
|
import sys
|
|
4
5
|
|
|
@@ -31,6 +32,15 @@ def validate_status_code(
|
|
|
31
32
|
return NO_STATUS_CODE_FILTER
|
|
32
33
|
|
|
33
34
|
|
|
35
|
+
class Output:
|
|
36
|
+
TABLE = "table"
|
|
37
|
+
JSON = "json"
|
|
38
|
+
|
|
39
|
+
@classmethod
|
|
40
|
+
def values(cls) -> list:
|
|
41
|
+
return [cls.TABLE, cls.JSON]
|
|
42
|
+
|
|
43
|
+
|
|
34
44
|
@click.group()
|
|
35
45
|
@click.option("--debug", is_flag=True, help="Whether to enable debug level output")
|
|
36
46
|
@click.version_option(__version__, message="%(version)s")
|
|
@@ -61,6 +71,9 @@ def cli(debug: bool):
|
|
|
61
71
|
help="Number of URLs to process at once",
|
|
62
72
|
default=20,
|
|
63
73
|
)
|
|
74
|
+
@click.option(
|
|
75
|
+
"--output", type=click.Choice(Output.values()), required=True, default="table"
|
|
76
|
+
)
|
|
64
77
|
def search(
|
|
65
78
|
search: str,
|
|
66
79
|
status_code: int,
|
|
@@ -68,6 +81,7 @@ def search(
|
|
|
68
81
|
clean: bool,
|
|
69
82
|
strip: bool,
|
|
70
83
|
batch_size: int,
|
|
84
|
+
output: str,
|
|
71
85
|
):
|
|
72
86
|
"""
|
|
73
87
|
Search the certificate transparency log.
|
|
@@ -75,7 +89,10 @@ def search(
|
|
|
75
89
|
if strip:
|
|
76
90
|
search = strip_url(search)
|
|
77
91
|
|
|
78
|
-
|
|
92
|
+
display_json = output == Output.JSON
|
|
93
|
+
|
|
94
|
+
if not display_json:
|
|
95
|
+
click.echo(f"Searching for {search}")
|
|
79
96
|
options = Options(timeout, clean)
|
|
80
97
|
results = []
|
|
81
98
|
try:
|
|
@@ -84,11 +101,12 @@ def search(
|
|
|
84
101
|
click.echo(f"Failed to search for results: {e}")
|
|
85
102
|
sys.exit(1)
|
|
86
103
|
|
|
87
|
-
|
|
104
|
+
if not display_json:
|
|
105
|
+
click.echo(f"Found {len(urls)} URLs for {search}")
|
|
88
106
|
loop = asyncio.new_event_loop()
|
|
89
107
|
asyncio.set_event_loop(loop)
|
|
90
108
|
chunks = list(divide_chunks(urls, batch_size))
|
|
91
|
-
for chunk_index in track(range(len(chunks)), "Checking URLs"):
|
|
109
|
+
for chunk_index in track(range(len(chunks)), "Checking URLs", disable=display_json):
|
|
92
110
|
chunk_result = loop.run_until_complete(
|
|
93
111
|
asyncio.gather(*[validate_url(url, options) for url in chunks[chunk_index]])
|
|
94
112
|
)
|
|
@@ -100,23 +118,30 @@ def search(
|
|
|
100
118
|
else:
|
|
101
119
|
display = result.scraped
|
|
102
120
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
121
|
+
if display_json:
|
|
122
|
+
json_output = [
|
|
123
|
+
{"url": url_result.url, "status_code": url_result.status_code}
|
|
124
|
+
for url_result in display
|
|
125
|
+
]
|
|
126
|
+
click.echo(json.dumps(json_output, indent=2))
|
|
127
|
+
else:
|
|
128
|
+
table = Table(show_header=True, header_style="bold", box=box.MINIMAL)
|
|
129
|
+
table.add_column("URL")
|
|
130
|
+
table.add_column("Status Code")
|
|
131
|
+
for url_result in display:
|
|
132
|
+
display_code = str(url_result.status_code)
|
|
133
|
+
if url_result.status_code == -1:
|
|
134
|
+
display_code = "-"
|
|
135
|
+
|
|
136
|
+
url = url_result.url
|
|
137
|
+
if url_result.status_code == 200:
|
|
138
|
+
display_code = f"[green]{display_code}[/green]"
|
|
139
|
+
url = f"[green]{url}[/green]"
|
|
140
|
+
|
|
141
|
+
table.add_row(url, display_code)
|
|
142
|
+
|
|
143
|
+
console = Console()
|
|
144
|
+
console.print(table)
|
|
120
145
|
|
|
121
146
|
|
|
122
147
|
if __name__ == "__main__":
|
|
@@ -1,37 +1,35 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: cert-host-scraper
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary:
|
|
5
|
-
Home-page: https://github.com/inverse/cert-host-scraper
|
|
3
|
+
Version: 0.10.1
|
|
4
|
+
Summary: Query the certificate transparency log from crt.sh by a given keyword and return the status code of the matched results. Optionally, filter the results by status code.
|
|
6
5
|
License: MIT
|
|
7
6
|
Author: Malachi Soord
|
|
8
7
|
Author-email: inverse.chi@gmail.com
|
|
9
|
-
Requires-Python: >=3.10,<4
|
|
8
|
+
Requires-Python: >=3.10,<4
|
|
10
9
|
Classifier: License :: OSI Approved :: MIT License
|
|
11
10
|
Classifier: Programming Language :: Python :: 3
|
|
12
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
13
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
-
Requires-Dist: click (>=8.
|
|
15
|
+
Requires-Dist: click (>=8.1.8,<9.0.0)
|
|
17
16
|
Requires-Dist: requests (>=2.27.1,<3.0.0)
|
|
18
|
-
Requires-Dist: rich (>=11,<
|
|
17
|
+
Requires-Dist: rich (>=11,<15)
|
|
19
18
|
Requires-Dist: single-source (>=0.4.0,<0.5.0)
|
|
20
|
-
Project-URL: Repository, https://github.com/inverse/cert-host-scraper
|
|
21
19
|
Description-Content-Type: text/markdown
|
|
22
20
|
|
|
23
21
|
# Cert Host Scraper
|
|
24
22
|
|
|
25
|
-
](https://github.com/inverse/cert-host-scraper/actions/workflows/ci.yml)
|
|
26
24
|
[](https://badge.fury.io/py/cert-host-scraper)
|
|
27
25
|

|
|
26
|
+
[](https://github.com/astral-sh/ruff)
|
|
27
|
+

|
|
28
28
|
[](LICENSE)
|
|
29
|
-
[](https://github.com/psf/black)
|
|
30
29
|
|
|
31
30
|
Query the certificate transparency log from [crt.sh](https://crt.sh) by a given keyword and return the status code of the matched results. Optionally, filter the results by status code.
|
|
32
31
|
|
|
33
|
-
<img alt="Demo of cert-host-scraper" src="https://vhs.charm.sh/vhs-
|
|
34
|
-
|
|
32
|
+
<img alt="Demo of cert-host-scraper" src="https://vhs.charm.sh/vhs-7fKWanXXcalG2oS28DVyZC.gif" width="800" />
|
|
35
33
|
|
|
36
34
|
## Usage
|
|
37
35
|
|
|
@@ -57,14 +55,17 @@ pip install cert-host-scraper
|
|
|
57
55
|
|
|
58
56
|
Requires [poetry][0] and Python 3.10+.
|
|
59
57
|
|
|
60
|
-
```
|
|
58
|
+
```bash
|
|
61
59
|
poetry install
|
|
62
60
|
poetry run python -m cert_host_scraper.cli
|
|
63
61
|
```
|
|
64
62
|
|
|
63
|
+
Python and poetry versions are managed by [mise][1], as defined in the provided `.tool-versions` file.
|
|
64
|
+
|
|
65
65
|
## License
|
|
66
66
|
|
|
67
67
|
MIT
|
|
68
68
|
|
|
69
69
|
[0]: https://python-poetry.org
|
|
70
|
+
[1]: https://github.com/jdx/mise
|
|
70
71
|
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
cert_host_scraper/__init__.py,sha256=BUkbAFUgGLjxexWjDqSMZA-YAY55fxoD-lym6D6yDHw,142
|
|
2
|
+
cert_host_scraper/cli.py,sha256=lZmoCEM_R7KGEPksAdpzibXtP64IemZvV0zc63kYx48,4099
|
|
3
|
+
cert_host_scraper/scraper.py,sha256=U-9n2WU6d8Q-5wyloEbK9b9AO_EeaTfv0VQo_CXAGYA,1723
|
|
4
|
+
cert_host_scraper/utils.py,sha256=SDtmBCgcuwoX-MlhAQRL5ekSZO3iUSx7lFX0pmfWaVw,337
|
|
5
|
+
cert_host_scraper-0.10.1.dist-info/LICENSE,sha256=x2zGqcA4IWCXX5UKMH144zM_rK2NMXSXHN5Qn8cg6yY,1070
|
|
6
|
+
cert_host_scraper-0.10.1.dist-info/METADATA,sha256=qlOJmL65cg4Amsr9TNrbgL-gH43Zh4aLvevwG0mdwPs,2372
|
|
7
|
+
cert_host_scraper-0.10.1.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
|
8
|
+
cert_host_scraper-0.10.1.dist-info/entry_points.txt,sha256=GlQNSNbnSjw_MDZrOzhqcATBJ7C4otv7Adrz2yaeK0w,63
|
|
9
|
+
cert_host_scraper-0.10.1.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
cert_host_scraper/__init__.py,sha256=BUkbAFUgGLjxexWjDqSMZA-YAY55fxoD-lym6D6yDHw,142
|
|
2
|
-
cert_host_scraper/cli.py,sha256=b6A3WF6Ydfbs7GzEKvQMejqYgxx-5-bB43kWEnZO7XA,3422
|
|
3
|
-
cert_host_scraper/scraper.py,sha256=U-9n2WU6d8Q-5wyloEbK9b9AO_EeaTfv0VQo_CXAGYA,1723
|
|
4
|
-
cert_host_scraper/utils.py,sha256=SDtmBCgcuwoX-MlhAQRL5ekSZO3iUSx7lFX0pmfWaVw,337
|
|
5
|
-
cert_host_scraper-0.9.4.dist-info/LICENSE,sha256=x2zGqcA4IWCXX5UKMH144zM_rK2NMXSXHN5Qn8cg6yY,1070
|
|
6
|
-
cert_host_scraper-0.9.4.dist-info/METADATA,sha256=qyzsw2Coop7-1iT6DO0h9hRmF9IHD0evyRa_66EQ6ho,1988
|
|
7
|
-
cert_host_scraper-0.9.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
8
|
-
cert_host_scraper-0.9.4.dist-info/entry_points.txt,sha256=GlQNSNbnSjw_MDZrOzhqcATBJ7C4otv7Adrz2yaeK0w,63
|
|
9
|
-
cert_host_scraper-0.9.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|