cert-host-scraper 0.9.3__tar.gz → 0.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cert-host-scraper might be problematic. Click here for more details.
- {cert_host_scraper-0.9.3 → cert_host_scraper-0.10.0}/PKG-INFO +4 -3
- {cert_host_scraper-0.9.3 → cert_host_scraper-0.10.0}/README.md +1 -1
- {cert_host_scraper-0.9.3 → cert_host_scraper-0.10.0}/cert_host_scraper/cli.py +45 -20
- {cert_host_scraper-0.9.3 → cert_host_scraper-0.10.0}/pyproject.toml +4 -4
- {cert_host_scraper-0.9.3 → cert_host_scraper-0.10.0}/LICENSE +0 -0
- {cert_host_scraper-0.9.3 → cert_host_scraper-0.10.0}/cert_host_scraper/__init__.py +0 -0
- {cert_host_scraper-0.9.3 → cert_host_scraper-0.10.0}/cert_host_scraper/scraper.py +0 -0
- {cert_host_scraper-0.9.3 → cert_host_scraper-0.10.0}/cert_host_scraper/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cert-host-scraper
|
|
3
|
-
Version: 0.9.3
|
|
3
|
+
Version: 0.10.0
|
|
4
4
|
Summary:
|
|
5
5
|
Home-page: https://github.com/inverse/cert-host-scraper
|
|
6
6
|
License: MIT
|
|
@@ -12,7 +12,8 @@ Classifier: Programming Language :: Python :: 3
|
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
-
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Dist: click (>=8.1.8,<9.0.0)
|
|
16
17
|
Requires-Dist: requests (>=2.27.1,<3.0.0)
|
|
17
18
|
Requires-Dist: rich (>=11,<14)
|
|
18
19
|
Requires-Dist: single-source (>=0.4.0,<0.5.0)
|
|
@@ -21,7 +22,7 @@ Description-Content-Type: text/markdown
|
|
|
21
22
|
|
|
22
23
|
# Cert Host Scraper
|
|
23
24
|
|
|
24
|
-
](https://github.com/inverse/cert-host-scraper/actions/workflows/ci.yml)
|
|
25
26
|
[](https://badge.fury.io/py/cert-host-scraper)
|
|
26
27
|

|
|
27
28
|
[](LICENSE)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Cert Host Scraper
|
|
2
2
|
|
|
3
|
-
](https://github.com/inverse/cert-host-scraper/actions/workflows/ci.yml)
|
|
4
4
|
[](https://badge.fury.io/py/cert-host-scraper)
|
|
5
5
|

|
|
6
6
|
[](LICENSE)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import json
|
|
2
3
|
import logging
|
|
3
4
|
import sys
|
|
4
5
|
|
|
@@ -31,6 +32,15 @@ def validate_status_code(
|
|
|
31
32
|
return NO_STATUS_CODE_FILTER
|
|
32
33
|
|
|
33
34
|
|
|
35
|
+
class Output:
|
|
36
|
+
TABLE = "table"
|
|
37
|
+
JSON = "json"
|
|
38
|
+
|
|
39
|
+
@classmethod
|
|
40
|
+
def values(cls) -> list:
|
|
41
|
+
return [cls.TABLE, cls.JSON]
|
|
42
|
+
|
|
43
|
+
|
|
34
44
|
@click.group()
|
|
35
45
|
@click.option("--debug", is_flag=True, help="Whether to enable debug level output")
|
|
36
46
|
@click.version_option(__version__, message="%(version)s")
|
|
@@ -61,6 +71,9 @@ def cli(debug: bool):
|
|
|
61
71
|
help="Number of URLs to process at once",
|
|
62
72
|
default=20,
|
|
63
73
|
)
|
|
74
|
+
@click.option(
|
|
75
|
+
"--output", type=click.Choice(Output.values()), required=True, default="table"
|
|
76
|
+
)
|
|
64
77
|
def search(
|
|
65
78
|
search: str,
|
|
66
79
|
status_code: int,
|
|
@@ -68,6 +81,7 @@ def search(
|
|
|
68
81
|
clean: bool,
|
|
69
82
|
strip: bool,
|
|
70
83
|
batch_size: int,
|
|
84
|
+
output: str,
|
|
71
85
|
):
|
|
72
86
|
"""
|
|
73
87
|
Search the certificate transparency log.
|
|
@@ -75,7 +89,10 @@ def search(
|
|
|
75
89
|
if strip:
|
|
76
90
|
search = strip_url(search)
|
|
77
91
|
|
|
78
|
-
|
|
92
|
+
display_json = output == Output.JSON
|
|
93
|
+
|
|
94
|
+
if not display_json:
|
|
95
|
+
click.echo(f"Searching for {search}")
|
|
79
96
|
options = Options(timeout, clean)
|
|
80
97
|
results = []
|
|
81
98
|
try:
|
|
@@ -84,11 +101,12 @@ def search(
|
|
|
84
101
|
click.echo(f"Failed to search for results: {e}")
|
|
85
102
|
sys.exit(1)
|
|
86
103
|
|
|
87
|
-
|
|
104
|
+
if not display_json:
|
|
105
|
+
click.echo(f"Found {len(urls)} URLs for {search}")
|
|
88
106
|
loop = asyncio.new_event_loop()
|
|
89
107
|
asyncio.set_event_loop(loop)
|
|
90
108
|
chunks = list(divide_chunks(urls, batch_size))
|
|
91
|
-
for chunk_index in track(range(len(chunks)), "Checking URLs"):
|
|
109
|
+
for chunk_index in track(range(len(chunks)), "Checking URLs", disable=display_json):
|
|
92
110
|
chunk_result = loop.run_until_complete(
|
|
93
111
|
asyncio.gather(*[validate_url(url, options) for url in chunks[chunk_index]])
|
|
94
112
|
)
|
|
@@ -100,23 +118,30 @@ def search(
|
|
|
100
118
|
else:
|
|
101
119
|
display = result.scraped
|
|
102
120
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
121
|
+
if display_json:
|
|
122
|
+
json_output = [
|
|
123
|
+
{"url": url_result.url, "status_code": url_result.status_code}
|
|
124
|
+
for url_result in display
|
|
125
|
+
]
|
|
126
|
+
click.echo(json.dumps(json_output, indent=2))
|
|
127
|
+
else:
|
|
128
|
+
table = Table(show_header=True, header_style="bold", box=box.MINIMAL)
|
|
129
|
+
table.add_column("URL")
|
|
130
|
+
table.add_column("Status Code")
|
|
131
|
+
for url_result in display:
|
|
132
|
+
display_code = str(url_result.status_code)
|
|
133
|
+
if url_result.status_code == -1:
|
|
134
|
+
display_code = "-"
|
|
135
|
+
|
|
136
|
+
url = url_result.url
|
|
137
|
+
if url_result.status_code == 200:
|
|
138
|
+
display_code = f"[green]{display_code}[/green]"
|
|
139
|
+
url = f"[green]{url}[/green]"
|
|
140
|
+
|
|
141
|
+
table.add_row(url, display_code)
|
|
142
|
+
|
|
143
|
+
console = Console()
|
|
144
|
+
console.print(table)
|
|
120
145
|
|
|
121
146
|
|
|
122
147
|
if __name__ == "__main__":
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "cert-host-scraper"
|
|
3
|
-
version = "0.9.3"
|
|
3
|
+
version = "0.10.0"
|
|
4
4
|
description = ""
|
|
5
5
|
authors = ["Malachi Soord <inverse.chi@gmail.com>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -16,14 +16,14 @@ cert-host-scraper = "cert_host_scraper.cli:cli"
|
|
|
16
16
|
[tool.poetry.dependencies]
|
|
17
17
|
python = "^3.10"
|
|
18
18
|
requests = "^2.27.1"
|
|
19
|
-
click = "^8.
|
|
19
|
+
click = "^8.1.8"
|
|
20
20
|
rich = ">=11,<14"
|
|
21
21
|
single-source = "^0.4.0"
|
|
22
22
|
|
|
23
23
|
[tool.poetry.dev-dependencies]
|
|
24
|
-
pytest = "^8.3.
|
|
24
|
+
pytest = "^8.3.4"
|
|
25
25
|
pytest-socket = "^0.7.0"
|
|
26
|
-
vcrpy = "^
|
|
26
|
+
vcrpy = "^7.0.0"
|
|
27
27
|
|
|
28
28
|
[tool.ruff]
|
|
29
29
|
lint.ignore = ["E501"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|