cert-host-scraper 0.9.4__tar.gz → 0.10.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cert-host-scraper might be problematic. Click here for more details.

@@ -1,37 +1,35 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: cert-host-scraper
3
- Version: 0.9.4
4
- Summary:
5
- Home-page: https://github.com/inverse/cert-host-scraper
3
+ Version: 0.10.1
4
+ Summary: Query the certificate transparency log from crt.sh by a given a keyword and returns the status code of the matched results. Optionally, filtering the results by status code.
6
5
  License: MIT
7
6
  Author: Malachi Soord
8
7
  Author-email: inverse.chi@gmail.com
9
- Requires-Python: >=3.10,<4.0
8
+ Requires-Python: >=3.10,<4
10
9
  Classifier: License :: OSI Approved :: MIT License
11
10
  Classifier: Programming Language :: Python :: 3
12
11
  Classifier: Programming Language :: Python :: 3.10
13
12
  Classifier: Programming Language :: Python :: 3.11
14
13
  Classifier: Programming Language :: Python :: 3.12
15
14
  Classifier: Programming Language :: Python :: 3.13
16
- Requires-Dist: click (>=8.0.3,<9.0.0)
15
+ Requires-Dist: click (>=8.1.8,<9.0.0)
17
16
  Requires-Dist: requests (>=2.27.1,<3.0.0)
18
- Requires-Dist: rich (>=11,<14)
17
+ Requires-Dist: rich (>=11,<15)
19
18
  Requires-Dist: single-source (>=0.4.0,<0.5.0)
20
- Project-URL: Repository, https://github.com/inverse/cert-host-scraper
21
19
  Description-Content-Type: text/markdown
22
20
 
23
21
  # Cert Host Scraper
24
22
 
25
- ![CI](https://github.com/inverse/cert-host-scraper/workflows/CI/badge.svg)
23
+ [![CI](https://github.com/inverse/cert-host-scraper/actions/workflows/ci.yml/badge.svg)](https://github.com/inverse/cert-host-scraper/actions/workflows/ci.yml)
26
24
  [![PyPI version](https://badge.fury.io/py/cert-host-scraper.svg)](https://badge.fury.io/py/cert-host-scraper)
27
25
  ![PyPI downloads](https://img.shields.io/pypi/dm/cert-host-scraper?label=pypi%20downloads)
26
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
27
+ ![Static Badge](https://img.shields.io/badge/type%20checked-mypy-039dfc)
28
28
  [![License](https://img.shields.io/github/license/inverse/cert-host-scraper.svg)](LICENSE)
29
- [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
30
29
 
31
30
  Query the certificate transparency log from [crt.sh](https://crt.sh) by a given keyword and return the status code of the matched results. Optionally, filter the results by status code.
32
31
 
33
- <img alt="Demo of cert-host-scraper" src="https://vhs.charm.sh/vhs-3n8rmkDw9BDCmq55P8YKAy.gif" width="800" />
34
-
32
+ <img alt="Demo of cert-host-scraper" src="https://vhs.charm.sh/vhs-7fKWanXXcalG2oS28DVyZC.gif" width="800" />
35
33
 
36
34
  ## Usage
37
35
 
@@ -57,14 +55,17 @@ pip install cert-host-scraper
57
55
 
58
56
  Requires [poetry][0] and Python 3.10+.
59
57
 
60
- ```
58
+ ```bash
61
59
  poetry install
62
60
  poetry run python -m cert_host_scraper.cli
63
61
  ```
64
62
 
63
+ Python and poetry versions are managed by [mise][1], as defined in the provided `.tool-versions` file.
64
+
65
65
  ## License
66
66
 
67
67
  MIT
68
68
 
69
69
  [0]: https://python-poetry.org
70
+ [1]: https://github.com/jdx/mise
70
71
 
@@ -1,15 +1,15 @@
1
1
  # Cert Host Scraper
2
2
 
3
- ![CI](https://github.com/inverse/cert-host-scraper/workflows/CI/badge.svg)
3
+ [![CI](https://github.com/inverse/cert-host-scraper/actions/workflows/ci.yml/badge.svg)](https://github.com/inverse/cert-host-scraper/actions/workflows/ci.yml)
4
4
  [![PyPI version](https://badge.fury.io/py/cert-host-scraper.svg)](https://badge.fury.io/py/cert-host-scraper)
5
5
  ![PyPI downloads](https://img.shields.io/pypi/dm/cert-host-scraper?label=pypi%20downloads)
6
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
7
+ ![Static Badge](https://img.shields.io/badge/type%20checked-mypy-039dfc)
6
8
  [![License](https://img.shields.io/github/license/inverse/cert-host-scraper.svg)](LICENSE)
7
- [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
8
9
 
9
10
  Query the certificate transparency log from [crt.sh](https://crt.sh) by a given keyword and return the status code of the matched results. Optionally, filter the results by status code.
10
11
 
11
- <img alt="Demo of cert-host-scraper" src="https://vhs.charm.sh/vhs-3n8rmkDw9BDCmq55P8YKAy.gif" width="800" />
12
-
12
+ <img alt="Demo of cert-host-scraper" src="https://vhs.charm.sh/vhs-7fKWanXXcalG2oS28DVyZC.gif" width="800" />
13
13
 
14
14
  ## Usage
15
15
 
@@ -35,13 +35,16 @@ pip install cert-host-scraper
35
35
 
36
36
  Requires [poetry][0] and Python 3.10+.
37
37
 
38
- ```
38
+ ```bash
39
39
  poetry install
40
40
  poetry run python -m cert_host_scraper.cli
41
41
  ```
42
42
 
43
+ Python and poetry versions are managed by [mise][1], as defined in the provided `.tool-versions` file.
44
+
43
45
  ## License
44
46
 
45
47
  MIT
46
48
 
47
49
  [0]: https://python-poetry.org
50
+ [1]: https://github.com/jdx/mise
@@ -1,4 +1,5 @@
1
1
  import asyncio
2
+ import json
2
3
  import logging
3
4
  import sys
4
5
 
@@ -31,6 +32,15 @@ def validate_status_code(
31
32
  return NO_STATUS_CODE_FILTER
32
33
 
33
34
 
35
+ class Output:
36
+ TABLE = "table"
37
+ JSON = "json"
38
+
39
+ @classmethod
40
+ def values(cls) -> list:
41
+ return [cls.TABLE, cls.JSON]
42
+
43
+
34
44
  @click.group()
35
45
  @click.option("--debug", is_flag=True, help="Whether to enable debug level output")
36
46
  @click.version_option(__version__, message="%(version)s")
@@ -61,6 +71,9 @@ def cli(debug: bool):
61
71
  help="Number of URLs to process at once",
62
72
  default=20,
63
73
  )
74
+ @click.option(
75
+ "--output", type=click.Choice(Output.values()), required=True, default="table"
76
+ )
64
77
  def search(
65
78
  search: str,
66
79
  status_code: int,
@@ -68,6 +81,7 @@ def search(
68
81
  clean: bool,
69
82
  strip: bool,
70
83
  batch_size: int,
84
+ output: str,
71
85
  ):
72
86
  """
73
87
  Search the certificate transparency log.
@@ -75,7 +89,10 @@ def search(
75
89
  if strip:
76
90
  search = strip_url(search)
77
91
 
78
- click.echo(f"Searching for {search}")
92
+ display_json = output == Output.JSON
93
+
94
+ if not display_json:
95
+ click.echo(f"Searching for {search}")
79
96
  options = Options(timeout, clean)
80
97
  results = []
81
98
  try:
@@ -84,11 +101,12 @@ def search(
84
101
  click.echo(f"Failed to search for results: {e}")
85
102
  sys.exit(1)
86
103
 
87
- click.echo(f"Found {len(urls)} URLs for {search}")
104
+ if not display_json:
105
+ click.echo(f"Found {len(urls)} URLs for {search}")
88
106
  loop = asyncio.new_event_loop()
89
107
  asyncio.set_event_loop(loop)
90
108
  chunks = list(divide_chunks(urls, batch_size))
91
- for chunk_index in track(range(len(chunks)), "Checking URLs"):
109
+ for chunk_index in track(range(len(chunks)), "Checking URLs", disable=display_json):
92
110
  chunk_result = loop.run_until_complete(
93
111
  asyncio.gather(*[validate_url(url, options) for url in chunks[chunk_index]])
94
112
  )
@@ -100,23 +118,30 @@ def search(
100
118
  else:
101
119
  display = result.scraped
102
120
 
103
- table = Table(show_header=True, header_style="bold", box=box.MINIMAL)
104
- table.add_column("URL")
105
- table.add_column("Status Code")
106
- for url_result in display:
107
- display_code = str(url_result.status_code)
108
- if url_result.status_code == -1:
109
- display_code = "-"
110
-
111
- url = url_result.url
112
- if url_result.status_code == 200:
113
- display_code = f"[green]{display_code}[/green]"
114
- url = f"[green]{url}[/green]"
115
-
116
- table.add_row(url, display_code)
117
-
118
- console = Console()
119
- console.print(table)
121
+ if display_json:
122
+ json_output = [
123
+ {"url": url_result.url, "status_code": url_result.status_code}
124
+ for url_result in display
125
+ ]
126
+ click.echo(json.dumps(json_output, indent=2))
127
+ else:
128
+ table = Table(show_header=True, header_style="bold", box=box.MINIMAL)
129
+ table.add_column("URL")
130
+ table.add_column("Status Code")
131
+ for url_result in display:
132
+ display_code = str(url_result.status_code)
133
+ if url_result.status_code == -1:
134
+ display_code = "-"
135
+
136
+ url = url_result.url
137
+ if url_result.status_code == 200:
138
+ display_code = f"[green]{display_code}[/green]"
139
+ url = f"[green]{url}[/green]"
140
+
141
+ table.add_row(url, display_code)
142
+
143
+ console = Console()
144
+ console.print(table)
120
145
 
121
146
 
122
147
  if __name__ == "__main__":
@@ -1,29 +1,33 @@
1
- [tool.poetry]
1
+ [project]
2
2
  name = "cert-host-scraper"
3
- version = "0.9.4"
4
- description = ""
5
- authors = ["Malachi Soord <inverse.chi@gmail.com>"]
3
+ version = "0.10.1"
4
+ description = "Query the certificate transparency log from crt.sh by a given a keyword and returns the status code of the matched results. Optionally, filtering the results by status code."
5
+ authors = [
6
+ {name = "Malachi Soord", email = "inverse.chi@gmail.com"}
7
+ ]
6
8
  license = "MIT"
7
-
8
9
  readme = "README.md"
9
10
  repository = "https://github.com/inverse/cert-host-scraper"
10
11
  homepage = "https://github.com/inverse/cert-host-scraper"
12
+ dynamic = [ "dependencies" ]
13
+ requires-python = ">=3.10,<4"
11
14
 
12
-
13
- [tool.poetry.scripts]
15
+ [project.scripts]
14
16
  cert-host-scraper = "cert_host_scraper.cli:cli"
15
17
 
16
18
  [tool.poetry.dependencies]
17
19
  python = "^3.10"
18
20
  requests = "^2.27.1"
19
- click = "^8.0.3"
20
- rich = ">=11,<14"
21
+ click = "^8.1.8"
22
+ rich = ">=11,<15"
21
23
  single-source = "^0.4.0"
22
24
 
23
- [tool.poetry.dev-dependencies]
24
- pytest = "^8.3.3"
25
+
26
+ [tool.poetry.group.dev.dependencies]
27
+ pytest = "^8.3.5"
28
+ pytest-cov = "^6.0.0"
25
29
  pytest-socket = "^0.7.0"
26
- vcrpy = "^6.0.2"
30
+ vcrpy = "^7.0.0"
27
31
 
28
32
  [tool.ruff]
29
33
  lint.ignore = ["E501"]