cert-host-scraper 0.9.3__tar.gz → 0.10.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cert-host-scraper might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cert-host-scraper
3
- Version: 0.9.3
3
+ Version: 0.10.0
4
4
  Summary:
5
5
  Home-page: https://github.com/inverse/cert-host-scraper
6
6
  License: MIT
@@ -12,7 +12,8 @@ Classifier: Programming Language :: Python :: 3
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
- Requires-Dist: click (>=8.0.3,<9.0.0)
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Dist: click (>=8.1.8,<9.0.0)
16
17
  Requires-Dist: requests (>=2.27.1,<3.0.0)
17
18
  Requires-Dist: rich (>=11,<14)
18
19
  Requires-Dist: single-source (>=0.4.0,<0.5.0)
@@ -21,7 +22,7 @@ Description-Content-Type: text/markdown
21
22
 
22
23
  # Cert Host Scraper
23
24
 
24
- ![CI](https://github.com/inverse/cert-host-scraper/workflows/CI/badge.svg)
25
+ [![CI](https://github.com/inverse/cert-host-scraper/actions/workflows/ci.yml/badge.svg)](https://github.com/inverse/cert-host-scraper/actions/workflows/ci.yml)
25
26
  [![PyPI version](https://badge.fury.io/py/cert-host-scraper.svg)](https://badge.fury.io/py/cert-host-scraper)
26
27
  ![PyPI downloads](https://img.shields.io/pypi/dm/cert-host-scraper?label=pypi%20downloads)
27
28
  [![License](https://img.shields.io/github/license/inverse/cert-host-scraper.svg)](LICENSE)
@@ -1,6 +1,6 @@
1
1
  # Cert Host Scraper
2
2
 
3
- ![CI](https://github.com/inverse/cert-host-scraper/workflows/CI/badge.svg)
3
+ [![CI](https://github.com/inverse/cert-host-scraper/actions/workflows/ci.yml/badge.svg)](https://github.com/inverse/cert-host-scraper/actions/workflows/ci.yml)
4
4
  [![PyPI version](https://badge.fury.io/py/cert-host-scraper.svg)](https://badge.fury.io/py/cert-host-scraper)
5
5
  ![PyPI downloads](https://img.shields.io/pypi/dm/cert-host-scraper?label=pypi%20downloads)
6
6
  [![License](https://img.shields.io/github/license/inverse/cert-host-scraper.svg)](LICENSE)
@@ -1,4 +1,5 @@
1
1
  import asyncio
2
+ import json
2
3
  import logging
3
4
  import sys
4
5
 
@@ -31,6 +32,15 @@ def validate_status_code(
31
32
  return NO_STATUS_CODE_FILTER
32
33
 
33
34
 
35
+ class Output:
36
+ TABLE = "table"
37
+ JSON = "json"
38
+
39
+ @classmethod
40
+ def values(cls) -> list:
41
+ return [cls.TABLE, cls.JSON]
42
+
43
+
34
44
  @click.group()
35
45
  @click.option("--debug", is_flag=True, help="Whether to enable debug level output")
36
46
  @click.version_option(__version__, message="%(version)s")
@@ -61,6 +71,9 @@ def cli(debug: bool):
61
71
  help="Number of URLs to process at once",
62
72
  default=20,
63
73
  )
74
+ @click.option(
75
+ "--output", type=click.Choice(Output.values()), required=True, default="table"
76
+ )
64
77
  def search(
65
78
  search: str,
66
79
  status_code: int,
@@ -68,6 +81,7 @@ def search(
68
81
  clean: bool,
69
82
  strip: bool,
70
83
  batch_size: int,
84
+ output: str,
71
85
  ):
72
86
  """
73
87
  Search the certificate transparency log.
@@ -75,7 +89,10 @@ def search(
75
89
  if strip:
76
90
  search = strip_url(search)
77
91
 
78
- click.echo(f"Searching for {search}")
92
+ display_json = output == Output.JSON
93
+
94
+ if not display_json:
95
+ click.echo(f"Searching for {search}")
79
96
  options = Options(timeout, clean)
80
97
  results = []
81
98
  try:
@@ -84,11 +101,12 @@ def search(
84
101
  click.echo(f"Failed to search for results: {e}")
85
102
  sys.exit(1)
86
103
 
87
- click.echo(f"Found {len(urls)} URLs for {search}")
104
+ if not display_json:
105
+ click.echo(f"Found {len(urls)} URLs for {search}")
88
106
  loop = asyncio.new_event_loop()
89
107
  asyncio.set_event_loop(loop)
90
108
  chunks = list(divide_chunks(urls, batch_size))
91
- for chunk_index in track(range(len(chunks)), "Checking URLs"):
109
+ for chunk_index in track(range(len(chunks)), "Checking URLs", disable=display_json):
92
110
  chunk_result = loop.run_until_complete(
93
111
  asyncio.gather(*[validate_url(url, options) for url in chunks[chunk_index]])
94
112
  )
@@ -100,23 +118,30 @@ def search(
100
118
  else:
101
119
  display = result.scraped
102
120
 
103
- table = Table(show_header=True, header_style="bold", box=box.MINIMAL)
104
- table.add_column("URL")
105
- table.add_column("Status Code")
106
- for url_result in display:
107
- display_code = str(url_result.status_code)
108
- if url_result.status_code == -1:
109
- display_code = "-"
110
-
111
- url = url_result.url
112
- if url_result.status_code == 200:
113
- display_code = f"[green]{display_code}[/green]"
114
- url = f"[green]{url}[/green]"
115
-
116
- table.add_row(url, display_code)
117
-
118
- console = Console()
119
- console.print(table)
121
+ if display_json:
122
+ json_output = [
123
+ {"url": url_result.url, "status_code": url_result.status_code}
124
+ for url_result in display
125
+ ]
126
+ click.echo(json.dumps(json_output, indent=2))
127
+ else:
128
+ table = Table(show_header=True, header_style="bold", box=box.MINIMAL)
129
+ table.add_column("URL")
130
+ table.add_column("Status Code")
131
+ for url_result in display:
132
+ display_code = str(url_result.status_code)
133
+ if url_result.status_code == -1:
134
+ display_code = "-"
135
+
136
+ url = url_result.url
137
+ if url_result.status_code == 200:
138
+ display_code = f"[green]{display_code}[/green]"
139
+ url = f"[green]{url}[/green]"
140
+
141
+ table.add_row(url, display_code)
142
+
143
+ console = Console()
144
+ console.print(table)
120
145
 
121
146
 
122
147
  if __name__ == "__main__":
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cert-host-scraper"
3
- version = "0.9.3"
3
+ version = "0.10.0"
4
4
  description = ""
5
5
  authors = ["Malachi Soord <inverse.chi@gmail.com>"]
6
6
  license = "MIT"
@@ -16,14 +16,14 @@ cert-host-scraper = "cert_host_scraper.cli:cli"
16
16
  [tool.poetry.dependencies]
17
17
  python = "^3.10"
18
18
  requests = "^2.27.1"
19
- click = "^8.0.3"
19
+ click = "^8.1.8"
20
20
  rich = ">=11,<14"
21
21
  single-source = "^0.4.0"
22
22
 
23
23
  [tool.poetry.dev-dependencies]
24
- pytest = "^8.3.2"
24
+ pytest = "^8.3.4"
25
25
  pytest-socket = "^0.7.0"
26
- vcrpy = "^6.0.1"
26
+ vcrpy = "^7.0.0"
27
27
 
28
28
  [tool.ruff]
29
29
  lint.ignore = ["E501"]