mkdocs-htmlproofer-plugin 1.2.1__tar.gz → 1.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/PKG-INFO +35 -2
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/README.md +17 -0
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/htmlproofer/plugin.py +36 -7
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/mkdocs_htmlproofer_plugin.egg-info/PKG-INFO +35 -2
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/setup.py +1 -1
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/LICENSE.md +0 -0
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/htmlproofer/__init__.py +0 -0
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/mkdocs_htmlproofer_plugin.egg-info/SOURCES.txt +0 -0
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/mkdocs_htmlproofer_plugin.egg-info/dependency_links.txt +0 -0
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/mkdocs_htmlproofer_plugin.egg-info/entry_points.txt +0 -0
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/mkdocs_htmlproofer_plugin.egg-info/requires.txt +0 -0
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/mkdocs_htmlproofer_plugin.egg-info/top_level.txt +0 -0
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/pyproject.toml +0 -0
- {mkdocs-htmlproofer-plugin-1.2.1 → mkdocs_htmlproofer_plugin-1.4.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: mkdocs-htmlproofer-plugin
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.4.0
|
|
4
4
|
Summary: A MkDocs plugin that validates URL in rendered HTML files
|
|
5
5
|
Home-page: https://github.com/manuzhang/mkdocs-htmlproofer-plugin
|
|
6
6
|
Author: Manu Zhang
|
|
@@ -20,6 +20,22 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
20
20
|
Requires-Python: >=3.8
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
License-File: LICENSE.md
|
|
23
|
+
Requires-Dist: mkdocs>=1.4.0
|
|
24
|
+
Requires-Dist: Markdown
|
|
25
|
+
Requires-Dist: requests
|
|
26
|
+
Requires-Dist: beautifulsoup4
|
|
27
|
+
Dynamic: author
|
|
28
|
+
Dynamic: author-email
|
|
29
|
+
Dynamic: classifier
|
|
30
|
+
Dynamic: description
|
|
31
|
+
Dynamic: description-content-type
|
|
32
|
+
Dynamic: home-page
|
|
33
|
+
Dynamic: keywords
|
|
34
|
+
Dynamic: license
|
|
35
|
+
Dynamic: license-file
|
|
36
|
+
Dynamic: requires-dist
|
|
37
|
+
Dynamic: requires-python
|
|
38
|
+
Dynamic: summary
|
|
23
39
|
|
|
24
40
|
# mkdocs-htmlproofer-plugin [](https://pypi.org/project/mkdocs-htmlproofer-plugin)
|
|
25
41
|
|
|
@@ -27,6 +43,8 @@ License-File: LICENSE.md
|
|
|
27
43
|
|
|
28
44
|
*A [MkDocs](https://www.mkdocs.org/) plugin that validates URLs, including anchors, in rendered html files*.
|
|
29
45
|
|
|
46
|
+
> Note: [MkDocs 1.6+ supports native validation of anchors](https://www.mkdocs.org/user-guide/configuration/#validation).
|
|
47
|
+
|
|
30
48
|
## Installation
|
|
31
49
|
|
|
32
50
|
0. Prerequisites
|
|
@@ -140,6 +158,21 @@ plugins:
|
|
|
140
158
|
warn_on_ignored_urls: true
|
|
141
159
|
```
|
|
142
160
|
|
|
161
|
+
### `ignore_pages`
|
|
162
|
+
|
|
163
|
+
Avoid validating the URLs on the given list of markdown pages by ignoring them altogether.
|
|
164
|
+
Each page in the list supports unix style wildcards `*`, `[]`, `?`, etc.
|
|
165
|
+
|
|
166
|
+
```yaml
|
|
167
|
+
plugins:
|
|
168
|
+
- search
|
|
169
|
+
- htmlproofer:
|
|
170
|
+
raise_error: True
|
|
171
|
+
ignore_pages:
|
|
172
|
+
- path/to/file
|
|
173
|
+
- path/to/folder/*
|
|
174
|
+
```
|
|
175
|
+
|
|
143
176
|
### `validate_external_urls`
|
|
144
177
|
|
|
145
178
|
Avoids validating any external URLs (i.e those starting with http:// or https://).
|
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
*A [MkDocs](https://www.mkdocs.org/) plugin that validates URLs, including anchors, in rendered html files*.
|
|
6
6
|
|
|
7
|
+
> Note: [MkDocs 1.6+ supports native validation of anchors](https://www.mkdocs.org/user-guide/configuration/#validation).
|
|
8
|
+
|
|
7
9
|
## Installation
|
|
8
10
|
|
|
9
11
|
0. Prerequisites
|
|
@@ -117,6 +119,21 @@ plugins:
|
|
|
117
119
|
warn_on_ignored_urls: true
|
|
118
120
|
```
|
|
119
121
|
|
|
122
|
+
### `ignore_pages`
|
|
123
|
+
|
|
124
|
+
Avoid validating the URLs on the given list of markdown pages by ignoring them altogether.
|
|
125
|
+
Each page in the list supports unix style wildcards `*`, `[]`, `?`, etc.
|
|
126
|
+
|
|
127
|
+
```yaml
|
|
128
|
+
plugins:
|
|
129
|
+
- search
|
|
130
|
+
- htmlproofer:
|
|
131
|
+
raise_error: True
|
|
132
|
+
ignore_pages:
|
|
133
|
+
- path/to/file
|
|
134
|
+
- path/to/folder/*
|
|
135
|
+
```
|
|
136
|
+
|
|
120
137
|
### `validate_external_urls`
|
|
121
138
|
|
|
122
139
|
Avoids validating any external URLs (i.e those starting with http:// or https://).
|
|
@@ -3,6 +3,7 @@ from functools import lru_cache, partial
|
|
|
3
3
|
import os.path
|
|
4
4
|
import pathlib
|
|
5
5
|
import re
|
|
6
|
+
import time
|
|
6
7
|
from typing import Dict, List, Optional, Set
|
|
7
8
|
import urllib.parse
|
|
8
9
|
import uuid
|
|
@@ -70,6 +71,8 @@ class HtmlProoferPlugin(BasePlugin):
|
|
|
70
71
|
('validate_rendered_template', config_options.Type(bool, default=False)),
|
|
71
72
|
('ignore_urls', config_options.Type(list, default=[])),
|
|
72
73
|
('warn_on_ignored_urls', config_options.Type(bool, default=False)),
|
|
74
|
+
('ignore_pages', config_options.Type(list, default=[])),
|
|
75
|
+
('retry_max_times', config_options.Type(int, default=0)),
|
|
73
76
|
)
|
|
74
77
|
|
|
75
78
|
def __init__(self):
|
|
@@ -112,21 +115,26 @@ class HtmlProoferPlugin(BasePlugin):
|
|
|
112
115
|
strainer = SoupStrainer(('a', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'sup', 'img'))
|
|
113
116
|
|
|
114
117
|
content = output_content if self.config['validate_rendered_template'] else page.content
|
|
115
|
-
soup = BeautifulSoup(content, 'html.parser', parse_only=strainer)
|
|
118
|
+
soup = BeautifulSoup(str(content), 'html.parser', parse_only=strainer)
|
|
116
119
|
|
|
117
|
-
all_element_ids = set(tag['id'] for tag in soup.select('[id]'))
|
|
120
|
+
all_element_ids = set(str(tag['id']) for tag in soup.select('[id]'))
|
|
118
121
|
all_element_ids.add('') # Empty anchor is commonly used, but not real
|
|
119
122
|
|
|
120
|
-
urls = set(a['href'] for a in soup.find_all('a', href=True)) |
|
|
123
|
+
urls = (set(str(a['href']) for a in soup.find_all('a', href=True)) |
|
|
124
|
+
set(str(img['src']) for img in soup.find_all('img')))
|
|
121
125
|
|
|
122
126
|
for url in urls:
|
|
123
127
|
if any(fnmatch.fnmatch(url, ignore_url) for ignore_url in self.config['ignore_urls']):
|
|
124
128
|
if self.config['warn_on_ignored_urls']:
|
|
125
129
|
log_warning(f"ignoring URL {url} from {page.file.src_path}")
|
|
130
|
+
elif any(
|
|
131
|
+
fnmatch.fnmatch(page.file.src_path, ignore_page)
|
|
132
|
+
for ignore_page in self.config['ignore_pages']
|
|
133
|
+
):
|
|
134
|
+
if self.config['warn_on_ignored_urls']:
|
|
135
|
+
log_warning(f"ignoring URL {url} from {page.file.src_path}")
|
|
126
136
|
else:
|
|
127
|
-
|
|
128
|
-
if self.bad_url(url_status) and self.is_error(self.config, url, url_status):
|
|
129
|
-
self.report_invalid_url(url, url_status, page.file.src_path)
|
|
137
|
+
self.check_url(url, page.file.src_path, all_element_ids, opt_files)
|
|
130
138
|
|
|
131
139
|
def report_invalid_url(self, url, url_status, src_path):
|
|
132
140
|
error = f'invalid url - {url} [{url_status}] [{src_path}]'
|
|
@@ -152,7 +160,7 @@ class HtmlProoferPlugin(BasePlugin):
|
|
|
152
160
|
|
|
153
161
|
if self.config['skip_downloads'] is False:
|
|
154
162
|
# Download the entire contents as to not break previous behaviour.
|
|
155
|
-
for _ in response.iter_content(chunk_size=1024):
|
|
163
|
+
for _ in response.iter_content(chunk_size=1024 * 1024):
|
|
156
164
|
pass
|
|
157
165
|
|
|
158
166
|
return response.status_code
|
|
@@ -163,6 +171,27 @@ class HtmlProoferPlugin(BasePlugin):
|
|
|
163
171
|
except requests.exceptions.ConnectionError:
|
|
164
172
|
return -1
|
|
165
173
|
|
|
174
|
+
def check_url(
|
|
175
|
+
self,
|
|
176
|
+
url: str,
|
|
177
|
+
src_path: str,
|
|
178
|
+
all_element_ids: Set[str],
|
|
179
|
+
files: Dict[str, File],
|
|
180
|
+
) -> None:
|
|
181
|
+
retry_times = 0
|
|
182
|
+
retry_max_times = self.config['retry_max_times']
|
|
183
|
+
retry_duration = 2
|
|
184
|
+
while retry_times <= retry_max_times:
|
|
185
|
+
url_status = self.get_url_status(url, src_path, all_element_ids, files)
|
|
186
|
+
retry_times += 1
|
|
187
|
+
if self.bad_url(url_status) and self.is_error(self.config, url, url_status):
|
|
188
|
+
if retry_times > retry_max_times:
|
|
189
|
+
self.report_invalid_url(url, url_status, src_path)
|
|
190
|
+
else:
|
|
191
|
+
log_info(f"Retrying URL {url} from {src_path} after {retry_duration} seconds...")
|
|
192
|
+
time.sleep(retry_duration)
|
|
193
|
+
retry_duration *= 2
|
|
194
|
+
|
|
166
195
|
def get_url_status(
|
|
167
196
|
self,
|
|
168
197
|
url: str,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: mkdocs-htmlproofer-plugin
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.4.0
|
|
4
4
|
Summary: A MkDocs plugin that validates URL in rendered HTML files
|
|
5
5
|
Home-page: https://github.com/manuzhang/mkdocs-htmlproofer-plugin
|
|
6
6
|
Author: Manu Zhang
|
|
@@ -20,6 +20,22 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
20
20
|
Requires-Python: >=3.8
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
License-File: LICENSE.md
|
|
23
|
+
Requires-Dist: mkdocs>=1.4.0
|
|
24
|
+
Requires-Dist: Markdown
|
|
25
|
+
Requires-Dist: requests
|
|
26
|
+
Requires-Dist: beautifulsoup4
|
|
27
|
+
Dynamic: author
|
|
28
|
+
Dynamic: author-email
|
|
29
|
+
Dynamic: classifier
|
|
30
|
+
Dynamic: description
|
|
31
|
+
Dynamic: description-content-type
|
|
32
|
+
Dynamic: home-page
|
|
33
|
+
Dynamic: keywords
|
|
34
|
+
Dynamic: license
|
|
35
|
+
Dynamic: license-file
|
|
36
|
+
Dynamic: requires-dist
|
|
37
|
+
Dynamic: requires-python
|
|
38
|
+
Dynamic: summary
|
|
23
39
|
|
|
24
40
|
# mkdocs-htmlproofer-plugin [](https://pypi.org/project/mkdocs-htmlproofer-plugin)
|
|
25
41
|
|
|
@@ -27,6 +43,8 @@ License-File: LICENSE.md
|
|
|
27
43
|
|
|
28
44
|
*A [MkDocs](https://www.mkdocs.org/) plugin that validates URLs, including anchors, in rendered html files*.
|
|
29
45
|
|
|
46
|
+
> Note: [MkDocs 1.6+ supports native validation of anchors](https://www.mkdocs.org/user-guide/configuration/#validation).
|
|
47
|
+
|
|
30
48
|
## Installation
|
|
31
49
|
|
|
32
50
|
0. Prerequisites
|
|
@@ -140,6 +158,21 @@ plugins:
|
|
|
140
158
|
warn_on_ignored_urls: true
|
|
141
159
|
```
|
|
142
160
|
|
|
161
|
+
### `ignore_pages`
|
|
162
|
+
|
|
163
|
+
Avoid validating the URLs on the given list of markdown pages by ignoring them altogether.
|
|
164
|
+
Each page in the list supports unix style wildcards `*`, `[]`, `?`, etc.
|
|
165
|
+
|
|
166
|
+
```yaml
|
|
167
|
+
plugins:
|
|
168
|
+
- search
|
|
169
|
+
- htmlproofer:
|
|
170
|
+
raise_error: True
|
|
171
|
+
ignore_pages:
|
|
172
|
+
- path/to/file
|
|
173
|
+
- path/to/folder/*
|
|
174
|
+
```
|
|
175
|
+
|
|
143
176
|
### `validate_external_urls`
|
|
144
177
|
|
|
145
178
|
Avoids validating any external URLs (i.e those starting with http:// or https://).
|
|
@@ -9,7 +9,7 @@ def read(fname: str):
|
|
|
9
9
|
|
|
10
10
|
setup(
|
|
11
11
|
name='mkdocs-htmlproofer-plugin',
|
|
12
|
-
version='1.2.1',
|
|
12
|
+
version='1.4.0',
|
|
13
13
|
description='A MkDocs plugin that validates URL in rendered HTML files',
|
|
14
14
|
long_description=read('README.md'),
|
|
15
15
|
long_description_content_type='text/markdown',
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|