html-proofer 4.0.0.rc3 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +3 -2
- data/lib/html-proofer.rb +1 -1
- data/lib/html_proofer/attribute/url.rb +180 -174
- data/lib/html_proofer/cache.rb +128 -85
- data/lib/html_proofer/check/favicon.rb +29 -24
- data/lib/html_proofer/check/images.rb +78 -47
- data/lib/html_proofer/check/links.rb +109 -98
- data/lib/html_proofer/check/open_graph.rb +30 -25
- data/lib/html_proofer/check/scripts.rb +36 -28
- data/lib/html_proofer/check.rb +11 -10
- data/lib/html_proofer/configuration.rb +16 -15
- data/lib/html_proofer/element.rb +19 -19
- data/lib/html_proofer/log.rb +19 -19
- data/lib/html_proofer/reporter/cli.rb +22 -18
- data/lib/html_proofer/reporter.rb +3 -3
- data/lib/html_proofer/runner.rb +45 -44
- data/lib/html_proofer/url_validator/external.rb +157 -152
- data/lib/html_proofer/url_validator/internal.rb +72 -62
- data/lib/html_proofer/utils.rb +5 -5
- data/lib/html_proofer/version.rb +1 -1
- data/lib/html_proofer.rb +11 -9
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b6e8293984dde05cd406326b21fa93a638b0a972efdab95f59adfe606e8be538
|
4
|
+
data.tar.gz: b9323e2c6d7cf91a08f45a7b9ecb77b826c02b2ad2b6bf74e6951701b41c3bdb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6d962362e0abc03b2deb15da0b06f2e42857756b045db568c959263346957b628dee73b719de2ed9f24a03762e2ae97e2ff208ea8f216c6d0323bca77864d43d
|
7
|
+
data.tar.gz: 2bb9ca1d81e1a088344c8df0c513d2c0e96187cf23b7895def81fba4ac1820f56a0099a4dd2caa861df6ecd75afd4388e4a5b37e18308c4641300ab07f704d15
|
data/bin/htmlproofer
CHANGED
@@ -26,10 +26,11 @@ Mercenary.program(:htmlproofer) do |p|
|
|
26
26
|
p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker (default: `false`)'
|
27
27
|
p.option 'enforce_https', '--enforce-https', 'Fails a link if it\'s not marked as `https` (default: `true`).'
|
28
28
|
p.option 'extensions', '--extensions ext1,[ext2,...[', Array, 'A comma-separated list of Strings indicating the file extensions you would like to check (including the dot) (default: `.html`)'
|
29
|
+
p.option 'ignore_empty_alt', '--ignore-empty-alt', ' If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those)'
|
29
30
|
p.option 'ignore_files', '--ignore-files file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
|
30
31
|
p.option 'ignore_empty_mailto', '--ignore-empty-mailto', 'If `true`, allows `mailto:` `href`s which do not contain an email address'
|
31
|
-
p.option 'ignore_missing_alt', '--
|
32
|
-
p.option 'ignore_status_codes', '--
|
32
|
+
p.option 'ignore_missing_alt', '--ignore-missing-alt', 'If `true`, ignores images with missing alt tags'
|
33
|
+
p.option 'ignore_status_codes', '--ignore-status-codes 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
|
33
34
|
p.option 'ignore_urls', '--ignore-urls link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. This affects all HTML attributes, such as `alt` tags on images.'
|
34
35
|
p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
|
35
36
|
p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
|
data/lib/html-proofer.rb
CHANGED
@@ -1,231 +1,237 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
module HTMLProofer
|
4
|
+
class Attribute
|
5
|
+
class Url < HTMLProofer::Attribute
|
6
|
+
attr_reader :url
|
5
7
|
|
6
|
-
|
8
|
+
REMOTE_SCHEMES = ["http", "https"].freeze
|
7
9
|
|
8
|
-
|
9
|
-
|
10
|
+
def initialize(runner, link_attribute, base_url: nil)
|
11
|
+
super
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
if @raw_attribute.nil?
|
14
|
+
@url = nil
|
15
|
+
else
|
16
|
+
@url = @raw_attribute.delete("\u200b").strip
|
17
|
+
@url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)
|
16
18
|
|
17
|
-
|
18
|
-
|
19
|
+
swap_urls!
|
20
|
+
clean_url!
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
22
|
+
# convert "//" links to "https://"
|
23
|
+
@url.start_with?("//") ? @url = "https:#{@url}" : @url
|
24
|
+
end
|
25
|
+
end
|
24
26
|
|
25
|
-
|
26
|
-
|
27
|
-
|
27
|
+
def to_s
|
28
|
+
@url
|
29
|
+
end
|
28
30
|
|
29
|
-
|
30
|
-
|
31
|
+
def known_extension?
|
32
|
+
return true if hash_link?
|
31
33
|
|
32
|
-
|
34
|
+
ext = File.extname(path)
|
33
35
|
|
34
|
-
|
35
|
-
|
36
|
+
# no extension means we use the assumed one
|
37
|
+
return @runner.options[:extensions].include?(@runner.options[:assume_extension]) if blank?(ext)
|
36
38
|
|
37
|
-
|
38
|
-
|
39
|
+
@runner.options[:extensions].include?(ext)
|
40
|
+
end
|
39
41
|
|
40
|
-
|
41
|
-
|
42
|
-
|
42
|
+
def unknown_extension?
|
43
|
+
!known_extension?
|
44
|
+
end
|
43
45
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
46
|
+
def ignore?
|
47
|
+
return true if /^javascript:/.match?(@url)
|
48
|
+
return true if ignores_pattern?(@runner.options[:ignore_urls])
|
49
|
+
end
|
48
50
|
|
49
|
-
|
50
|
-
|
51
|
-
|
51
|
+
def valid?
|
52
|
+
!parts.nil?
|
53
|
+
end
|
52
54
|
|
53
|
-
|
54
|
-
|
55
|
-
|
55
|
+
def path?
|
56
|
+
!parts.host.nil? && !parts.path.nil?
|
57
|
+
end
|
56
58
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
59
|
+
def parts
|
60
|
+
@parts ||= Addressable::URI.parse(@url)
|
61
|
+
rescue URI::Error, Addressable::URI::InvalidURIError
|
62
|
+
@parts = nil
|
63
|
+
end
|
62
64
|
|
63
|
-
|
64
|
-
|
65
|
-
|
65
|
+
def path
|
66
|
+
Addressable::URI.unencode(parts.path) unless parts.nil?
|
67
|
+
end
|
66
68
|
|
67
|
-
|
68
|
-
|
69
|
-
|
69
|
+
def hash
|
70
|
+
parts&.fragment
|
71
|
+
end
|
70
72
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
73
|
+
# Does the URL have a hash?
|
74
|
+
def hash?
|
75
|
+
!blank?(hash)
|
76
|
+
end
|
75
77
|
|
76
|
-
|
77
|
-
|
78
|
-
|
78
|
+
def scheme
|
79
|
+
parts&.scheme
|
80
|
+
end
|
79
81
|
|
80
|
-
|
81
|
-
|
82
|
-
|
82
|
+
def remote?
|
83
|
+
REMOTE_SCHEMES.include?(scheme)
|
84
|
+
end
|
83
85
|
|
84
|
-
|
85
|
-
|
86
|
-
|
86
|
+
def http?
|
87
|
+
scheme == "http"
|
88
|
+
end
|
87
89
|
|
88
|
-
|
89
|
-
|
90
|
-
|
90
|
+
def https?
|
91
|
+
scheme == "https"
|
92
|
+
end
|
91
93
|
|
92
|
-
|
93
|
-
|
94
|
-
|
94
|
+
def non_http_remote?
|
95
|
+
!scheme.nil? && !remote?
|
96
|
+
end
|
95
97
|
|
96
|
-
|
97
|
-
|
98
|
-
|
98
|
+
def host
|
99
|
+
parts&.host
|
100
|
+
end
|
99
101
|
|
100
|
-
|
101
|
-
|
102
|
-
|
102
|
+
def domain_path
|
103
|
+
(host || "") + path
|
104
|
+
end
|
103
105
|
|
104
|
-
|
105
|
-
|
106
|
-
|
106
|
+
def query_values
|
107
|
+
parts&.query_values
|
108
|
+
end
|
107
109
|
|
108
|
-
|
109
|
-
|
110
|
-
|
110
|
+
# checks if a file exists relative to the current pwd
|
111
|
+
def exists?
|
112
|
+
return true if base64?
|
111
113
|
|
112
|
-
|
114
|
+
return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)
|
113
115
|
|
114
|
-
|
115
|
-
|
116
|
+
@runner.checked_paths[absolute_path] = File.exist?(absolute_path)
|
117
|
+
end
|
116
118
|
|
117
|
-
|
118
|
-
|
119
|
-
|
119
|
+
def base64?
|
120
|
+
/^data:image/.match?(@raw_attribute)
|
121
|
+
end
|
120
122
|
|
121
|
-
|
122
|
-
|
123
|
+
def absolute_path
|
124
|
+
path = file_path || @runner.current_filename
|
123
125
|
|
124
|
-
|
125
|
-
|
126
|
+
File.expand_path(path, Dir.pwd)
|
127
|
+
end
|
126
128
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
129
|
+
def file_path
|
130
|
+
return if path.nil? || path.empty?
|
131
|
+
|
132
|
+
path_dot_ext = ""
|
133
|
+
|
134
|
+
path_dot_ext = path + @runner.options[:assume_extension] unless blank?(@runner.options[:assume_extension])
|
135
|
+
|
136
|
+
base = if absolute_path?(path) # path relative to root
|
137
|
+
# either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
|
138
|
+
@runner.options[:root_dir] || (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source))
|
139
|
+
# relative links, path is a file
|
140
|
+
elsif File.exist?(File.expand_path(path,
|
141
|
+
@runner.current_source)) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
|
142
|
+
File.dirname(@runner.current_filename)
|
143
|
+
# relative links in nested dir, path is a file
|
144
|
+
elsif File.exist?(File.join(File.dirname(@runner.current_filename),
|
145
|
+
path)) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
|
146
|
+
File.dirname(@runner.current_filename)
|
147
|
+
# relative link, path is a directory
|
148
|
+
else
|
149
|
+
@runner.current_filename
|
150
|
+
end
|
151
|
+
|
152
|
+
file = File.join(base, path)
|
153
|
+
|
154
|
+
if @runner.options[:assume_extension] && File.file?("#{file}#{@runner.options[:assume_extension]}")
|
155
|
+
file = "#{file}#{@runner.options[:assume_extension]}"
|
156
|
+
elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
|
157
|
+
file = File.join(file, @runner.options[:directory_index_file])
|
158
|
+
end
|
159
|
+
|
160
|
+
file
|
161
|
+
end
|
155
162
|
|
156
|
-
|
157
|
-
|
163
|
+
def unslashed_directory?(file)
|
164
|
+
File.directory?(file) && !file.end_with?(File::SEPARATOR)
|
165
|
+
end
|
158
166
|
|
159
|
-
|
160
|
-
|
161
|
-
|
167
|
+
def absolute_path?(path)
|
168
|
+
path.start_with?("/")
|
169
|
+
end
|
162
170
|
|
163
|
-
|
164
|
-
|
165
|
-
|
171
|
+
# path is external to the file
|
172
|
+
def external?
|
173
|
+
!internal?
|
174
|
+
end
|
166
175
|
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
end
|
176
|
+
def internal?
|
177
|
+
relative_link? || internal_absolute_link? || hash_link?
|
178
|
+
end
|
171
179
|
|
172
|
-
|
173
|
-
|
174
|
-
|
180
|
+
def internal_absolute_link?
|
181
|
+
url.start_with?("/")
|
182
|
+
end
|
175
183
|
|
176
|
-
|
177
|
-
|
178
|
-
end
|
184
|
+
def relative_link?
|
185
|
+
return false if remote?
|
179
186
|
|
180
|
-
|
181
|
-
|
187
|
+
hash_link? || param_link? || url.start_with?(".") || url =~ /^\S/
|
188
|
+
end
|
182
189
|
|
183
|
-
|
184
|
-
|
190
|
+
def link_points_to_same_page?
|
191
|
+
hash_link || param_link
|
192
|
+
end
|
185
193
|
|
186
|
-
|
187
|
-
|
188
|
-
|
194
|
+
def hash_link?
|
195
|
+
url.start_with?("#")
|
196
|
+
end
|
189
197
|
|
190
|
-
|
191
|
-
|
192
|
-
|
198
|
+
def param_link?
|
199
|
+
url.start_with?("?")
|
200
|
+
end
|
193
201
|
|
194
|
-
|
195
|
-
|
196
|
-
|
202
|
+
def sans_hash
|
203
|
+
@url.to_s.sub(/##{hash}/, "")
|
204
|
+
end
|
197
205
|
|
198
|
-
|
199
|
-
|
200
|
-
|
206
|
+
# catch any obvious issues, like strings in port numbers
|
207
|
+
private def clean_url!
|
208
|
+
return if @url =~ /^([!#{Regexp.last_match(0)}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
|
201
209
|
|
202
|
-
|
203
|
-
|
204
|
-
return if @url =~ /^([!#{Regexp.last_match(0)}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
|
210
|
+
@url = Addressable::URI.parse(@url).normalize.to_s
|
211
|
+
end
|
205
212
|
|
206
|
-
|
207
|
-
|
213
|
+
private def swap_urls!
|
214
|
+
return @url if blank?(replacements = @runner.options[:swap_urls])
|
208
215
|
|
209
|
-
|
210
|
-
|
216
|
+
replacements.each do |link, replace|
|
217
|
+
@url = @url.gsub(link, replace)
|
218
|
+
end
|
219
|
+
end
|
211
220
|
|
212
|
-
|
213
|
-
|
214
|
-
end
|
215
|
-
end
|
221
|
+
private def ignores_pattern?(links_to_ignore)
|
222
|
+
return false unless links_to_ignore.is_a?(Array)
|
216
223
|
|
217
|
-
|
218
|
-
|
224
|
+
links_to_ignore.each do |link_to_ignore|
|
225
|
+
case link_to_ignore
|
226
|
+
when String
|
227
|
+
return true if link_to_ignore == @raw_attribute
|
228
|
+
when Regexp
|
229
|
+
return true if link_to_ignore&.match?(@raw_attribute)
|
230
|
+
end
|
231
|
+
end
|
219
232
|
|
220
|
-
|
221
|
-
case link_to_ignore
|
222
|
-
when String
|
223
|
-
return true if link_to_ignore == @raw_attribute
|
224
|
-
when Regexp
|
225
|
-
return true if link_to_ignore&.match?(@raw_attribute)
|
233
|
+
false
|
226
234
|
end
|
227
235
|
end
|
228
|
-
|
229
|
-
false
|
230
236
|
end
|
231
237
|
end
|