html-proofer 4.0.0.rc2 → 4.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/htmlproofer +3 -2
- data/lib/html-proofer.rb +1 -1
- data/lib/html_proofer/attribute/url.rb +186 -174
- data/lib/html_proofer/cache.rb +128 -85
- data/lib/html_proofer/check/favicon.rb +29 -24
- data/lib/html_proofer/check/images.rb +78 -47
- data/lib/html_proofer/check/links.rb +109 -98
- data/lib/html_proofer/check/open_graph.rb +30 -25
- data/lib/html_proofer/check/scripts.rb +36 -28
- data/lib/html_proofer/check.rb +11 -10
- data/lib/html_proofer/configuration.rb +16 -15
- data/lib/html_proofer/element.rb +19 -19
- data/lib/html_proofer/log.rb +19 -19
- data/lib/html_proofer/reporter/cli.rb +22 -18
- data/lib/html_proofer/reporter.rb +3 -3
- data/lib/html_proofer/runner.rb +45 -44
- data/lib/html_proofer/url_validator/external.rb +157 -152
- data/lib/html_proofer/url_validator/internal.rb +72 -62
- data/lib/html_proofer/utils.rb +5 -5
- data/lib/html_proofer/version.rb +1 -1
- data/lib/html_proofer.rb +11 -10
- metadata +22 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c86b30d36446c4e7513915f860d356094394893184e451050133d8db0051588
|
4
|
+
data.tar.gz: dea3274188458eec8e625e3d0e891cb383e9b533cf8f378a9348a1baf724f1e3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 128ba50160a450747d9f7085ac6cea6231c6ec962a9d4b159df9f882682ec78396b5f2ca86264a69b1cbb0b3c28b2e96e2fb93f7e398ee60cee7d4a49b9c6b65
|
7
|
+
data.tar.gz: 323474cfa12ccadc4254f7dab7d035b8569fad92c4337d5946776b14a10dd30213849ed278ad9d3b314dcc20c93c5af78071b9acdaaee6635b4070a88497eb87
|
data/bin/htmlproofer
CHANGED
@@ -26,10 +26,11 @@ Mercenary.program(:htmlproofer) do |p|
|
|
26
26
|
p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker (default: `false`)'
|
27
27
|
p.option 'enforce_https', '--enforce-https', 'Fails a link if it\'s not marked as `https` (default: `true`).'
|
28
28
|
p.option 'extensions', '--extensions ext1,[ext2,...[', Array, 'A comma-separated list of Strings indicating the file extensions you would like to check (including the dot) (default: `.html`)'
|
29
|
+
p.option 'ignore_empty_alt', '--ignore-empty-alt', ' If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those)'
|
29
30
|
p.option 'ignore_files', '--ignore-files file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
|
30
31
|
p.option 'ignore_empty_mailto', '--ignore-empty-mailto', 'If `true`, allows `mailto:` `href`s which do not contain an email address'
|
31
|
-
p.option 'ignore_missing_alt', '--
|
32
|
-
p.option 'ignore_status_codes', '--
|
32
|
+
p.option 'ignore_missing_alt', '--ignore-missing-alt', 'If `true`, ignores images with missing alt tags'
|
33
|
+
p.option 'ignore_status_codes', '--ignore-status-codes 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
|
33
34
|
p.option 'ignore_urls', '--ignore-urls link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. This affects all HTML attributes, such as `alt` tags on images.'
|
34
35
|
p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
|
35
36
|
p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
|
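data/lib/html-proofer.rb
CHANGED
[diff body for this file (+1 -1) was not captured in this extract]
data/lib/html_proofer/attribute/url.rb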
CHANGED
@@ -1,231 +1,243 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
module HTMLProofer
|
4
|
+
class Attribute
|
5
|
+
class Url < HTMLProofer::Attribute
|
6
|
+
attr_reader :url
|
5
7
|
|
6
|
-
|
8
|
+
REMOTE_SCHEMES = ["http", "https"].freeze
|
7
9
|
|
8
|
-
|
9
|
-
|
10
|
+
def initialize(runner, link_attribute, base_url: nil)
|
11
|
+
super
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
if @raw_attribute.nil?
|
14
|
+
@url = nil
|
15
|
+
else
|
16
|
+
@url = @raw_attribute.delete("\u200b").strip
|
17
|
+
@url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)
|
16
18
|
|
17
|
-
|
18
|
-
|
19
|
+
swap_urls!
|
20
|
+
clean_url!
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
22
|
+
# convert "//" links to "https://"
|
23
|
+
@url.start_with?("//") ? @url = "https:#{@url}" : @url
|
24
|
+
end
|
25
|
+
end
|
24
26
|
|
25
|
-
|
26
|
-
|
27
|
-
|
27
|
+
def to_s
|
28
|
+
@url
|
29
|
+
end
|
28
30
|
|
29
|
-
|
30
|
-
|
31
|
+
def known_extension?
|
32
|
+
return true if hash_link?
|
31
33
|
|
32
|
-
|
34
|
+
ext = File.extname(path)
|
33
35
|
|
34
|
-
|
35
|
-
|
36
|
+
# no extension means we use the assumed one
|
37
|
+
return @runner.options[:extensions].include?(@runner.options[:assume_extension]) if blank?(ext)
|
36
38
|
|
37
|
-
|
38
|
-
|
39
|
+
@runner.options[:extensions].include?(ext)
|
40
|
+
end
|
39
41
|
|
40
|
-
|
41
|
-
|
42
|
-
|
42
|
+
def unknown_extension?
|
43
|
+
!known_extension?
|
44
|
+
end
|
43
45
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
46
|
+
def ignore?
|
47
|
+
return true if /^javascript:/.match?(@url)
|
48
|
+
return true if ignores_pattern?(@runner.options[:ignore_urls])
|
49
|
+
end
|
48
50
|
|
49
|
-
|
50
|
-
|
51
|
-
|
51
|
+
def valid?
|
52
|
+
!parts.nil?
|
53
|
+
end
|
52
54
|
|
53
|
-
|
54
|
-
|
55
|
-
|
55
|
+
def path?
|
56
|
+
!parts.host.nil? && !parts.path.nil?
|
57
|
+
end
|
56
58
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
59
|
+
def parts
|
60
|
+
@parts ||= Addressable::URI.parse(@url)
|
61
|
+
rescue URI::Error, Addressable::URI::InvalidURIError
|
62
|
+
@parts = nil
|
63
|
+
end
|
62
64
|
|
63
|
-
|
64
|
-
|
65
|
-
|
65
|
+
def path
|
66
|
+
Addressable::URI.unencode(parts.path) unless parts.nil?
|
67
|
+
end
|
66
68
|
|
67
|
-
|
68
|
-
|
69
|
-
|
69
|
+
def hash
|
70
|
+
parts&.fragment
|
71
|
+
end
|
70
72
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
73
|
+
# Does the URL have a hash?
|
74
|
+
def hash?
|
75
|
+
!blank?(hash)
|
76
|
+
end
|
75
77
|
|
76
|
-
|
77
|
-
|
78
|
-
|
78
|
+
def scheme
|
79
|
+
parts&.scheme
|
80
|
+
end
|
79
81
|
|
80
|
-
|
81
|
-
|
82
|
-
|
82
|
+
def remote?
|
83
|
+
REMOTE_SCHEMES.include?(scheme)
|
84
|
+
end
|
83
85
|
|
84
|
-
|
85
|
-
|
86
|
-
|
86
|
+
def http?
|
87
|
+
scheme == "http"
|
88
|
+
end
|
87
89
|
|
88
|
-
|
89
|
-
|
90
|
-
|
90
|
+
def https?
|
91
|
+
scheme == "https"
|
92
|
+
end
|
91
93
|
|
92
|
-
|
93
|
-
|
94
|
-
|
94
|
+
def non_http_remote?
|
95
|
+
!scheme.nil? && !remote?
|
96
|
+
end
|
95
97
|
|
96
|
-
|
97
|
-
|
98
|
-
|
98
|
+
def host
|
99
|
+
parts&.host
|
100
|
+
end
|
99
101
|
|
100
|
-
|
101
|
-
|
102
|
-
|
102
|
+
def domain_path
|
103
|
+
(host || "") + path
|
104
|
+
end
|
103
105
|
|
104
|
-
|
105
|
-
|
106
|
-
|
106
|
+
def query_values
|
107
|
+
parts&.query_values
|
108
|
+
end
|
107
109
|
|
108
|
-
|
109
|
-
|
110
|
-
|
110
|
+
# checks if a file exists relative to the current pwd
|
111
|
+
def exists?
|
112
|
+
return true if base64?
|
111
113
|
|
112
|
-
|
114
|
+
return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)
|
113
115
|
|
114
|
-
|
115
|
-
|
116
|
+
@runner.checked_paths[absolute_path] = File.exist?(absolute_path)
|
117
|
+
end
|
116
118
|
|
117
|
-
|
118
|
-
|
119
|
-
|
119
|
+
def base64?
|
120
|
+
/^data:image/.match?(@raw_attribute)
|
121
|
+
end
|
120
122
|
|
121
|
-
|
122
|
-
|
123
|
+
def absolute_path
|
124
|
+
path = file_path || @runner.current_filename
|
123
125
|
|
124
|
-
|
125
|
-
|
126
|
+
File.expand_path(path, Dir.pwd)
|
127
|
+
end
|
126
128
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
129
|
+
def file_path
|
130
|
+
return if path.nil? || path.empty?
|
131
|
+
|
132
|
+
path_dot_ext = ""
|
133
|
+
|
134
|
+
path_dot_ext = path + @runner.options[:assume_extension] unless blank?(@runner.options[:assume_extension])
|
135
|
+
|
136
|
+
base = if absolute_path?(path) # path relative to root
|
137
|
+
# either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
|
138
|
+
@runner.options[:root_dir] || (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source))
|
139
|
+
# relative links, path is a file
|
140
|
+
elsif File.exist?(File.expand_path(path,
|
141
|
+
@runner.current_source)) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
|
142
|
+
File.dirname(@runner.current_filename)
|
143
|
+
# relative links in nested dir, path is a file
|
144
|
+
elsif File.exist?(File.join(File.dirname(@runner.current_filename),
|
145
|
+
path)) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
|
146
|
+
File.dirname(@runner.current_filename)
|
147
|
+
# relative link, path is a directory
|
148
|
+
else
|
149
|
+
@runner.current_filename
|
150
|
+
end
|
151
|
+
|
152
|
+
file = File.join(base, path)
|
153
|
+
|
154
|
+
if @runner.options[:assume_extension] && File.file?("#{file}#{@runner.options[:assume_extension]}")
|
155
|
+
file = "#{file}#{@runner.options[:assume_extension]}"
|
156
|
+
elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
|
157
|
+
file = File.join(file, @runner.options[:directory_index_file])
|
158
|
+
end
|
159
|
+
|
160
|
+
file
|
161
|
+
end
|
155
162
|
|
156
|
-
|
157
|
-
|
163
|
+
def unslashed_directory?(file)
|
164
|
+
return false unless File.directory?(file)
|
158
165
|
|
159
|
-
|
160
|
-
|
161
|
-
end
|
166
|
+
!file.end_with?(File::SEPARATOR) && !follow_location?
|
167
|
+
end
|
162
168
|
|
163
|
-
|
164
|
-
|
165
|
-
|
169
|
+
def follow_location?
|
170
|
+
@runner.options[:typhoeus] && @runner.options[:typhoeus][:followlocation]
|
171
|
+
end
|
166
172
|
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
end
|
173
|
+
def absolute_path?(path)
|
174
|
+
path.start_with?("/")
|
175
|
+
end
|
171
176
|
|
172
|
-
|
173
|
-
|
174
|
-
|
177
|
+
# path is external to the file
|
178
|
+
def external?
|
179
|
+
!internal?
|
180
|
+
end
|
175
181
|
|
176
|
-
|
177
|
-
|
178
|
-
|
182
|
+
def internal?
|
183
|
+
relative_link? || internal_absolute_link? || hash_link?
|
184
|
+
end
|
179
185
|
|
180
|
-
|
181
|
-
|
186
|
+
def internal_absolute_link?
|
187
|
+
url.start_with?("/")
|
188
|
+
end
|
182
189
|
|
183
|
-
|
184
|
-
|
190
|
+
def relative_link?
|
191
|
+
return false if remote?
|
185
192
|
|
186
|
-
|
187
|
-
|
188
|
-
end
|
193
|
+
hash_link? || param_link? || url.start_with?(".") || url =~ /^\S/
|
194
|
+
end
|
189
195
|
|
190
|
-
|
191
|
-
|
192
|
-
|
196
|
+
def link_points_to_same_page?
|
197
|
+
hash_link || param_link
|
198
|
+
end
|
193
199
|
|
194
|
-
|
195
|
-
|
196
|
-
|
200
|
+
def hash_link?
|
201
|
+
url.start_with?("#")
|
202
|
+
end
|
197
203
|
|
198
|
-
|
199
|
-
|
200
|
-
|
204
|
+
def param_link?
|
205
|
+
url.start_with?("?")
|
206
|
+
end
|
201
207
|
|
202
|
-
|
203
|
-
|
204
|
-
|
208
|
+
def sans_hash
|
209
|
+
@url.to_s.sub(/##{hash}/, "")
|
210
|
+
end
|
205
211
|
|
206
|
-
|
207
|
-
|
212
|
+
# catch any obvious issues, like strings in port numbers
|
213
|
+
private def clean_url!
|
214
|
+
return if @url =~ /^([!#{Regexp.last_match(0)}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
|
208
215
|
|
209
|
-
|
210
|
-
|
216
|
+
@url = Addressable::URI.parse(@url).normalize.to_s
|
217
|
+
end
|
211
218
|
|
212
|
-
|
213
|
-
|
214
|
-
end
|
215
|
-
end
|
219
|
+
private def swap_urls!
|
220
|
+
return @url if blank?(replacements = @runner.options[:swap_urls])
|
216
221
|
|
217
|
-
|
218
|
-
|
222
|
+
replacements.each do |link, replace|
|
223
|
+
@url = @url.gsub(link, replace)
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
private def ignores_pattern?(links_to_ignore)
|
228
|
+
return false unless links_to_ignore.is_a?(Array)
|
219
229
|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
230
|
+
links_to_ignore.each do |link_to_ignore|
|
231
|
+
case link_to_ignore
|
232
|
+
when String
|
233
|
+
return true if link_to_ignore == @raw_attribute
|
234
|
+
when Regexp
|
235
|
+
return true if link_to_ignore&.match?(@raw_attribute)
|
236
|
+
end
|
237
|
+
end
|
238
|
+
|
239
|
+
false
|
226
240
|
end
|
227
241
|
end
|
228
|
-
|
229
|
-
false
|
230
242
|
end
|
231
243
|
end
|