html-proofer 4.0.0.rc3 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/htmlproofer +6 -3
- data/lib/html-proofer.rb +1 -1
- data/lib/html_proofer/attribute/url.rb +186 -174
- data/lib/html_proofer/cache.rb +128 -85
- data/lib/html_proofer/check/favicon.rb +29 -24
- data/lib/html_proofer/check/images.rb +87 -47
- data/lib/html_proofer/check/links.rb +109 -98
- data/lib/html_proofer/check/open_graph.rb +30 -25
- data/lib/html_proofer/check/scripts.rb +36 -28
- data/lib/html_proofer/check.rb +11 -10
- data/lib/html_proofer/configuration.rb +16 -15
- data/lib/html_proofer/element.rb +41 -19
- data/lib/html_proofer/log.rb +19 -19
- data/lib/html_proofer/reporter/cli.rb +22 -18
- data/lib/html_proofer/reporter.rb +3 -3
- data/lib/html_proofer/runner.rb +45 -44
- data/lib/html_proofer/url_validator/external.rb +157 -152
- data/lib/html_proofer/url_validator/internal.rb +72 -62
- data/lib/html_proofer/utils.rb +5 -5
- data/lib/html_proofer/version.rb +1 -1
- data/lib/html_proofer.rb +11 -9
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c827c0b89b31ad51e1a4b177f190bdcbbb8f8fd933f235420be60da28e43cfb6
|
4
|
+
data.tar.gz: e47a4bf7944efc46622762720036c846c1ccb82acf339d99a93d7fade05eb1d9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c4b46613b516b0fbbaf52747d812dd390598b2bd3db6d45e342d2946ed012a7da2d47f36a0bae9a7c8b09fd484d7d0ecf2e4d12714f1044bb0ef2cfd5bd1d6da
|
7
|
+
data.tar.gz: '005058a0df180ce5619c2733438189842645840e1b94b6209bd3b2b6751b6cdd5f30b2adaa4aaed1e797e1332cfaf98388d54e06bf4497f5eb6f9796be71a1d8'
|
data/bin/htmlproofer
CHANGED
@@ -24,12 +24,13 @@ Mercenary.program(:htmlproofer) do |p|
|
|
24
24
|
p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
|
25
25
|
p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
|
26
26
|
p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker (default: `false`)'
|
27
|
-
p.option 'enforce_https', '--enforce-https', 'Fails a link if it\'s not marked as `https` (default: `true`).'
|
27
|
+
p.option 'enforce_https', '--enforce-https <false>', String, 'Fails a link if it\'s not marked as `https` (default: `true`).'
|
28
28
|
p.option 'extensions', '--extensions ext1,[ext2,...[', Array, 'A comma-separated list of Strings indicating the file extensions you would like to check (including the dot) (default: `.html`)'
|
29
|
+
p.option 'ignore_empty_alt', '--ignore-empty-alt', ' If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those)'
|
29
30
|
p.option 'ignore_files', '--ignore-files file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
|
30
31
|
p.option 'ignore_empty_mailto', '--ignore-empty-mailto', 'If `true`, allows `mailto:` `href`s which do not contain an email address'
|
31
|
-
p.option 'ignore_missing_alt', '--
|
32
|
-
p.option 'ignore_status_codes', '--
|
32
|
+
p.option 'ignore_missing_alt', '--ignore-missing-alt', 'If `true`, ignores images with missing alt tags'
|
33
|
+
p.option 'ignore_status_codes', '--ignore-status-codes 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
|
33
34
|
p.option 'ignore_urls', '--ignore-urls link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. This affects all HTML attributes, such as `alt` tags on images.'
|
34
35
|
p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
|
35
36
|
p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
|
@@ -66,6 +67,8 @@ Mercenary.program(:htmlproofer) do |p|
|
|
66
67
|
end
|
67
68
|
end
|
68
69
|
|
70
|
+
options[:enforce_https] = false if opts['enforce_https'] == "false"
|
71
|
+
|
69
72
|
options[:log_level] = opts['log_level'].to_sym unless opts['log_level'].nil?
|
70
73
|
|
71
74
|
options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus', opts['typhoeus'], symbolize_names: false) unless opts['typhoeus'].nil?
|
data/lib/html-proofer.rb
CHANGED
@@ -1,231 +1,243 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
module HTMLProofer
|
4
|
+
class Attribute
|
5
|
+
class Url < HTMLProofer::Attribute
|
6
|
+
attr_reader :url
|
5
7
|
|
6
|
-
|
8
|
+
REMOTE_SCHEMES = ["http", "https"].freeze
|
7
9
|
|
8
|
-
|
9
|
-
|
10
|
+
def initialize(runner, link_attribute, base_url: nil)
|
11
|
+
super
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
if @raw_attribute.nil?
|
14
|
+
@url = nil
|
15
|
+
else
|
16
|
+
@url = @raw_attribute.delete("\u200b").strip
|
17
|
+
@url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)
|
16
18
|
|
17
|
-
|
18
|
-
|
19
|
+
swap_urls!
|
20
|
+
clean_url!
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
22
|
+
# convert "//" links to "https://"
|
23
|
+
@url.start_with?("//") ? @url = "https:#{@url}" : @url
|
24
|
+
end
|
25
|
+
end
|
24
26
|
|
25
|
-
|
26
|
-
|
27
|
-
|
27
|
+
def to_s
|
28
|
+
@url
|
29
|
+
end
|
28
30
|
|
29
|
-
|
30
|
-
|
31
|
+
def known_extension?
|
32
|
+
return true if hash_link?
|
31
33
|
|
32
|
-
|
34
|
+
ext = File.extname(path)
|
33
35
|
|
34
|
-
|
35
|
-
|
36
|
+
# no extension means we use the assumed one
|
37
|
+
return @runner.options[:extensions].include?(@runner.options[:assume_extension]) if blank?(ext)
|
36
38
|
|
37
|
-
|
38
|
-
|
39
|
+
@runner.options[:extensions].include?(ext)
|
40
|
+
end
|
39
41
|
|
40
|
-
|
41
|
-
|
42
|
-
|
42
|
+
def unknown_extension?
|
43
|
+
!known_extension?
|
44
|
+
end
|
43
45
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
46
|
+
def ignore?
|
47
|
+
return true if /^javascript:/.match?(@url)
|
48
|
+
return true if ignores_pattern?(@runner.options[:ignore_urls])
|
49
|
+
end
|
48
50
|
|
49
|
-
|
50
|
-
|
51
|
-
|
51
|
+
def valid?
|
52
|
+
!parts.nil?
|
53
|
+
end
|
52
54
|
|
53
|
-
|
54
|
-
|
55
|
-
|
55
|
+
def path?
|
56
|
+
!parts.host.nil? && !parts.path.nil?
|
57
|
+
end
|
56
58
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
59
|
+
def parts
|
60
|
+
@parts ||= Addressable::URI.parse(@url)
|
61
|
+
rescue URI::Error, Addressable::URI::InvalidURIError
|
62
|
+
@parts = nil
|
63
|
+
end
|
62
64
|
|
63
|
-
|
64
|
-
|
65
|
-
|
65
|
+
def path
|
66
|
+
Addressable::URI.unencode(parts.path) unless parts.nil?
|
67
|
+
end
|
66
68
|
|
67
|
-
|
68
|
-
|
69
|
-
|
69
|
+
def hash
|
70
|
+
parts&.fragment
|
71
|
+
end
|
70
72
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
73
|
+
# Does the URL have a hash?
|
74
|
+
def hash?
|
75
|
+
!blank?(hash)
|
76
|
+
end
|
75
77
|
|
76
|
-
|
77
|
-
|
78
|
-
|
78
|
+
def scheme
|
79
|
+
parts&.scheme
|
80
|
+
end
|
79
81
|
|
80
|
-
|
81
|
-
|
82
|
-
|
82
|
+
def remote?
|
83
|
+
REMOTE_SCHEMES.include?(scheme)
|
84
|
+
end
|
83
85
|
|
84
|
-
|
85
|
-
|
86
|
-
|
86
|
+
def http?
|
87
|
+
scheme == "http"
|
88
|
+
end
|
87
89
|
|
88
|
-
|
89
|
-
|
90
|
-
|
90
|
+
def https?
|
91
|
+
scheme == "https"
|
92
|
+
end
|
91
93
|
|
92
|
-
|
93
|
-
|
94
|
-
|
94
|
+
def non_http_remote?
|
95
|
+
!scheme.nil? && !remote?
|
96
|
+
end
|
95
97
|
|
96
|
-
|
97
|
-
|
98
|
-
|
98
|
+
def host
|
99
|
+
parts&.host
|
100
|
+
end
|
99
101
|
|
100
|
-
|
101
|
-
|
102
|
-
|
102
|
+
def domain_path
|
103
|
+
(host || "") + path
|
104
|
+
end
|
103
105
|
|
104
|
-
|
105
|
-
|
106
|
-
|
106
|
+
def query_values
|
107
|
+
parts&.query_values
|
108
|
+
end
|
107
109
|
|
108
|
-
|
109
|
-
|
110
|
-
|
110
|
+
# checks if a file exists relative to the current pwd
|
111
|
+
def exists?
|
112
|
+
return true if base64?
|
111
113
|
|
112
|
-
|
114
|
+
return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)
|
113
115
|
|
114
|
-
|
115
|
-
|
116
|
+
@runner.checked_paths[absolute_path] = File.exist?(absolute_path)
|
117
|
+
end
|
116
118
|
|
117
|
-
|
118
|
-
|
119
|
-
|
119
|
+
def base64?
|
120
|
+
/^data:image/.match?(@raw_attribute)
|
121
|
+
end
|
120
122
|
|
121
|
-
|
122
|
-
|
123
|
+
def absolute_path
|
124
|
+
path = file_path || @runner.current_filename
|
123
125
|
|
124
|
-
|
125
|
-
|
126
|
+
File.expand_path(path, Dir.pwd)
|
127
|
+
end
|
126
128
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
129
|
+
def file_path
|
130
|
+
return if path.nil? || path.empty?
|
131
|
+
|
132
|
+
path_dot_ext = ""
|
133
|
+
|
134
|
+
path_dot_ext = path + @runner.options[:assume_extension] unless blank?(@runner.options[:assume_extension])
|
135
|
+
|
136
|
+
base = if absolute_path?(path) # path relative to root
|
137
|
+
# either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
|
138
|
+
@runner.options[:root_dir] || (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source))
|
139
|
+
# relative links, path is a file
|
140
|
+
elsif File.exist?(File.expand_path(path,
|
141
|
+
@runner.current_source)) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
|
142
|
+
File.dirname(@runner.current_filename)
|
143
|
+
# relative links in nested dir, path is a file
|
144
|
+
elsif File.exist?(File.join(File.dirname(@runner.current_filename),
|
145
|
+
path)) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
|
146
|
+
File.dirname(@runner.current_filename)
|
147
|
+
# relative link, path is a directory
|
148
|
+
else
|
149
|
+
@runner.current_filename
|
150
|
+
end
|
151
|
+
|
152
|
+
file = File.join(base, path)
|
153
|
+
|
154
|
+
if @runner.options[:assume_extension] && File.file?("#{file}#{@runner.options[:assume_extension]}")
|
155
|
+
file = "#{file}#{@runner.options[:assume_extension]}"
|
156
|
+
elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
|
157
|
+
file = File.join(file, @runner.options[:directory_index_file])
|
158
|
+
end
|
159
|
+
|
160
|
+
file
|
161
|
+
end
|
155
162
|
|
156
|
-
|
157
|
-
|
163
|
+
def unslashed_directory?(file)
|
164
|
+
return false unless File.directory?(file)
|
158
165
|
|
159
|
-
|
160
|
-
|
161
|
-
end
|
166
|
+
!file.end_with?(File::SEPARATOR) && !follow_location?
|
167
|
+
end
|
162
168
|
|
163
|
-
|
164
|
-
|
165
|
-
|
169
|
+
def follow_location?
|
170
|
+
@runner.options[:typhoeus] && @runner.options[:typhoeus][:followlocation]
|
171
|
+
end
|
166
172
|
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
end
|
173
|
+
def absolute_path?(path)
|
174
|
+
path.start_with?("/")
|
175
|
+
end
|
171
176
|
|
172
|
-
|
173
|
-
|
174
|
-
|
177
|
+
# path is external to the file
|
178
|
+
def external?
|
179
|
+
!internal?
|
180
|
+
end
|
175
181
|
|
176
|
-
|
177
|
-
|
178
|
-
|
182
|
+
def internal?
|
183
|
+
relative_link? || internal_absolute_link? || hash_link?
|
184
|
+
end
|
179
185
|
|
180
|
-
|
181
|
-
|
186
|
+
def internal_absolute_link?
|
187
|
+
url.start_with?("/")
|
188
|
+
end
|
182
189
|
|
183
|
-
|
184
|
-
|
190
|
+
def relative_link?
|
191
|
+
return false if remote?
|
185
192
|
|
186
|
-
|
187
|
-
|
188
|
-
end
|
193
|
+
hash_link? || param_link? || url.start_with?(".") || url =~ /^\S/
|
194
|
+
end
|
189
195
|
|
190
|
-
|
191
|
-
|
192
|
-
|
196
|
+
def link_points_to_same_page?
|
197
|
+
hash_link || param_link
|
198
|
+
end
|
193
199
|
|
194
|
-
|
195
|
-
|
196
|
-
|
200
|
+
def hash_link?
|
201
|
+
url.start_with?("#")
|
202
|
+
end
|
197
203
|
|
198
|
-
|
199
|
-
|
200
|
-
|
204
|
+
def param_link?
|
205
|
+
url.start_with?("?")
|
206
|
+
end
|
201
207
|
|
202
|
-
|
203
|
-
|
204
|
-
|
208
|
+
def sans_hash
|
209
|
+
@url.to_s.sub(/##{hash}/, "")
|
210
|
+
end
|
205
211
|
|
206
|
-
|
207
|
-
|
212
|
+
# catch any obvious issues, like strings in port numbers
|
213
|
+
private def clean_url!
|
214
|
+
return if @url =~ /^([!#{Regexp.last_match(0)}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
|
208
215
|
|
209
|
-
|
210
|
-
|
216
|
+
@url = Addressable::URI.parse(@url).normalize.to_s
|
217
|
+
end
|
211
218
|
|
212
|
-
|
213
|
-
|
214
|
-
end
|
215
|
-
end
|
219
|
+
private def swap_urls!
|
220
|
+
return @url if blank?(replacements = @runner.options[:swap_urls])
|
216
221
|
|
217
|
-
|
218
|
-
|
222
|
+
replacements.each do |link, replace|
|
223
|
+
@url = @url.gsub(link, replace)
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
private def ignores_pattern?(links_to_ignore)
|
228
|
+
return false unless links_to_ignore.is_a?(Array)
|
219
229
|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
230
|
+
links_to_ignore.each do |link_to_ignore|
|
231
|
+
case link_to_ignore
|
232
|
+
when String
|
233
|
+
return true if link_to_ignore == @raw_attribute
|
234
|
+
when Regexp
|
235
|
+
return true if link_to_ignore&.match?(@raw_attribute)
|
236
|
+
end
|
237
|
+
end
|
238
|
+
|
239
|
+
false
|
226
240
|
end
|
227
241
|
end
|
228
|
-
|
229
|
-
false
|
230
242
|
end
|
231
243
|
end
|