html-proofer 4.0.0.rc3 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +6 -3
- data/lib/html-proofer.rb +1 -1
- data/lib/html_proofer/attribute/url.rb +186 -174
- data/lib/html_proofer/cache.rb +128 -85
- data/lib/html_proofer/check/favicon.rb +29 -24
- data/lib/html_proofer/check/images.rb +87 -47
- data/lib/html_proofer/check/links.rb +109 -98
- data/lib/html_proofer/check/open_graph.rb +30 -25
- data/lib/html_proofer/check/scripts.rb +36 -28
- data/lib/html_proofer/check.rb +11 -10
- data/lib/html_proofer/configuration.rb +16 -15
- data/lib/html_proofer/element.rb +41 -19
- data/lib/html_proofer/log.rb +19 -19
- data/lib/html_proofer/reporter/cli.rb +22 -18
- data/lib/html_proofer/reporter.rb +3 -3
- data/lib/html_proofer/runner.rb +45 -44
- data/lib/html_proofer/url_validator/external.rb +157 -152
- data/lib/html_proofer/url_validator/internal.rb +72 -62
- data/lib/html_proofer/utils.rb +5 -5
- data/lib/html_proofer/version.rb +1 -1
- data/lib/html_proofer.rb +11 -9
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c827c0b89b31ad51e1a4b177f190bdcbbb8f8fd933f235420be60da28e43cfb6
|
4
|
+
data.tar.gz: e47a4bf7944efc46622762720036c846c1ccb82acf339d99a93d7fade05eb1d9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c4b46613b516b0fbbaf52747d812dd390598b2bd3db6d45e342d2946ed012a7da2d47f36a0bae9a7c8b09fd484d7d0ecf2e4d12714f1044bb0ef2cfd5bd1d6da
|
7
|
+
data.tar.gz: '005058a0df180ce5619c2733438189842645840e1b94b6209bd3b2b6751b6cdd5f30b2adaa4aaed1e797e1332cfaf98388d54e06bf4497f5eb6f9796be71a1d8'
|
data/bin/htmlproofer
CHANGED
@@ -24,12 +24,13 @@ Mercenary.program(:htmlproofer) do |p|
|
|
24
24
|
p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
|
25
25
|
p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
|
26
26
|
p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker (default: `false`)'
|
27
|
-
p.option 'enforce_https', '--enforce-https', 'Fails a link if it\'s not marked as `https` (default: `true`).'
|
27
|
+
p.option 'enforce_https', '--enforce-https <false>', String, 'Fails a link if it\'s not marked as `https` (default: `true`).'
|
28
28
|
p.option 'extensions', '--extensions ext1,[ext2,...[', Array, 'A comma-separated list of Strings indicating the file extensions you would like to check (including the dot) (default: `.html`)'
|
29
|
+
p.option 'ignore_empty_alt', '--ignore-empty-alt', ' If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those)'
|
29
30
|
p.option 'ignore_files', '--ignore-files file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
|
30
31
|
p.option 'ignore_empty_mailto', '--ignore-empty-mailto', 'If `true`, allows `mailto:` `href`s which do not contain an email address'
|
31
|
-
p.option 'ignore_missing_alt', '--
|
32
|
-
p.option 'ignore_status_codes', '--
|
32
|
+
p.option 'ignore_missing_alt', '--ignore-missing-alt', 'If `true`, ignores images with missing alt tags'
|
33
|
+
p.option 'ignore_status_codes', '--ignore-status-codes 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
|
33
34
|
p.option 'ignore_urls', '--ignore-urls link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. This affects all HTML attributes, such as `alt` tags on images.'
|
34
35
|
p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
|
35
36
|
p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
|
@@ -66,6 +67,8 @@ Mercenary.program(:htmlproofer) do |p|
|
|
66
67
|
end
|
67
68
|
end
|
68
69
|
|
70
|
+
options[:enforce_https] = false if opts['enforce_https'] == "false"
|
71
|
+
|
69
72
|
options[:log_level] = opts['log_level'].to_sym unless opts['log_level'].nil?
|
70
73
|
|
71
74
|
options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus', opts['typhoeus'], symbolize_names: false) unless opts['typhoeus'].nil?
|
data/lib/html_proofer/attribute/url.rb
CHANGED
@@ -1,231 +1,243 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
module HTMLProofer
|
4
|
+
class Attribute
|
5
|
+
class Url < HTMLProofer::Attribute
|
6
|
+
attr_reader :url
|
5
7
|
|
6
|
-
|
8
|
+
REMOTE_SCHEMES = ["http", "https"].freeze
|
7
9
|
|
8
|
-
|
9
|
-
|
10
|
+
def initialize(runner, link_attribute, base_url: nil)
|
11
|
+
super
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
if @raw_attribute.nil?
|
14
|
+
@url = nil
|
15
|
+
else
|
16
|
+
@url = @raw_attribute.delete("\u200b").strip
|
17
|
+
@url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)
|
16
18
|
|
17
|
-
|
18
|
-
|
19
|
+
swap_urls!
|
20
|
+
clean_url!
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
22
|
+
# convert "//" links to "https://"
|
23
|
+
@url.start_with?("//") ? @url = "https:#{@url}" : @url
|
24
|
+
end
|
25
|
+
end
|
24
26
|
|
25
|
-
|
26
|
-
|
27
|
-
|
27
|
+
def to_s
|
28
|
+
@url
|
29
|
+
end
|
28
30
|
|
29
|
-
|
30
|
-
|
31
|
+
def known_extension?
|
32
|
+
return true if hash_link?
|
31
33
|
|
32
|
-
|
34
|
+
ext = File.extname(path)
|
33
35
|
|
34
|
-
|
35
|
-
|
36
|
+
# no extension means we use the assumed one
|
37
|
+
return @runner.options[:extensions].include?(@runner.options[:assume_extension]) if blank?(ext)
|
36
38
|
|
37
|
-
|
38
|
-
|
39
|
+
@runner.options[:extensions].include?(ext)
|
40
|
+
end
|
39
41
|
|
40
|
-
|
41
|
-
|
42
|
-
|
42
|
+
def unknown_extension?
|
43
|
+
!known_extension?
|
44
|
+
end
|
43
45
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
46
|
+
def ignore?
|
47
|
+
return true if /^javascript:/.match?(@url)
|
48
|
+
return true if ignores_pattern?(@runner.options[:ignore_urls])
|
49
|
+
end
|
48
50
|
|
49
|
-
|
50
|
-
|
51
|
-
|
51
|
+
def valid?
|
52
|
+
!parts.nil?
|
53
|
+
end
|
52
54
|
|
53
|
-
|
54
|
-
|
55
|
-
|
55
|
+
def path?
|
56
|
+
!parts.host.nil? && !parts.path.nil?
|
57
|
+
end
|
56
58
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
59
|
+
def parts
|
60
|
+
@parts ||= Addressable::URI.parse(@url)
|
61
|
+
rescue URI::Error, Addressable::URI::InvalidURIError
|
62
|
+
@parts = nil
|
63
|
+
end
|
62
64
|
|
63
|
-
|
64
|
-
|
65
|
-
|
65
|
+
def path
|
66
|
+
Addressable::URI.unencode(parts.path) unless parts.nil?
|
67
|
+
end
|
66
68
|
|
67
|
-
|
68
|
-
|
69
|
-
|
69
|
+
def hash
|
70
|
+
parts&.fragment
|
71
|
+
end
|
70
72
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
73
|
+
# Does the URL have a hash?
|
74
|
+
def hash?
|
75
|
+
!blank?(hash)
|
76
|
+
end
|
75
77
|
|
76
|
-
|
77
|
-
|
78
|
-
|
78
|
+
def scheme
|
79
|
+
parts&.scheme
|
80
|
+
end
|
79
81
|
|
80
|
-
|
81
|
-
|
82
|
-
|
82
|
+
def remote?
|
83
|
+
REMOTE_SCHEMES.include?(scheme)
|
84
|
+
end
|
83
85
|
|
84
|
-
|
85
|
-
|
86
|
-
|
86
|
+
def http?
|
87
|
+
scheme == "http"
|
88
|
+
end
|
87
89
|
|
88
|
-
|
89
|
-
|
90
|
-
|
90
|
+
def https?
|
91
|
+
scheme == "https"
|
92
|
+
end
|
91
93
|
|
92
|
-
|
93
|
-
|
94
|
-
|
94
|
+
def non_http_remote?
|
95
|
+
!scheme.nil? && !remote?
|
96
|
+
end
|
95
97
|
|
96
|
-
|
97
|
-
|
98
|
-
|
98
|
+
def host
|
99
|
+
parts&.host
|
100
|
+
end
|
99
101
|
|
100
|
-
|
101
|
-
|
102
|
-
|
102
|
+
def domain_path
|
103
|
+
(host || "") + path
|
104
|
+
end
|
103
105
|
|
104
|
-
|
105
|
-
|
106
|
-
|
106
|
+
def query_values
|
107
|
+
parts&.query_values
|
108
|
+
end
|
107
109
|
|
108
|
-
|
109
|
-
|
110
|
-
|
110
|
+
# checks if a file exists relative to the current pwd
|
111
|
+
def exists?
|
112
|
+
return true if base64?
|
111
113
|
|
112
|
-
|
114
|
+
return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)
|
113
115
|
|
114
|
-
|
115
|
-
|
116
|
+
@runner.checked_paths[absolute_path] = File.exist?(absolute_path)
|
117
|
+
end
|
116
118
|
|
117
|
-
|
118
|
-
|
119
|
-
|
119
|
+
def base64?
|
120
|
+
/^data:image/.match?(@raw_attribute)
|
121
|
+
end
|
120
122
|
|
121
|
-
|
122
|
-
|
123
|
+
def absolute_path
|
124
|
+
path = file_path || @runner.current_filename
|
123
125
|
|
124
|
-
|
125
|
-
|
126
|
+
File.expand_path(path, Dir.pwd)
|
127
|
+
end
|
126
128
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
129
|
+
def file_path
|
130
|
+
return if path.nil? || path.empty?
|
131
|
+
|
132
|
+
path_dot_ext = ""
|
133
|
+
|
134
|
+
path_dot_ext = path + @runner.options[:assume_extension] unless blank?(@runner.options[:assume_extension])
|
135
|
+
|
136
|
+
base = if absolute_path?(path) # path relative to root
|
137
|
+
# either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
|
138
|
+
@runner.options[:root_dir] || (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source))
|
139
|
+
# relative links, path is a file
|
140
|
+
elsif File.exist?(File.expand_path(path,
|
141
|
+
@runner.current_source)) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
|
142
|
+
File.dirname(@runner.current_filename)
|
143
|
+
# relative links in nested dir, path is a file
|
144
|
+
elsif File.exist?(File.join(File.dirname(@runner.current_filename),
|
145
|
+
path)) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
|
146
|
+
File.dirname(@runner.current_filename)
|
147
|
+
# relative link, path is a directory
|
148
|
+
else
|
149
|
+
@runner.current_filename
|
150
|
+
end
|
151
|
+
|
152
|
+
file = File.join(base, path)
|
153
|
+
|
154
|
+
if @runner.options[:assume_extension] && File.file?("#{file}#{@runner.options[:assume_extension]}")
|
155
|
+
file = "#{file}#{@runner.options[:assume_extension]}"
|
156
|
+
elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
|
157
|
+
file = File.join(file, @runner.options[:directory_index_file])
|
158
|
+
end
|
159
|
+
|
160
|
+
file
|
161
|
+
end
|
155
162
|
|
156
|
-
|
157
|
-
|
163
|
+
def unslashed_directory?(file)
|
164
|
+
return false unless File.directory?(file)
|
158
165
|
|
159
|
-
|
160
|
-
|
161
|
-
end
|
166
|
+
!file.end_with?(File::SEPARATOR) && !follow_location?
|
167
|
+
end
|
162
168
|
|
163
|
-
|
164
|
-
|
165
|
-
|
169
|
+
def follow_location?
|
170
|
+
@runner.options[:typhoeus] && @runner.options[:typhoeus][:followlocation]
|
171
|
+
end
|
166
172
|
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
end
|
173
|
+
def absolute_path?(path)
|
174
|
+
path.start_with?("/")
|
175
|
+
end
|
171
176
|
|
172
|
-
|
173
|
-
|
174
|
-
|
177
|
+
# path is external to the file
|
178
|
+
def external?
|
179
|
+
!internal?
|
180
|
+
end
|
175
181
|
|
176
|
-
|
177
|
-
|
178
|
-
|
182
|
+
def internal?
|
183
|
+
relative_link? || internal_absolute_link? || hash_link?
|
184
|
+
end
|
179
185
|
|
180
|
-
|
181
|
-
|
186
|
+
def internal_absolute_link?
|
187
|
+
url.start_with?("/")
|
188
|
+
end
|
182
189
|
|
183
|
-
|
184
|
-
|
190
|
+
def relative_link?
|
191
|
+
return false if remote?
|
185
192
|
|
186
|
-
|
187
|
-
|
188
|
-
end
|
193
|
+
hash_link? || param_link? || url.start_with?(".") || url =~ /^\S/
|
194
|
+
end
|
189
195
|
|
190
|
-
|
191
|
-
|
192
|
-
|
196
|
+
def link_points_to_same_page?
|
197
|
+
hash_link || param_link
|
198
|
+
end
|
193
199
|
|
194
|
-
|
195
|
-
|
196
|
-
|
200
|
+
def hash_link?
|
201
|
+
url.start_with?("#")
|
202
|
+
end
|
197
203
|
|
198
|
-
|
199
|
-
|
200
|
-
|
204
|
+
def param_link?
|
205
|
+
url.start_with?("?")
|
206
|
+
end
|
201
207
|
|
202
|
-
|
203
|
-
|
204
|
-
|
208
|
+
def sans_hash
|
209
|
+
@url.to_s.sub(/##{hash}/, "")
|
210
|
+
end
|
205
211
|
|
206
|
-
|
207
|
-
|
212
|
+
# catch any obvious issues, like strings in port numbers
|
213
|
+
private def clean_url!
|
214
|
+
return if @url =~ /^([!#{Regexp.last_match(0)}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
|
208
215
|
|
209
|
-
|
210
|
-
|
216
|
+
@url = Addressable::URI.parse(@url).normalize.to_s
|
217
|
+
end
|
211
218
|
|
212
|
-
|
213
|
-
|
214
|
-
end
|
215
|
-
end
|
219
|
+
private def swap_urls!
|
220
|
+
return @url if blank?(replacements = @runner.options[:swap_urls])
|
216
221
|
|
217
|
-
|
218
|
-
|
222
|
+
replacements.each do |link, replace|
|
223
|
+
@url = @url.gsub(link, replace)
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
private def ignores_pattern?(links_to_ignore)
|
228
|
+
return false unless links_to_ignore.is_a?(Array)
|
219
229
|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
230
|
+
links_to_ignore.each do |link_to_ignore|
|
231
|
+
case link_to_ignore
|
232
|
+
when String
|
233
|
+
return true if link_to_ignore == @raw_attribute
|
234
|
+
when Regexp
|
235
|
+
return true if link_to_ignore&.match?(@raw_attribute)
|
236
|
+
end
|
237
|
+
end
|
238
|
+
|
239
|
+
false
|
226
240
|
end
|
227
241
|
end
|
228
|
-
|
229
|
-
false
|
230
242
|
end
|
231
243
|
end
|