html-proofer 5.0.0 → 5.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/html_proofer/attribute/url.rb +54 -37
- data/lib/html_proofer/cache.rb +8 -5
- data/lib/html_proofer/check/favicon.rb +8 -4
- data/lib/html_proofer/check/images.rb +31 -17
- data/lib/html_proofer/check/links.rb +36 -21
- data/lib/html_proofer/check/open_graph.rb +11 -7
- data/lib/html_proofer/check/scripts.rb +21 -11
- data/lib/html_proofer/check.rb +12 -6
- data/lib/html_proofer/configuration.rb +60 -30
- data/lib/html_proofer/element.rb +24 -9
- data/lib/html_proofer/log.rb +6 -4
- data/lib/html_proofer/reporter.rb +2 -2
- data/lib/html_proofer/runner.rb +17 -9
- data/lib/html_proofer/url_validator/external.rb +32 -4
- data/lib/html_proofer/url_validator/internal.rb +29 -22
- data/lib/html_proofer/version.rb +1 -1
- data/lib/html_proofer/xpath_functions.rb +1 -1
- data/lib/html_proofer.rb +4 -1
- metadata +28 -104
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d9ed4a509a2d120a3aff5ad4809f6b91feff3842dceb371015aca5fd08e69bb2
|
4
|
+
data.tar.gz: 0db88e877a8fc19022d4fd1f382e4d38bfeead1540c42817e33799ccf2527bc7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6c888e7975c91eb41124057eba57fb3bdd9f8d82f38be6958c82649458526d7a41cb9aac8d4221678c273db419708cb2abfce2ce0be7396acd2b97295e938844
|
7
|
+
data.tar.gz: 61acc2e9c73ba8ca51abca84e21524e145a3f98f8d0d8db4c9683d13407610ea13dfe04342cc861ca0109cd8f9ac1fa24300bb0df684a198787ee5a2d40c8cde
|
data/lib/html_proofer/attribute/url.rb
CHANGED
@@ -3,13 +3,16 @@
|
|
3
3
|
module HTMLProofer
|
4
4
|
class Attribute
|
5
5
|
class Url < HTMLProofer::Attribute
|
6
|
-
attr_reader :url, :size
|
6
|
+
attr_reader :url, :size, :source, :filename
|
7
7
|
|
8
8
|
REMOTE_SCHEMES = ["http", "https"].freeze
|
9
9
|
|
10
|
-
def initialize(runner, link_attribute, base_url: nil, extract_size: false)
|
10
|
+
def initialize(runner, link_attribute, base_url: nil, source: nil, filename: nil, extract_size: false)
|
11
11
|
super
|
12
12
|
|
13
|
+
@source = source
|
14
|
+
@filename = filename
|
15
|
+
|
13
16
|
if @raw_attribute.nil?
|
14
17
|
@url = nil
|
15
18
|
else
|
@@ -49,7 +52,8 @@ module HTMLProofer
|
|
49
52
|
|
50
53
|
def ignore?
|
51
54
|
return true if /^javascript:/.match?(@url)
|
52
|
-
|
55
|
+
|
56
|
+
true if ignores_pattern?(@runner.options[:ignore_urls])
|
53
57
|
end
|
54
58
|
|
55
59
|
def valid?
|
@@ -115,9 +119,29 @@ module HTMLProofer
|
|
115
119
|
def exists?
|
116
120
|
return true if base64?
|
117
121
|
|
118
|
-
|
122
|
+
!resolved_path.nil?
|
123
|
+
end
|
124
|
+
|
125
|
+
def resolved_path
|
126
|
+
path_to_resolve = absolute_path
|
119
127
|
|
120
|
-
@runner.
|
128
|
+
return @runner.resolved_paths[path_to_resolve] if @runner.resolved_paths.key?(path_to_resolve)
|
129
|
+
|
130
|
+
# extensionless URLs
|
131
|
+
path_with_extension = "#{path_to_resolve}#{@runner.options[:assume_extension]}"
|
132
|
+
resolved = if @runner.options[:assume_extension] && File.file?(path_with_extension)
|
133
|
+
path_with_extension # existence checked implicitly by File.file?
|
134
|
+
# implicit index support
|
135
|
+
elsif File.directory?(path_to_resolve) && !unslashed_directory?(path_to_resolve)
|
136
|
+
path_with_index = File.join(path_to_resolve, @runner.options[:directory_index_file])
|
137
|
+
path_with_index if File.file?(path_with_index)
|
138
|
+
# explicit file or directory
|
139
|
+
elsif File.exist?(path_to_resolve)
|
140
|
+
path_to_resolve
|
141
|
+
end
|
142
|
+
@runner.resolved_paths[path_to_resolve] = resolved
|
143
|
+
|
144
|
+
resolved
|
121
145
|
end
|
122
146
|
|
123
147
|
def base64?
|
@@ -125,43 +149,23 @@ module HTMLProofer
|
|
125
149
|
end
|
126
150
|
|
127
151
|
def absolute_path
|
128
|
-
path =
|
152
|
+
path = full_path || @filename
|
129
153
|
|
130
154
|
File.expand_path(path, Dir.pwd)
|
131
155
|
end
|
132
156
|
|
133
|
-
def
|
157
|
+
def full_path
|
134
158
|
return if path.nil? || path.empty?
|
135
159
|
|
136
|
-
path_dot_ext = ""
|
137
|
-
|
138
|
-
path_dot_ext = path + @runner.options[:assume_extension] unless blank?(@runner.options[:assume_extension])
|
139
|
-
|
140
160
|
base = if absolute_path?(path) # path relative to root
|
141
|
-
# either overwrite with root_dir; or, if source is directory, use that; or, just get the
|
142
|
-
@runner.options[:root_dir] || (File.directory?(@
|
143
|
-
# relative links, path is a file
|
144
|
-
elsif File.exist?(File.expand_path(path,
|
145
|
-
@runner.current_source)) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
|
146
|
-
File.dirname(@runner.current_filename)
|
147
|
-
# relative links in nested dir, path is a file
|
148
|
-
elsif File.exist?(File.join(File.dirname(@runner.current_filename),
|
149
|
-
path)) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
|
150
|
-
File.dirname(@runner.current_filename)
|
151
|
-
# relative link, path is a directory
|
161
|
+
# either overwrite with root_dir; or, if source is directory, use that; or, just get the source file's dirname
|
162
|
+
@runner.options[:root_dir] || (File.directory?(@source) ? @source : File.dirname(@source))
|
152
163
|
else
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
file = File.join(base, path)
|
157
|
-
|
158
|
-
if @runner.options[:assume_extension] && File.file?("#{file}#{@runner.options[:assume_extension]}")
|
159
|
-
file = "#{file}#{@runner.options[:assume_extension]}"
|
160
|
-
elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
|
161
|
-
file = File.join(file, @runner.options[:directory_index_file])
|
164
|
+
# path relative to the file where the link is defined
|
165
|
+
File.dirname(@filename)
|
162
166
|
end
|
163
167
|
|
164
|
-
|
168
|
+
File.join(base, path)
|
165
169
|
end
|
166
170
|
|
167
171
|
def unslashed_directory?(file)
|
@@ -213,15 +217,28 @@ module HTMLProofer
|
|
213
217
|
url.start_with?("?")
|
214
218
|
end
|
215
219
|
|
216
|
-
def
|
220
|
+
def without_hash
|
217
221
|
@url.to_s.sub(/##{hash}/, "")
|
218
222
|
end
|
219
223
|
|
220
|
-
# catch any obvious issues
|
224
|
+
# catch any obvious issues
|
221
225
|
private def clean_url!
|
222
|
-
|
223
|
-
|
224
|
-
|
226
|
+
parsed_url = Addressable::URI.parse(@url)
|
227
|
+
url = if parsed_url.scheme.nil?
|
228
|
+
parsed_url
|
229
|
+
else
|
230
|
+
parsed_url.normalize
|
231
|
+
end.to_s
|
232
|
+
|
233
|
+
# normalize strips this off, which causes issues with cache
|
234
|
+
@url = if @url.end_with?("/") && !url.end_with?("/")
|
235
|
+
"#{url}/"
|
236
|
+
elsif !@url.end_with?("/") && url.end_with?("/")
|
237
|
+
url.chop
|
238
|
+
else
|
239
|
+
url
|
240
|
+
end
|
241
|
+
rescue Addressable::URI::InvalidURIError # rubocop:disable Lint/SuppressedException; error will be reported at check time
|
225
242
|
end
|
226
243
|
|
227
244
|
private def swap_urls!
|
data/lib/html_proofer/cache.rb
CHANGED
@@ -41,7 +41,7 @@ module HTMLProofer
|
|
41
41
|
end
|
42
42
|
|
43
43
|
def parsed_timeframe(timeframe)
|
44
|
-
return
|
44
|
+
return if timeframe.nil?
|
45
45
|
|
46
46
|
time, date = timeframe.match(/(\d+)(\D)/).captures
|
47
47
|
time = time.to_i
|
@@ -93,8 +93,10 @@ module HTMLProofer
|
|
93
93
|
# if there are no urls, bail
|
94
94
|
return {} if urls_detected.empty?
|
95
95
|
|
96
|
-
|
97
|
-
|
96
|
+
if type == :external
|
97
|
+
urls_detected = urls_detected.transform_keys do |url|
|
98
|
+
cleaned_url(url)
|
99
|
+
end
|
98
100
|
end
|
99
101
|
|
100
102
|
urls_to_check = detect_url_changes(urls_detected, type)
|
@@ -250,7 +252,7 @@ module HTMLProofer
|
|
250
252
|
SECONDS_PER_HOUR = 3600
|
251
253
|
SECONDS_PER_DAY = 86400
|
252
254
|
SECONDS_PER_WEEK = 604800
|
253
|
-
SECONDS_PER_MONTH = 2629746
|
255
|
+
SECONDS_PER_MONTH = 2629746 # 1/12 of a gregorian year
|
254
256
|
|
255
257
|
private def time_ago(measurement, unit)
|
256
258
|
case unit
|
@@ -267,7 +269,8 @@ module HTMLProofer
|
|
267
269
|
|
268
270
|
private def url_matches_type?(url, type)
|
269
271
|
return true if type == :internal && url !~ URI_REGEXP
|
270
|
-
|
272
|
+
|
273
|
+
true if type == :external && url =~ URI_REGEXP
|
271
274
|
end
|
272
275
|
|
273
276
|
private def cleaned_url(url)
|
data/lib/html_proofer/check/favicon.rb
CHANGED
@@ -17,13 +17,17 @@ module HTMLProofer
|
|
17
17
|
|
18
18
|
if found
|
19
19
|
if @favicon.url.protocol_relative?
|
20
|
-
add_failure(
|
21
|
-
|
20
|
+
add_failure(
|
21
|
+
"favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead",
|
22
|
+
element: @favicon,
|
23
|
+
)
|
22
24
|
elsif @favicon.url.remote?
|
23
25
|
add_to_external_urls(@favicon.url, @favicon.line)
|
24
26
|
elsif !@favicon.url.exists?
|
25
|
-
add_failure(
|
26
|
-
|
27
|
+
add_failure(
|
28
|
+
"internal favicon #{@favicon.url.raw_attribute} does not exist",
|
29
|
+
element: @favicon,
|
30
|
+
)
|
27
31
|
end
|
28
32
|
else
|
29
33
|
add_failure("no favicon provided")
|
data/lib/html_proofer/check/images.rb
CHANGED
@@ -12,31 +12,39 @@ module HTMLProofer
|
|
12
12
|
next if @img.ignore?
|
13
13
|
|
14
14
|
# screenshot filenames should return because of terrible names
|
15
|
-
add_failure(
|
16
|
-
|
15
|
+
add_failure(
|
16
|
+
"image has a terrible filename (#{@img.url.raw_attribute})",
|
17
|
+
element: @img,
|
18
|
+
) if terrible_filename?
|
17
19
|
|
18
20
|
# does the image exist?
|
19
21
|
if missing_src?
|
20
|
-
add_failure("image has no src or srcset attribute",
|
22
|
+
add_failure("image has no src or srcset attribute", element: @img)
|
21
23
|
elsif @img.url.protocol_relative?
|
22
|
-
add_failure(
|
23
|
-
|
24
|
+
add_failure(
|
25
|
+
"image link #{@img.url} is a protocol-relative URL, use explicit https:// instead",
|
26
|
+
element: @img,
|
27
|
+
)
|
24
28
|
elsif @img.url.remote?
|
25
29
|
add_to_external_urls(@img.url, @img.line)
|
26
30
|
elsif !@img.url.exists? && !@img.multiple_srcsets? && !@img.multiple_sizes?
|
27
|
-
add_failure(
|
28
|
-
|
31
|
+
add_failure(
|
32
|
+
"internal image #{@img.url.raw_attribute} does not exist",
|
33
|
+
element: @img,
|
34
|
+
)
|
29
35
|
elsif @img.multiple_srcsets? || @img.multiple_sizes?
|
30
36
|
@img.srcsets_wo_sizes.each do |srcset|
|
31
|
-
srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)
|
37
|
+
srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, source: @img.url.source, filename: @img.url.filename, extract_size: true)
|
32
38
|
|
33
39
|
if srcset_url.protocol_relative?
|
34
|
-
add_failure(
|
35
|
-
|
40
|
+
add_failure(
|
41
|
+
"image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead",
|
42
|
+
element: @img,
|
43
|
+
)
|
36
44
|
elsif srcset_url.remote?
|
37
45
|
add_to_external_urls(srcset_url.url, @img.line)
|
38
46
|
elsif !srcset_url.exists?
|
39
|
-
add_failure("internal image #{srcset} does not exist",
|
47
|
+
add_failure("internal image #{srcset} does not exist", element: @img)
|
40
48
|
end
|
41
49
|
end
|
42
50
|
end
|
@@ -44,16 +52,22 @@ module HTMLProofer
|
|
44
52
|
# if this is an img element, check that the alt attribute is present
|
45
53
|
if @img.img_tag? && !ignore_element?
|
46
54
|
if missing_alt_tag? && !ignore_missing_alt?
|
47
|
-
add_failure(
|
48
|
-
|
55
|
+
add_failure(
|
56
|
+
"image #{@img.url.raw_attribute} does not have an alt attribute",
|
57
|
+
element: @img,
|
58
|
+
)
|
49
59
|
elsif (empty_alt_tag? || alt_all_spaces?) && !ignore_empty_alt?
|
50
|
-
add_failure(
|
51
|
-
|
60
|
+
add_failure(
|
61
|
+
"image #{@img.url.raw_attribute} has an alt attribute, but no content",
|
62
|
+
element: @img,
|
63
|
+
)
|
52
64
|
end
|
53
65
|
end
|
54
66
|
|
55
|
-
add_failure(
|
56
|
-
|
67
|
+
add_failure(
|
68
|
+
"image #{@img.url.raw_attribute} uses the http scheme",
|
69
|
+
element: @img,
|
70
|
+
) if @runner.enforce_https? && @img.url.http?
|
57
71
|
end
|
58
72
|
|
59
73
|
external_urls
|
data/lib/html_proofer/check/links.rb
CHANGED
@@ -10,7 +10,7 @@ module HTMLProofer
|
|
10
10
|
next if @link.ignore?
|
11
11
|
|
12
12
|
if !allow_hash_href? && @link.node["href"] == "#"
|
13
|
-
add_failure("linking to internal hash #, which points to nowhere",
|
13
|
+
add_failure("linking to internal hash #, which points to nowhere", element: @link)
|
14
14
|
next
|
15
15
|
end
|
16
16
|
|
@@ -18,19 +18,21 @@ module HTMLProofer
|
|
18
18
|
if blank?(@link.url.raw_attribute)
|
19
19
|
next if allow_missing_href?
|
20
20
|
|
21
|
-
add_failure("'#{@link.node.name}' tag is missing a reference",
|
21
|
+
add_failure("'#{@link.node.name}' tag is missing a reference", element: @link)
|
22
22
|
next
|
23
23
|
end
|
24
24
|
|
25
25
|
# is it even a valid URL?
|
26
26
|
unless @link.url.valid?
|
27
|
-
add_failure("#{@link.href} is an invalid URL",
|
27
|
+
add_failure("#{@link.href} is an invalid URL", element: @link)
|
28
28
|
next
|
29
29
|
end
|
30
30
|
|
31
31
|
if @link.url.protocol_relative?
|
32
|
-
add_failure(
|
33
|
-
|
32
|
+
add_failure(
|
33
|
+
"#{@link.url} is a protocol-relative URL, use explicit https:// instead",
|
34
|
+
element: @link,
|
35
|
+
)
|
34
36
|
next
|
35
37
|
end
|
36
38
|
|
@@ -47,7 +49,7 @@ module HTMLProofer
|
|
47
49
|
next if @link.node["rel"] == "dns-prefetch"
|
48
50
|
|
49
51
|
unless @link.url.path?
|
50
|
-
add_failure("#{@link.url.raw_attribute} is an invalid URL",
|
52
|
+
add_failure("#{@link.url.raw_attribute} is an invalid URL", element: @link)
|
51
53
|
next
|
52
54
|
end
|
53
55
|
|
@@ -55,8 +57,10 @@ module HTMLProofer
|
|
55
57
|
elsif @link.url.internal?
|
56
58
|
# does the local directory have a trailing slash?
|
57
59
|
if @link.url.unslashed_directory?(@link.url.absolute_path)
|
58
|
-
add_failure(
|
59
|
-
|
60
|
+
add_failure(
|
61
|
+
"internally linking to a directory #{@link.url.raw_attribute} without trailing slash",
|
62
|
+
element: @link,
|
63
|
+
)
|
60
64
|
next
|
61
65
|
end
|
62
66
|
|
@@ -82,23 +86,30 @@ module HTMLProofer
|
|
82
86
|
when "http"
|
83
87
|
return unless @runner.options[:enforce_https]
|
84
88
|
|
85
|
-
add_failure("#{@link.url.raw_attribute} is not an HTTPS link",
|
89
|
+
add_failure("#{@link.url.raw_attribute} is not an HTTPS link", element: @link)
|
86
90
|
end
|
87
91
|
end
|
88
92
|
|
89
93
|
def handle_mailto
|
90
94
|
if @link.url.path.empty?
|
91
|
-
add_failure(
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
95
|
+
add_failure(
|
96
|
+
"#{@link.url.raw_attribute} contains no email address",
|
97
|
+
element: @link,
|
98
|
+
) unless ignore_empty_mailto?
|
99
|
+
# eg., if any do not match a valid URL
|
100
|
+
elsif @link.url.path.split(",").any? { |email| !/#{URI::MailTo::EMAIL_REGEXP}/o.match?(email) }
|
101
|
+
add_failure(
|
102
|
+
"#{@link.url.raw_attribute} contains an invalid email address",
|
103
|
+
element: @link,
|
104
|
+
)
|
96
105
|
end
|
97
106
|
end
|
98
107
|
|
99
108
|
def handle_tel
|
100
|
-
add_failure(
|
101
|
-
|
109
|
+
add_failure(
|
110
|
+
"#{@link.url.raw_attribute} contains no phone number",
|
111
|
+
element: @link,
|
112
|
+
) if @link.url.path.empty?
|
102
113
|
end
|
103
114
|
|
104
115
|
def ignore_empty_mailto?
|
@@ -113,13 +124,17 @@ module HTMLProofer
|
|
113
124
|
return unless SRI_REL_TYPES.include?(@link.node["rel"])
|
114
125
|
|
115
126
|
if blank?(@link.node["integrity"]) && blank?(@link.node["crossorigin"])
|
116
|
-
add_failure(
|
117
|
-
|
127
|
+
add_failure(
|
128
|
+
"SRI and CORS not provided in: #{@link.url.raw_attribute}",
|
129
|
+
element: @link,
|
130
|
+
)
|
118
131
|
elsif blank?(@link.node["integrity"])
|
119
|
-
add_failure("Integrity is missing in: #{@link.url.raw_attribute}",
|
132
|
+
add_failure("Integrity is missing in: #{@link.url.raw_attribute}", element: @link)
|
120
133
|
elsif blank?(@link.node["crossorigin"])
|
121
|
-
add_failure(
|
122
|
-
|
134
|
+
add_failure(
|
135
|
+
"CORS not provided for external resource in: #{@link.link.url.raw_attribute}",
|
136
|
+
element: @link,
|
137
|
+
)
|
123
138
|
end
|
124
139
|
end
|
125
140
|
|
data/lib/html_proofer/check/open_graph.rb
CHANGED
@@ -11,19 +11,23 @@ module HTMLProofer
|
|
11
11
|
|
12
12
|
# does the open_graph exist?
|
13
13
|
if missing_content?
|
14
|
-
add_failure("open graph has no content attribute",
|
14
|
+
add_failure("open graph has no content attribute", element: @open_graph)
|
15
15
|
elsif empty_content?
|
16
|
-
add_failure("open graph content attribute is empty",
|
16
|
+
add_failure("open graph content attribute is empty", element: @open_graph)
|
17
17
|
elsif !@open_graph.url.valid?
|
18
|
-
add_failure("#{@open_graph.src} is an invalid URL",
|
18
|
+
add_failure("#{@open_graph.src} is an invalid URL", element: @open_graph)
|
19
19
|
elsif @open_graph.url.protocol_relative?
|
20
|
-
add_failure(
|
21
|
-
|
20
|
+
add_failure(
|
21
|
+
"open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead",
|
22
|
+
element: @open_graph,
|
23
|
+
)
|
22
24
|
elsif @open_graph.url.remote?
|
23
25
|
add_to_external_urls(@open_graph.url, @open_graph.line)
|
24
26
|
else
|
25
|
-
add_failure(
|
26
|
-
|
27
|
+
add_failure(
|
28
|
+
"internal open graph #{@open_graph.url.raw_attribute} does not exist",
|
29
|
+
element: @open_graph,
|
30
|
+
) unless @open_graph.url.exists?
|
27
31
|
end
|
28
32
|
end
|
29
33
|
|
data/lib/html_proofer/check/scripts.rb
CHANGED
@@ -12,16 +12,20 @@ module HTMLProofer
|
|
12
12
|
|
13
13
|
# does the script exist?
|
14
14
|
if missing_src?
|
15
|
-
add_failure("script is empty and has no src attribute",
|
15
|
+
add_failure("script is empty and has no src attribute", element: @script)
|
16
16
|
elsif @script.url.protocol_relative?
|
17
|
-
add_failure(
|
18
|
-
|
17
|
+
add_failure(
|
18
|
+
"script link #{@script.url} is a protocol-relative URL, use explicit https:// instead",
|
19
|
+
element: @script,
|
20
|
+
)
|
19
21
|
elsif @script.url.remote?
|
20
22
|
add_to_external_urls(@script.url, @script.line)
|
21
23
|
check_sri if @runner.check_sri?
|
22
24
|
elsif !@script.url.exists?
|
23
|
-
add_failure(
|
24
|
-
|
25
|
+
add_failure(
|
26
|
+
"internal script reference #{@script.src} does not exist",
|
27
|
+
element: @script,
|
28
|
+
)
|
25
29
|
end
|
26
30
|
end
|
27
31
|
|
@@ -34,14 +38,20 @@ module HTMLProofer
|
|
34
38
|
|
35
39
|
def check_sri
|
36
40
|
if blank?(@script.node["integrity"]) && blank?(@script.node["crossorigin"])
|
37
|
-
add_failure(
|
38
|
-
|
41
|
+
add_failure(
|
42
|
+
"SRI and CORS not provided in: #{@script.url.raw_attribute}",
|
43
|
+
element: @script,
|
44
|
+
)
|
39
45
|
elsif blank?(@script.node["integrity"])
|
40
|
-
add_failure(
|
41
|
-
|
46
|
+
add_failure(
|
47
|
+
"Integrity is missing in: #{@script.url.raw_attribute}",
|
48
|
+
element: @script,
|
49
|
+
)
|
42
50
|
elsif blank?(@script.node["crossorigin"])
|
43
|
-
add_failure(
|
44
|
-
|
51
|
+
add_failure(
|
52
|
+
"CORS not provided for external resource in: #{@script.url.raw_attribute}",
|
53
|
+
element: @script,
|
54
|
+
)
|
45
55
|
end
|
46
56
|
end
|
47
57
|
end
|
data/lib/html_proofer/check.rb
CHANGED
@@ -24,9 +24,15 @@ module HTMLProofer
|
|
24
24
|
raise NotImplementedError, "HTMLProofer::Check subclasses must implement #run"
|
25
25
|
end
|
26
26
|
|
27
|
-
def add_failure(description, line: nil, status: nil, content: nil)
|
28
|
-
@failures << Failure.new(
|
29
|
-
|
27
|
+
def add_failure(description, element: nil, line: nil, status: nil, content: nil)
|
28
|
+
@failures << Failure.new(
|
29
|
+
@runner.current_filename,
|
30
|
+
short_name,
|
31
|
+
description,
|
32
|
+
line: element.nil? ? line : element.line,
|
33
|
+
status: status,
|
34
|
+
content: element.nil? ? content : element.content,
|
35
|
+
)
|
30
36
|
end
|
31
37
|
|
32
38
|
def short_name
|
@@ -39,8 +45,8 @@ module HTMLProofer
|
|
39
45
|
@internal_urls[url_string] = [] if @internal_urls[url_string].nil?
|
40
46
|
|
41
47
|
metadata = {
|
42
|
-
source:
|
43
|
-
filename:
|
48
|
+
source: url.source,
|
49
|
+
filename: url.filename,
|
44
50
|
line: line,
|
45
51
|
base_url: base_url,
|
46
52
|
found: false,
|
@@ -53,7 +59,7 @@ module HTMLProofer
|
|
53
59
|
|
54
60
|
@external_urls[url_string] = [] if @external_urls[url_string].nil?
|
55
61
|
|
56
|
-
@external_urls[url_string] << { filename:
|
62
|
+
@external_urls[url_string] << { filename: url.filename, line: line }
|
57
63
|
end
|
58
64
|
|
59
65
|
class << self
|
data/lib/html_proofer/configuration.rb
CHANGED
@@ -134,7 +134,17 @@ module HTMLProofer
|
|
134
134
|
|
135
135
|
section(opts, "Ignore Configuration") do
|
136
136
|
set_option(opts, "--ignore-files [FILE1,FILE2,...]") do |long_opt_symbol, list|
|
137
|
-
@options[long_opt_symbol] = list.nil?
|
137
|
+
@options[long_opt_symbol] = if list.nil?
|
138
|
+
[]
|
139
|
+
else
|
140
|
+
list.split(",").map.each do |l|
|
141
|
+
if l.start_with?("/") && l.end_with?("/")
|
142
|
+
Regexp.new(l[1...-1])
|
143
|
+
else
|
144
|
+
l
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
138
148
|
end
|
139
149
|
|
140
150
|
set_option(opts, "--[no-]ignore-empty-alt") do |long_opt_symbol, arg|
|
@@ -187,12 +197,12 @@ module HTMLProofer
|
|
187
197
|
@options[long_opt_symbol] = parse_json_option("typhoeus", arg, symbolize_names: false)
|
188
198
|
end
|
189
199
|
|
190
|
-
set_option(opts, "--hydra <CONFIG>") do |long_opt_symbol,
|
191
|
-
@options[long_opt_symbol] = parse_json_option("hydra", arg, symbolize_names:
|
200
|
+
set_option(opts, "--hydra <CONFIG>") do |long_opt_symbol, arg|
|
201
|
+
@options[long_opt_symbol] = parse_json_option("hydra", arg, symbolize_names: true)
|
192
202
|
end
|
193
203
|
|
194
|
-
set_option(opts, "--cache <CONFIG>") do |long_opt_symbol,
|
195
|
-
@options[long_opt_symbol] = parse_json_option("cache", arg, symbolize_names:
|
204
|
+
set_option(opts, "--cache <CONFIG>") do |long_opt_symbol, arg|
|
205
|
+
@options[long_opt_symbol] = parse_json_option("cache", arg, symbolize_names: true)
|
196
206
|
end
|
197
207
|
end
|
198
208
|
|
@@ -223,8 +233,8 @@ module HTMLProofer
|
|
223
233
|
arg.split(",").each_with_object({}) do |s, hsh|
|
224
234
|
split = s.split(/(?<!\\):/, 2)
|
225
235
|
|
226
|
-
re = split[0].gsub(
|
227
|
-
string = split[1].gsub(
|
236
|
+
re = split[0].gsub("\\:", ":")
|
237
|
+
string = split[1].gsub("\\:", ":")
|
228
238
|
hsh[Regexp.new(re)] = string
|
229
239
|
end
|
230
240
|
end
|
@@ -270,49 +280,69 @@ module HTMLProofer
|
|
270
280
|
module ConfigurationHelp
|
271
281
|
TEXT = {
|
272
282
|
as_links: ["Assumes that `PATH` is a comma-separated array of links to check."],
|
273
|
-
assume_extension: [
|
274
|
-
|
283
|
+
assume_extension: [
|
284
|
+
"Automatically add specified extension to files for internal links, ",
|
285
|
+
"to allow extensionless URLs (as supported by most servers) (default: `.html`).",
|
286
|
+
],
|
275
287
|
directory_index_file: ["Sets the file to look for when a link refers to a directory. (default: `index.html`)."],
|
276
|
-
extensions: [
|
277
|
-
|
288
|
+
extensions: [
|
289
|
+
"A comma-separated list of Strings indicating the file extensions you",
|
290
|
+
"would like to check (default: `.html`)",
|
291
|
+
],
|
278
292
|
|
279
293
|
allow_hash_href: ['"If `true`, assumes `href="#"` anchors are valid (default: `true`)"'],
|
280
|
-
allow_missing_href: [
|
281
|
-
|
282
|
-
|
283
|
-
|
294
|
+
allow_missing_href: [
|
295
|
+
"If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically ",
|
296
|
+
"allowed, but could also be human error. (default: `false`)",
|
297
|
+
],
|
298
|
+
checks: [
|
299
|
+
"A comma-separated list of Strings indicating which checks you",
|
300
|
+
"want to run (default: `[\"Links\", \"Images\", \"Scripts\"]",
|
301
|
+
],
|
284
302
|
check_external_hash: ["Checks whether external hashes exist (even if the webpage exists) (default: `true`)."],
|
285
303
|
check_internal_hash: ["Checks whether internal hashes exist (even if the webpage exists) (default: `true`)."],
|
286
304
|
check_sri: ["Check that `<link>` and `<script>` external resources use SRI (default: `false`)."],
|
287
305
|
disable_external: ["If `true`, does not run the external link checker (default: `false`)."],
|
288
|
-
enforce_https: ["Fails a link if it
|
306
|
+
enforce_https: ["Fails a link if it's not marked as `https` (default: `true`)."],
|
289
307
|
root_dir: ["The absolute path to the directory serving your html-files."],
|
290
308
|
|
291
|
-
ignore_empty_alt: [
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
309
|
+
ignore_empty_alt: [
|
310
|
+
"If `true`, ignores images with empty/missing ",
|
311
|
+
"alt tags (in other words, `<img alt>` and `<img alt=\"\">`",
|
312
|
+
"are valid; set this to `false` to flag those) (default: `true`).",
|
313
|
+
],
|
314
|
+
ignore_empty_mailto: [
|
315
|
+
"If `true`, allows `mailto:` `href`s which don't",
|
316
|
+
"contain an email address (default: `false`)'.",
|
317
|
+
],
|
296
318
|
ignore_missing_alt: ["If `true`, ignores images with missing alt tags (default: `false`)."],
|
297
319
|
ignore_status_codes: ["A comma-separated list of numbers representing status codes to ignore."],
|
298
320
|
ignore_files: ["A comma-separated list of Strings or RegExps containing file paths that are safe to ignore"],
|
299
|
-
ignore_urls: [
|
300
|
-
|
321
|
+
ignore_urls: [
|
322
|
+
"A comma-separated list of Strings or RegExps containing URLs that are",
|
323
|
+
"safe to ignore. This affects all HTML attributes, such as `alt` tags on images.",
|
324
|
+
],
|
301
325
|
only_status_codes: ["A comma-separated list of numbers representing the only status codes to report on."],
|
302
326
|
only_4xx: ["Only reports errors for links that fall within the 4xx status code range."],
|
303
327
|
|
304
|
-
swap_attributes: [
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
328
|
+
swap_attributes: [
|
329
|
+
"JSON-formatted config that maps element names to the",
|
330
|
+
"preferred attribute to check (default: `{}`).",
|
331
|
+
],
|
332
|
+
swap_urls: [
|
333
|
+
"A comma-separated list containing key-value pairs of `RegExp => String`.",
|
334
|
+
"It transforms URLs that match `RegExp` into `String` via `gsub`.",
|
335
|
+
"The escape sequences `\\:` should be used to produce literal `:`s.",
|
336
|
+
],
|
309
337
|
|
310
338
|
typhoeus: ["JSON-formatted string of Typhoeus config; if set, overrides the html-proofer defaults."],
|
311
339
|
hydra: ["JSON-formatted string of Hydra config; if set, overrides the html-proofer defaults."],
|
312
340
|
cache: ["JSON-formatted string of cache config; if set, overrides the html-proofer defaults."],
|
313
341
|
|
314
|
-
log_level: [
|
315
|
-
|
342
|
+
log_level: [
|
343
|
+
"Sets the logging level. One of `:debug`, `:info`, ",
|
344
|
+
"`:warn`, `:error`, or `:fatal`. (default: `:info`)",
|
345
|
+
],
|
316
346
|
|
317
347
|
version: ["Prints the version of html-proofer."],
|
318
348
|
}.freeze
|
data/lib/html_proofer/element.rb
CHANGED
@@ -16,7 +16,7 @@ module HTMLProofer
|
|
16
16
|
swap_attributes!
|
17
17
|
|
18
18
|
@base_url = base_url
|
19
|
-
@url = Attribute::Url.new(runner, link_attribute, base_url: base_url)
|
19
|
+
@url = Attribute::Url.new(runner, link_attribute, base_url: base_url, source: @runner.current_source, filename: @runner.current_filename)
|
20
20
|
|
21
21
|
@line = node.line
|
22
22
|
@content = node.content
|
@@ -27,7 +27,7 @@ module HTMLProofer
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def meta_content
|
30
|
-
return
|
30
|
+
return unless meta_tag?
|
31
31
|
|
32
32
|
@node["content"]
|
33
33
|
end
|
@@ -37,7 +37,7 @@ module HTMLProofer
|
|
37
37
|
end
|
38
38
|
|
39
39
|
def src
|
40
|
-
return
|
40
|
+
return if !img_tag? && !script_tag? && !source_tag?
|
41
41
|
|
42
42
|
@node["src"]
|
43
43
|
end
|
@@ -51,7 +51,7 @@ module HTMLProofer
|
|
51
51
|
end
|
52
52
|
|
53
53
|
def srcset
|
54
|
-
return
|
54
|
+
return if !img_tag? && !source_tag?
|
55
55
|
|
56
56
|
@node["srcset"]
|
57
57
|
end
|
@@ -61,7 +61,7 @@ module HTMLProofer
|
|
61
61
|
end
|
62
62
|
|
63
63
|
def href
|
64
|
-
return
|
64
|
+
return if !a_tag? && !link_tag?
|
65
65
|
|
66
66
|
@node["href"]
|
67
67
|
end
|
@@ -82,10 +82,25 @@ module HTMLProofer
|
|
82
82
|
!blank?(srcset) && srcset.split(",").size > 1
|
83
83
|
end
|
84
84
|
|
85
|
+
# From https://github.com/sindresorhus/srcset/blob/f7c48acd7facf18e94dec47e6b96e84e0f0e69dc/index.js#LL1-L16C71
|
86
|
+
# This regex represents a loose rule of an “image candidate string”; see https://html.spec.whatwg.org/multipage/images.html#srcset-attribute
|
87
|
+
# An “image candidate string” roughly consists of the following:
|
88
|
+
# 1. Zero or more whitespace characters.
|
89
|
+
# 2. A non-empty URL that does not start or end with `,`.
|
90
|
+
# 3. Zero or more whitespace characters.
|
91
|
+
# 4. An optional “descriptor” that starts with a whitespace character.
|
92
|
+
# 5. Zero or more whitespace characters.
|
93
|
+
# 6. Each image candidate string is separated by a `,`.
|
94
|
+
# We intentionally implement a loose rule here so that we can perform more aggressive error handling and reporting in the below code.
|
95
|
+
|
96
|
+
IMAGE_CANDIDATE_REGEX = /\s*([^,]\S*[^,](?:\s+[^,]+)?)\s*(?:,|$)/
|
97
|
+
|
85
98
|
def srcsets
|
86
|
-
return
|
99
|
+
return if blank?(srcset)
|
87
100
|
|
88
|
-
srcset.split(
|
101
|
+
srcset.split(IMAGE_CANDIDATE_REGEX).select.with_index do |_part, idx|
|
102
|
+
idx.odd?
|
103
|
+
end.map(&:strip)
|
89
104
|
end
|
90
105
|
|
91
106
|
def multiple_sizes?
|
@@ -97,7 +112,7 @@ module HTMLProofer
|
|
97
112
|
end
|
98
113
|
|
99
114
|
def srcsets_wo_sizes
|
100
|
-
return
|
115
|
+
return if blank?(srcsets)
|
101
116
|
|
102
117
|
srcsets.map do |srcset|
|
103
118
|
srcset.split(" ").first
|
@@ -118,7 +133,7 @@ module HTMLProofer
|
|
118
133
|
|
119
134
|
attrs = @runner.options[:swap_attributes][@node.name]
|
120
135
|
|
121
|
-
|
136
|
+
true unless blank?(attrs)
|
122
137
|
end
|
123
138
|
|
124
139
|
private def swap_attributes!
|
data/lib/html_proofer/log.rb
CHANGED
@@ -11,9 +11,11 @@ module HTMLProofer
|
|
11
11
|
STDERR_LEVELS = [:error, :fatal].freeze
|
12
12
|
|
13
13
|
def initialize(log_level)
|
14
|
-
@logger = Yell.new(
|
14
|
+
@logger = Yell.new(
|
15
|
+
format: false,
|
15
16
|
name: "HTMLProofer", \
|
16
|
-
level: "gte.#{log_level}"
|
17
|
+
level: "gte.#{log_level}",
|
18
|
+
) do |l|
|
17
19
|
l.adapter(:stdout, level: "lte.warn")
|
18
20
|
l.adapter(:stderr, level: "gte.error")
|
19
21
|
end
|
@@ -39,8 +41,8 @@ module HTMLProofer
|
|
39
41
|
:red
|
40
42
|
end
|
41
43
|
|
42
|
-
if
|
43
|
-
|
44
|
+
if STDOUT_LEVELS.include?(level) ||
|
45
|
+
STDERR_LEVELS.include?(level)
|
44
46
|
Rainbow(message).send(color)
|
45
47
|
else
|
46
48
|
message
|
data/lib/html_proofer/reporter.rb
CHANGED
@@ -11,8 +11,8 @@ module HTMLProofer
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def failures=(failures)
|
14
|
-
@failures = failures.group_by(&:check_name)
|
15
|
-
.transform_values { |issues| issues.sort_by { |issue| [issue.path, issue.line] } }
|
14
|
+
@failures = failures.group_by(&:check_name)
|
15
|
+
.transform_values { |issues| issues.sort_by { |issue| [issue.path, issue.line] } }
|
16
16
|
.sort
|
17
17
|
end
|
18
18
|
|
data/lib/html_proofer/runner.rb
CHANGED
@@ -6,8 +6,8 @@ module HTMLProofer
|
|
6
6
|
class Runner
|
7
7
|
include HTMLProofer::Utils
|
8
8
|
|
9
|
-
attr_reader :options, :cache, :logger, :internal_urls, :external_urls, :
|
10
|
-
attr_accessor :
|
9
|
+
attr_reader :options, :cache, :logger, :internal_urls, :external_urls, :resolved_paths, :current_check, :current_filename, :current_source
|
10
|
+
attr_accessor :reporter
|
11
11
|
|
12
12
|
URL_TYPES = [:external, :internal].freeze
|
13
13
|
|
@@ -26,7 +26,7 @@ module HTMLProofer
|
|
26
26
|
|
27
27
|
@before_request = []
|
28
28
|
|
29
|
-
@
|
29
|
+
@resolved_paths = {}
|
30
30
|
|
31
31
|
@current_check = nil
|
32
32
|
@current_source = nil
|
@@ -42,8 +42,10 @@ module HTMLProofer
|
|
42
42
|
@logger.log(:info, "Running #{check_text} (#{format_checks_list(checks)}) on #{@source} ...\n\n")
|
43
43
|
check_list_of_links unless @options[:disable_external]
|
44
44
|
else
|
45
|
-
@logger.log(
|
46
|
-
|
45
|
+
@logger.log(
|
46
|
+
:info,
|
47
|
+
"Running #{check_text} (#{format_checks_list(checks)}) in #{@source} on *#{@options[:extensions].join(", ")} files ...\n\n",
|
48
|
+
)
|
47
49
|
|
48
50
|
check_files
|
49
51
|
@logger.log(:info, "Ran on #{pluralize(files.length, "file", "files")}!\n\n")
|
@@ -166,8 +168,14 @@ module HTMLProofer
|
|
166
168
|
|
167
169
|
def ignore_file?(file)
|
168
170
|
@options[:ignore_files].each do |pattern|
|
169
|
-
|
170
|
-
|
171
|
+
if pattern.is_a?(String) && pattern == file
|
172
|
+
@logger.log(:debug, "Ignoring #{file} because it matches #{pattern}")
|
173
|
+
return true
|
174
|
+
end
|
175
|
+
next unless pattern.is_a?(Regexp) && pattern.match(file)
|
176
|
+
|
177
|
+
@logger.log(:debug, "Ignoring #{file} because it matches regexp #{pattern}")
|
178
|
+
return true
|
171
179
|
end
|
172
180
|
|
173
181
|
false
|
@@ -228,7 +236,7 @@ module HTMLProofer
|
|
228
236
|
end
|
229
237
|
|
230
238
|
private def load_cache(type)
|
231
|
-
ivar = instance_variable_get("@#{type}_urls")
|
239
|
+
ivar = instance_variable_get(:"@#{type}_urls")
|
232
240
|
|
233
241
|
existing_urls_count = @cache.size(type)
|
234
242
|
cache_text = pluralize(existing_urls_count, "#{type} link", "#{type} links")
|
@@ -241,7 +249,7 @@ module HTMLProofer
|
|
241
249
|
|
242
250
|
private def format_checks_list(checks)
|
243
251
|
checks.map do |check|
|
244
|
-
check.sub(
|
252
|
+
check.sub("HTMLProofer::Check::", "")
|
245
253
|
end.sort.join(", ")
|
246
254
|
end
|
247
255
|
end
|
@@ -1,7 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "typhoeus"
|
4
|
-
require "uri"
|
4
|
+
require "open-uri"
|
5
|
+
# require "uri"
|
6
|
+
require "pdf-reader"
|
5
7
|
|
6
8
|
module HTMLProofer
|
7
9
|
class UrlValidator
|
@@ -88,8 +90,12 @@ module HTMLProofer
|
|
88
90
|
return if @runner.options[:ignore_status_codes].include?(response_code)
|
89
91
|
|
90
92
|
if response_code.between?(200, 299)
|
91
|
-
@cache.add_external(href, filenames, response_code, "OK", true) unless check_hash_in_2xx_response(
|
92
|
-
|
93
|
+
@cache.add_external(href, filenames, response_code, "OK", true) unless check_hash_in_2xx_response(
|
94
|
+
href,
|
95
|
+
url,
|
96
|
+
response,
|
97
|
+
filenames,
|
98
|
+
)
|
93
99
|
elsif response.timed_out?
|
94
100
|
handle_timeout(href, filenames, response_code)
|
95
101
|
elsif response_code.zero?
|
@@ -115,6 +121,28 @@ module HTMLProofer
|
|
115
121
|
return false unless url.hash?
|
116
122
|
|
117
123
|
hash = url.hash
|
124
|
+
headers = response.options.fetch(:headers, {})
|
125
|
+
content_type = headers.find { |k, _| k.casecmp("content-type").zero? }
|
126
|
+
|
127
|
+
# attempt to verify PDF hash ref; see #787 for more details
|
128
|
+
# FIXME: this is re-reading the PDF response
|
129
|
+
if content_type && content_type[1].include?("pdf")
|
130
|
+
io = URI.parse(url.to_s).open
|
131
|
+
reader = PDF::Reader.new(io)
|
132
|
+
|
133
|
+
pages = reader.pages
|
134
|
+
if hash =~ /\Apage=(\d+)\z/
|
135
|
+
page = Regexp.last_match[1].to_i
|
136
|
+
|
137
|
+
unless pages[page - 1]
|
138
|
+
msg = "External link #{href} failed: #{url.without_hash} exists, but the hash '#{hash}' does not"
|
139
|
+
add_failure(filenames, msg, response.code)
|
140
|
+
@cache.add_external(href, filenames, response.code, msg, false)
|
141
|
+
end
|
142
|
+
|
143
|
+
return true
|
144
|
+
end
|
145
|
+
end
|
118
146
|
|
119
147
|
body_doc = create_nokogiri(response.body)
|
120
148
|
|
@@ -130,7 +158,7 @@ module HTMLProofer
|
|
130
158
|
|
131
159
|
return unless body_doc.xpath(xpath.join("|")).empty?
|
132
160
|
|
133
|
-
msg = "External link #{href} failed: #{url.
|
161
|
+
msg = "External link #{href} failed: #{url.without_hash} exists, but the hash '#{hash}' does not"
|
134
162
|
add_failure(filenames, msg, response.code)
|
135
163
|
@cache.add_external(href, filenames, response.code, msg, false)
|
136
164
|
true
|
@@ -29,15 +29,17 @@ module HTMLProofer
|
|
29
29
|
matched_count_to_log = pluralize(matched_files.count, "reference", "references")
|
30
30
|
@logger.log(:debug, "(#{i + 1} / #{links.count}) Internal link #{link}: Checking #{matched_count_to_log}")
|
31
31
|
matched_files.each do |metadata|
|
32
|
-
url = HTMLProofer::Attribute::Url.new(@runner, link, base_url: metadata[:base_url])
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
32
|
+
url = HTMLProofer::Attribute::Url.new(@runner, link, base_url: metadata[:base_url], source: metadata[:source], filename: metadata[:filename])
|
33
|
+
|
34
|
+
unless url.exists?
|
35
|
+
@failed_checks << Failure.new(
|
36
|
+
metadata[:filename],
|
37
|
+
"Links > Internal",
|
38
|
+
"internally linking to #{url}, which does not exist",
|
39
|
+
line: metadata[:line],
|
40
|
+
status: nil,
|
41
|
+
content: nil,
|
42
|
+
)
|
41
43
|
to_add << [url, metadata, false]
|
42
44
|
next
|
43
45
|
end
|
@@ -45,6 +47,7 @@ module HTMLProofer
|
|
45
47
|
hash_exists = hash_exists_for_url?(url)
|
46
48
|
if hash_exists.nil?
|
47
49
|
# the hash needs to be checked in the target file, we collect the url and metadata
|
50
|
+
target_file_path = url.resolved_path
|
48
51
|
unless file_paths_hashes_to_check.key?(target_file_path)
|
49
52
|
file_paths_hashes_to_check[target_file_path] = {}
|
50
53
|
end
|
@@ -55,8 +58,14 @@ module HTMLProofer
|
|
55
58
|
next
|
56
59
|
end
|
57
60
|
unless hash_exists
|
58
|
-
@failed_checks << Failure.new(
|
59
|
-
|
61
|
+
@failed_checks << Failure.new(
|
62
|
+
metadata[:filename],
|
63
|
+
"Links > Internal",
|
64
|
+
"internally linking to #{url}; the file exists, but the hash '#{url.hash}' does not",
|
65
|
+
line: metadata[:line],
|
66
|
+
status: nil,
|
67
|
+
content: nil,
|
68
|
+
)
|
60
69
|
to_add << [url, metadata, false]
|
61
70
|
next
|
62
71
|
end
|
@@ -75,8 +84,14 @@ module HTMLProofer
|
|
75
84
|
exists = hash_exists_in_html?(href_hash, html)
|
76
85
|
url_metadata.each do |(url, metadata)|
|
77
86
|
unless exists
|
78
|
-
@failed_checks << Failure.new(
|
79
|
-
|
87
|
+
@failed_checks << Failure.new(
|
88
|
+
metadata[:filename],
|
89
|
+
"Links > Internal",
|
90
|
+
"internally linking to #{url}; the file exists, but the hash '#{href_hash}' does not",
|
91
|
+
line: metadata[:line],
|
92
|
+
status: nil,
|
93
|
+
content: nil,
|
94
|
+
)
|
80
95
|
end
|
81
96
|
to_add << [url, metadata, exists]
|
82
97
|
end
|
@@ -91,12 +106,6 @@ module HTMLProofer
|
|
91
106
|
@failed_checks
|
92
107
|
end
|
93
108
|
|
94
|
-
private def file_exists?(absolute_path)
|
95
|
-
return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)
|
96
|
-
|
97
|
-
@runner.checked_paths[absolute_path] = File.exist?(absolute_path)
|
98
|
-
end
|
99
|
-
|
100
109
|
# verify the hash w/o just based on the URL, w/o looking at the target file
|
101
110
|
# => returns nil if the has could not be verified
|
102
111
|
private def hash_exists_for_url?(url)
|
@@ -111,9 +120,7 @@ module HTMLProofer
|
|
111
120
|
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
112
121
|
fragment_ids = [href_hash, decoded_href_hash]
|
113
122
|
# https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
|
114
|
-
|
115
|
-
|
116
|
-
nil
|
123
|
+
true if fragment_ids.include?("top")
|
117
124
|
end
|
118
125
|
|
119
126
|
private def hash_exists_in_html?(href_hash, html)
|
data/lib/html_proofer/version.rb
CHANGED
@@ -4,7 +4,7 @@ module HTMLProofer
|
|
4
4
|
# https://stackoverflow.com/a/8812293
|
5
5
|
class XpathFunctions
|
6
6
|
def case_sensitive_equals(node_set, str_to_match)
|
7
|
-
node_set.find_all { |node| node.to_s
|
7
|
+
node_set.find_all { |node| node.to_s == str_to_match.to_s }
|
8
8
|
end
|
9
9
|
end
|
10
10
|
end
|
data/lib/html_proofer.rb
CHANGED
@@ -15,8 +15,11 @@ require "html_proofer/version"
|
|
15
15
|
require "fileutils"
|
16
16
|
|
17
17
|
if ENV.fetch("DEBUG", false)
|
18
|
-
require "awesome_print"
|
19
18
|
require "debug"
|
19
|
+
begin
|
20
|
+
require "amazing_print"
|
21
|
+
rescue LoadError # rubocop:disable Lint/SuppressedException
|
22
|
+
end
|
20
23
|
end
|
21
24
|
|
22
25
|
module HTMLProofer
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.0.
|
4
|
+
version: 5.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -24,6 +24,7 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '2.3'
|
27
|
+
force_ruby_platform: false
|
27
28
|
- !ruby/object:Gem::Dependency
|
28
29
|
name: async
|
29
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,6 +39,7 @@ dependencies:
|
|
38
39
|
- - "~>"
|
39
40
|
- !ruby/object:Gem::Version
|
40
41
|
version: '2.1'
|
42
|
+
force_ruby_platform: false
|
41
43
|
- !ruby/object:Gem::Dependency
|
42
44
|
name: nokogiri
|
43
45
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,6 +54,22 @@ dependencies:
|
|
52
54
|
- - "~>"
|
53
55
|
- !ruby/object:Gem::Version
|
54
56
|
version: '1.13'
|
57
|
+
force_ruby_platform: false
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: pdf-reader
|
60
|
+
requirement: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - "~>"
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '2.11'
|
65
|
+
type: :runtime
|
66
|
+
prerelease: false
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - "~>"
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '2.11'
|
72
|
+
force_ruby_platform: false
|
55
73
|
- !ruby/object:Gem::Dependency
|
56
74
|
name: rainbow
|
57
75
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +84,7 @@ dependencies:
|
|
66
84
|
- - "~>"
|
67
85
|
- !ruby/object:Gem::Version
|
68
86
|
version: '3.0'
|
87
|
+
force_ruby_platform: false
|
69
88
|
- !ruby/object:Gem::Dependency
|
70
89
|
name: typhoeus
|
71
90
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +99,7 @@ dependencies:
|
|
80
99
|
- - "~>"
|
81
100
|
- !ruby/object:Gem::Version
|
82
101
|
version: '1.3'
|
102
|
+
force_ruby_platform: false
|
83
103
|
- !ruby/object:Gem::Dependency
|
84
104
|
name: yell
|
85
105
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,6 +114,7 @@ dependencies:
|
|
94
114
|
- - "~>"
|
95
115
|
- !ruby/object:Gem::Version
|
96
116
|
version: '2.0'
|
117
|
+
force_ruby_platform: false
|
97
118
|
- !ruby/object:Gem::Dependency
|
98
119
|
name: zeitwerk
|
99
120
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,62 +129,7 @@ dependencies:
|
|
108
129
|
- - "~>"
|
109
130
|
- !ruby/object:Gem::Version
|
110
131
|
version: '2.5'
|
111
|
-
|
112
|
-
name: awesome_print
|
113
|
-
requirement: !ruby/object:Gem::Requirement
|
114
|
-
requirements:
|
115
|
-
- - ">="
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
version: '0'
|
118
|
-
type: :development
|
119
|
-
prerelease: false
|
120
|
-
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
requirements:
|
122
|
-
- - ">="
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: '0'
|
125
|
-
- !ruby/object:Gem::Dependency
|
126
|
-
name: debug
|
127
|
-
requirement: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - ">="
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: '0'
|
132
|
-
type: :development
|
133
|
-
prerelease: false
|
134
|
-
version_requirements: !ruby/object:Gem::Requirement
|
135
|
-
requirements:
|
136
|
-
- - ">="
|
137
|
-
- !ruby/object:Gem::Version
|
138
|
-
version: '0'
|
139
|
-
- !ruby/object:Gem::Dependency
|
140
|
-
name: rake
|
141
|
-
requirement: !ruby/object:Gem::Requirement
|
142
|
-
requirements:
|
143
|
-
- - ">="
|
144
|
-
- !ruby/object:Gem::Version
|
145
|
-
version: '0'
|
146
|
-
type: :development
|
147
|
-
prerelease: false
|
148
|
-
version_requirements: !ruby/object:Gem::Requirement
|
149
|
-
requirements:
|
150
|
-
- - ">="
|
151
|
-
- !ruby/object:Gem::Version
|
152
|
-
version: '0'
|
153
|
-
- !ruby/object:Gem::Dependency
|
154
|
-
name: redcarpet
|
155
|
-
requirement: !ruby/object:Gem::Requirement
|
156
|
-
requirements:
|
157
|
-
- - ">="
|
158
|
-
- !ruby/object:Gem::Version
|
159
|
-
version: '0'
|
160
|
-
type: :development
|
161
|
-
prerelease: false
|
162
|
-
version_requirements: !ruby/object:Gem::Requirement
|
163
|
-
requirements:
|
164
|
-
- - ">="
|
165
|
-
- !ruby/object:Gem::Version
|
166
|
-
version: '0'
|
132
|
+
force_ruby_platform: false
|
167
133
|
- !ruby/object:Gem::Dependency
|
168
134
|
name: rspec
|
169
135
|
requirement: !ruby/object:Gem::Requirement
|
@@ -178,48 +144,6 @@ dependencies:
|
|
178
144
|
- - "~>"
|
179
145
|
- !ruby/object:Gem::Version
|
180
146
|
version: '3.1'
|
181
|
-
- !ruby/object:Gem::Dependency
|
182
|
-
name: rubocop
|
183
|
-
requirement: !ruby/object:Gem::Requirement
|
184
|
-
requirements:
|
185
|
-
- - ">="
|
186
|
-
- !ruby/object:Gem::Version
|
187
|
-
version: '0'
|
188
|
-
type: :development
|
189
|
-
prerelease: false
|
190
|
-
version_requirements: !ruby/object:Gem::Requirement
|
191
|
-
requirements:
|
192
|
-
- - ">="
|
193
|
-
- !ruby/object:Gem::Version
|
194
|
-
version: '0'
|
195
|
-
- !ruby/object:Gem::Dependency
|
196
|
-
name: rubocop-rspec
|
197
|
-
requirement: !ruby/object:Gem::Requirement
|
198
|
-
requirements:
|
199
|
-
- - ">="
|
200
|
-
- !ruby/object:Gem::Version
|
201
|
-
version: '0'
|
202
|
-
type: :development
|
203
|
-
prerelease: false
|
204
|
-
version_requirements: !ruby/object:Gem::Requirement
|
205
|
-
requirements:
|
206
|
-
- - ">="
|
207
|
-
- !ruby/object:Gem::Version
|
208
|
-
version: '0'
|
209
|
-
- !ruby/object:Gem::Dependency
|
210
|
-
name: rubocop-standard
|
211
|
-
requirement: !ruby/object:Gem::Requirement
|
212
|
-
requirements:
|
213
|
-
- - ">="
|
214
|
-
- !ruby/object:Gem::Version
|
215
|
-
version: '0'
|
216
|
-
type: :development
|
217
|
-
prerelease: false
|
218
|
-
version_requirements: !ruby/object:Gem::Requirement
|
219
|
-
requirements:
|
220
|
-
- - ">="
|
221
|
-
- !ruby/object:Gem::Version
|
222
|
-
version: '0'
|
223
147
|
- !ruby/object:Gem::Dependency
|
224
148
|
name: timecop
|
225
149
|
requirement: !ruby/object:Gem::Requirement
|
@@ -288,7 +212,7 @@ licenses:
|
|
288
212
|
metadata:
|
289
213
|
funding_uri: https://github.com/sponsors/gjtorikian/
|
290
214
|
rubygems_mfa_required: 'true'
|
291
|
-
post_install_message:
|
215
|
+
post_install_message:
|
292
216
|
rdoc_options: []
|
293
217
|
require_paths:
|
294
218
|
- lib
|
@@ -306,8 +230,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
306
230
|
- !ruby/object:Gem::Version
|
307
231
|
version: '0'
|
308
232
|
requirements: []
|
309
|
-
rubygems_version: 3.3
|
310
|
-
signing_key:
|
233
|
+
rubygems_version: 3.5.3
|
234
|
+
signing_key:
|
311
235
|
specification_version: 4
|
312
236
|
summary: A set of tests to validate your HTML output. These tests check if your image
|
313
237
|
references are legitimate, if they have alt tags, if your internal links are working,
|