html-proofer 4.0.0.rc3 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +3 -2
- data/lib/html-proofer.rb +1 -1
- data/lib/html_proofer/attribute/url.rb +180 -174
- data/lib/html_proofer/cache.rb +128 -85
- data/lib/html_proofer/check/favicon.rb +29 -24
- data/lib/html_proofer/check/images.rb +78 -47
- data/lib/html_proofer/check/links.rb +109 -98
- data/lib/html_proofer/check/open_graph.rb +30 -25
- data/lib/html_proofer/check/scripts.rb +36 -28
- data/lib/html_proofer/check.rb +11 -10
- data/lib/html_proofer/configuration.rb +16 -15
- data/lib/html_proofer/element.rb +19 -19
- data/lib/html_proofer/log.rb +19 -19
- data/lib/html_proofer/reporter/cli.rb +22 -18
- data/lib/html_proofer/reporter.rb +3 -3
- data/lib/html_proofer/runner.rb +45 -44
- data/lib/html_proofer/url_validator/external.rb +157 -152
- data/lib/html_proofer/url_validator/internal.rb +72 -62
- data/lib/html_proofer/utils.rb +5 -5
- data/lib/html_proofer/version.rb +1 -1
- data/lib/html_proofer.rb +11 -9
- metadata +8 -7
data/lib/html_proofer/cache.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
3
|
+
require "date"
|
4
|
+
require "json"
|
5
|
+
require "uri"
|
6
6
|
|
7
7
|
module HTMLProofer
|
8
8
|
class Cache
|
@@ -10,8 +10,8 @@ module HTMLProofer
|
|
10
10
|
|
11
11
|
CACHE_VERSION = 2
|
12
12
|
|
13
|
-
DEFAULT_STORAGE_DIR = File.join(
|
14
|
-
DEFAULT_CACHE_FILE_NAME =
|
13
|
+
DEFAULT_STORAGE_DIR = File.join("tmp", ".htmlproofer")
|
14
|
+
DEFAULT_CACHE_FILE_NAME = "cache.json"
|
15
15
|
|
16
16
|
URI_REGEXP = URI::DEFAULT_PARSER.make_regexp
|
17
17
|
|
@@ -21,7 +21,7 @@ module HTMLProofer
|
|
21
21
|
@runner = runner
|
22
22
|
@logger = @runner.logger
|
23
23
|
|
24
|
-
@cache_datetime =
|
24
|
+
@cache_datetime = Time.now
|
25
25
|
@cache_time = @cache_datetime.to_time
|
26
26
|
|
27
27
|
if blank?(options)
|
@@ -29,28 +29,25 @@ module HTMLProofer
|
|
29
29
|
else
|
30
30
|
define_singleton_method(:enabled?) { true }
|
31
31
|
setup_cache!(options)
|
32
|
-
@parsed_timeframe = parsed_timeframe(options[:timeframe])
|
33
|
-
end
|
34
|
-
end
|
35
32
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
time = Time.parse(time) if time.is_a?(String)
|
40
|
-
(@parsed_timeframe..@cache_time).cover?(time)
|
33
|
+
@external_timeframe = parsed_timeframe(options[:timeframe][:external])
|
34
|
+
@internal_timeframe = parsed_timeframe(options[:timeframe][:internal])
|
35
|
+
end
|
41
36
|
end
|
42
37
|
|
43
38
|
def parsed_timeframe(timeframe)
|
39
|
+
return nil if timeframe.nil?
|
40
|
+
|
44
41
|
time, date = timeframe.match(/(\d+)(\D)/).captures
|
45
42
|
time = time.to_i
|
46
43
|
case date
|
47
|
-
when
|
44
|
+
when "M"
|
48
45
|
time_ago(time, :months)
|
49
|
-
when
|
46
|
+
when "w"
|
50
47
|
time_ago(time, :weeks)
|
51
|
-
when
|
48
|
+
when "d"
|
52
49
|
time_ago(time, :days)
|
53
|
-
when
|
50
|
+
when "h"
|
54
51
|
time_ago(time, :hours)
|
55
52
|
else
|
56
53
|
raise ArgumentError, "#{date} is not a valid timeframe!"
|
@@ -71,7 +68,8 @@ module HTMLProofer
|
|
71
68
|
found = status_code.between?(200, 299)
|
72
69
|
|
73
70
|
clean_url = cleaned_url(url)
|
74
|
-
@cache_log[:external][clean_url] =
|
71
|
+
@cache_log[:external][clean_url] =
|
72
|
+
{ time: @cache_time.to_s, found: found, status_code: status_code, message: msg, metadata: filenames }
|
75
73
|
end
|
76
74
|
|
77
75
|
def detect_url_changes(urls_detected, type)
|
@@ -82,39 +80,104 @@ module HTMLProofer
|
|
82
80
|
additions
|
83
81
|
end
|
84
82
|
|
83
|
+
def write
|
84
|
+
return unless enabled?
|
85
|
+
|
86
|
+
File.write(@cache_file, @cache_log.to_json)
|
87
|
+
end
|
88
|
+
|
89
|
+
def retrieve_urls(urls_detected, type)
|
90
|
+
# if there are no urls, bail
|
91
|
+
return {} if urls_detected.empty?
|
92
|
+
|
93
|
+
urls_detected = urls_detected.transform_keys do |url|
|
94
|
+
cleaned_url(url)
|
95
|
+
end
|
96
|
+
|
97
|
+
urls_to_check = detect_url_changes(urls_detected, type)
|
98
|
+
|
99
|
+
@cache_log[type].each_pair do |url, cache|
|
100
|
+
within_timeframe = type == :external ? within_external_timeframe?(cache[:time]) : within_internal_timeframe?(cache[:time])
|
101
|
+
next if within_timeframe
|
102
|
+
|
103
|
+
urls_to_check[url] = cache[:metadata] # recheck expired links
|
104
|
+
end
|
105
|
+
|
106
|
+
urls_to_check
|
107
|
+
end
|
108
|
+
|
109
|
+
def within_external_timeframe?(time)
|
110
|
+
within_timeframe?(time, @external_timeframe)
|
111
|
+
end
|
112
|
+
|
113
|
+
def within_internal_timeframe?(time)
|
114
|
+
within_timeframe?(time, @internal_timeframe)
|
115
|
+
end
|
116
|
+
|
117
|
+
def empty?
|
118
|
+
blank?(@cache_log) || (@cache_log[:internal].empty? && @cache_log[:external].empty?)
|
119
|
+
end
|
120
|
+
|
121
|
+
def size(type)
|
122
|
+
@cache_log[type].size
|
123
|
+
end
|
124
|
+
|
85
125
|
private def construct_internal_link_metadata(metadata, found)
|
86
126
|
{
|
87
127
|
source: metadata[:source],
|
88
|
-
|
128
|
+
filename: metadata[:filename],
|
89
129
|
line: metadata[:line],
|
90
130
|
base_url: metadata[:base_url],
|
91
|
-
found: found
|
131
|
+
found: found,
|
92
132
|
}
|
93
133
|
end
|
94
134
|
|
95
135
|
# prepare to add new URLs detected
|
96
136
|
private def determine_additions(urls_detected, type)
|
97
|
-
additions = urls_detected
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
137
|
+
additions = type == :external ? determine_external_additions(urls_detected) : determine_internal_additions(urls_detected)
|
138
|
+
|
139
|
+
new_link_count = additions.length
|
140
|
+
new_link_text = pluralize(new_link_count, "new #{type} link", "new #{type} links")
|
141
|
+
@logger.log(:debug, "Adding #{new_link_text} to the cache")
|
142
|
+
|
143
|
+
additions
|
144
|
+
end
|
145
|
+
|
146
|
+
private def determine_external_additions(urls_detected)
|
147
|
+
urls_detected.reject do |url, _metadata|
|
148
|
+
if @cache_log[:external].include?(url)
|
149
|
+
@cache_log[:external][url][:found] # if this is false, we're trying again
|
107
150
|
else
|
108
|
-
@logger.log
|
151
|
+
@logger.log(:debug, "Adding #{url} to external cache")
|
109
152
|
false
|
110
153
|
end
|
111
154
|
end
|
155
|
+
end
|
112
156
|
|
113
|
-
|
114
|
-
|
115
|
-
|
157
|
+
private def determine_internal_additions(urls_detected)
|
158
|
+
urls_detected.each_with_object({}) do |(url, metadata), hsh|
|
159
|
+
# url is not even in cache
|
160
|
+
if @cache_log[:internal][url].nil?
|
161
|
+
hsh[url] = metadata
|
162
|
+
next
|
163
|
+
end
|
116
164
|
|
117
|
-
|
165
|
+
cache_metadata = @cache_log[:internal][url][:metadata]
|
166
|
+
incoming_metadata = urls_detected[url].each_with_object([]) do |incoming_url, arr|
|
167
|
+
existing_cache_metadata = cache_metadata.find { |k, _| k[:filename] == incoming_url[:filename] }
|
168
|
+
|
169
|
+
# cache for this url, from an existing path, exists as found
|
170
|
+
if !existing_cache_metadata.nil? && !existing_cache_metadata.empty? && existing_cache_metadata[:found]
|
171
|
+
metadata.find { |m| m[:filename] == existing_cache_metadata[:filename] }[:found] = true
|
172
|
+
next
|
173
|
+
end
|
174
|
+
|
175
|
+
@logger.log(:debug, "Adding #{incoming_url} to internal cache")
|
176
|
+
arr << incoming_url
|
177
|
+
end
|
178
|
+
|
179
|
+
hsh[url] = incoming_metadata
|
180
|
+
end
|
118
181
|
end
|
119
182
|
|
120
183
|
# remove from cache URLs that no longer exist
|
@@ -125,54 +188,21 @@ module HTMLProofer
|
|
125
188
|
if urls_detected.include?(url)
|
126
189
|
false
|
127
190
|
elsif url_matches_type?(url, type)
|
128
|
-
@logger.log
|
191
|
+
@logger.log(:debug, "Removing #{url} from #{type} cache")
|
129
192
|
deletions += 1
|
130
193
|
true
|
131
194
|
end
|
132
195
|
end
|
133
196
|
|
134
197
|
del_link_text = pluralize(deletions, "outdated #{type} link", "outdated #{type} links")
|
135
|
-
@logger.log
|
136
|
-
end
|
137
|
-
|
138
|
-
def write
|
139
|
-
return unless enabled?
|
140
|
-
|
141
|
-
File.write(@cache_file, @cache_log.to_json)
|
142
|
-
end
|
143
|
-
|
144
|
-
def retrieve_urls(urls_detected, type)
|
145
|
-
# if there are no urls, bail
|
146
|
-
return {} if urls_detected.empty?
|
147
|
-
|
148
|
-
urls_detected = urls_detected.transform_keys do |url|
|
149
|
-
cleaned_url(url)
|
150
|
-
end
|
151
|
-
|
152
|
-
urls_to_check = detect_url_changes(urls_detected, type)
|
153
|
-
|
154
|
-
@cache_log[type].each_pair do |url, cache|
|
155
|
-
next if within_timeframe?(cache[:time])
|
156
|
-
|
157
|
-
urls_to_check[url] = cache[:metadata] # recheck expired links
|
158
|
-
end
|
159
|
-
|
160
|
-
urls_to_check
|
161
|
-
end
|
162
|
-
|
163
|
-
def empty?
|
164
|
-
blank?(@cache_log) || (@cache_log[:internal].empty? && @cache_log[:external].empty?)
|
165
|
-
end
|
166
|
-
|
167
|
-
def size(type)
|
168
|
-
@cache_log[type].size
|
198
|
+
@logger.log(:debug, "Removing #{del_link_text} from the cache")
|
169
199
|
end
|
170
200
|
|
171
201
|
private def setup_cache!(options)
|
172
202
|
default_structure = {
|
173
203
|
version: CACHE_VERSION,
|
174
204
|
internal: {},
|
175
|
-
external: {}
|
205
|
+
external: {},
|
176
206
|
}
|
177
207
|
|
178
208
|
@storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR
|
@@ -193,26 +223,32 @@ module HTMLProofer
|
|
193
223
|
|
194
224
|
old_cache = (cache_version = log[:version]).nil?
|
195
225
|
@cache_log = if old_cache # previous cache version, create a new one
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
226
|
+
default_structure
|
227
|
+
elsif cache_version != CACHE_VERSION
|
228
|
+
# if cache version is newer...do something
|
229
|
+
else
|
230
|
+
log[:internal] = log[:internal].transform_keys(&:to_s)
|
231
|
+
log[:external] = log[:external].transform_keys(&:to_s)
|
232
|
+
log
|
233
|
+
end
|
204
234
|
end
|
205
235
|
|
236
|
+
# https://github.com/rails/rails/blob/3872bc0e54d32e8bf3a6299b0bfe173d94b072fc/activesupport/lib/active_support/duration.rb#L112-L117
|
237
|
+
SECONDS_PER_HOUR = 3600
|
238
|
+
SECONDS_PER_DAY = 86400
|
239
|
+
SECONDS_PER_WEEK = 604800
|
240
|
+
SECONDS_PER_MONTH = 2629746 # 1/12 of a gregorian year
|
241
|
+
|
206
242
|
private def time_ago(measurement, unit)
|
207
243
|
case unit
|
208
244
|
when :months
|
209
|
-
@cache_datetime
|
245
|
+
@cache_datetime - (SECONDS_PER_MONTH * measurement)
|
210
246
|
when :weeks
|
211
|
-
@cache_datetime - (
|
247
|
+
@cache_datetime - (SECONDS_PER_WEEK * measurement)
|
212
248
|
when :days
|
213
|
-
@cache_datetime - measurement
|
249
|
+
@cache_datetime - (SECONDS_PER_DAY * measurement)
|
214
250
|
when :hours
|
215
|
-
@cache_datetime - Rational(
|
251
|
+
@cache_datetime - Rational(SECONDS_PER_HOUR * measurement)
|
216
252
|
end.to_time
|
217
253
|
end
|
218
254
|
|
@@ -224,7 +260,7 @@ module HTMLProofer
|
|
224
260
|
private def cleaned_url(url)
|
225
261
|
cleaned_url = escape_unescape(url)
|
226
262
|
|
227
|
-
return cleaned_url unless cleaned_url.end_with?(
|
263
|
+
return cleaned_url unless cleaned_url.end_with?("/", "#", "?") && cleaned_url.length > 1
|
228
264
|
|
229
265
|
cleaned_url[0..-2]
|
230
266
|
end
|
@@ -232,5 +268,12 @@ module HTMLProofer
|
|
232
268
|
private def escape_unescape(url)
|
233
269
|
Addressable::URI.parse(url).normalize.to_s
|
234
270
|
end
|
271
|
+
|
272
|
+
private def within_timeframe?(current_time, parsed_timeframe)
|
273
|
+
return false if current_time.nil? || parsed_timeframe.nil?
|
274
|
+
|
275
|
+
current_time = Time.parse(current_time) if current_time.is_a?(String)
|
276
|
+
(parsed_timeframe..@cache_time).cover?(current_time)
|
277
|
+
end
|
235
278
|
end
|
236
279
|
end
|
@@ -1,35 +1,40 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
3
|
+
module HTMLProofer
|
4
|
+
class Check
|
5
|
+
class Favicon < HTMLProofer::Check
|
6
|
+
def run
|
7
|
+
found = false
|
8
|
+
@html.css("link").each do |node|
|
9
|
+
@favicon = create_element(node)
|
8
10
|
|
9
|
-
|
11
|
+
next if @favicon.ignore?
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
+
break if (found = @favicon.node["rel"].split.last.eql?("icon"))
|
14
|
+
end
|
13
15
|
|
14
|
-
|
16
|
+
return if immediate_redirect?
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
if found
|
19
|
+
if @favicon.url.remote?
|
20
|
+
add_to_external_urls(@favicon.url, @favicon.line)
|
21
|
+
elsif !@favicon.url.exists?
|
22
|
+
add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line,
|
23
|
+
content: @favicon.content)
|
24
|
+
end
|
25
|
+
else
|
26
|
+
add_failure("no favicon provided")
|
27
|
+
end
|
21
28
|
end
|
22
|
-
else
|
23
|
-
add_failure('no favicon provided')
|
24
|
-
end
|
25
|
-
end
|
26
29
|
|
27
|
-
|
30
|
+
private
|
28
31
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
32
|
+
# allow any instant-redirect meta tag
|
33
|
+
def immediate_redirect?
|
34
|
+
@html.xpath("//meta[@http-equiv='refresh']").attribute("content").value.start_with?("0;")
|
35
|
+
rescue StandardError
|
36
|
+
false
|
37
|
+
end
|
38
|
+
end
|
34
39
|
end
|
35
40
|
end
|
@@ -1,62 +1,93 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
3
|
+
module HTMLProofer
|
4
|
+
class Check
|
5
|
+
class Images < HTMLProofer::Check
|
6
|
+
SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/.freeze
|
7
|
+
|
8
|
+
def run
|
9
|
+
@html.css("img").each do |node|
|
10
|
+
@img = create_element(node)
|
11
|
+
|
12
|
+
next if @img.ignore?
|
13
|
+
|
14
|
+
# screenshot filenames should return because of terrible names
|
15
|
+
add_failure("image has a terrible filename (#{@img.url.raw_attribute})", line: @img.line,
|
16
|
+
content: @img.content) if terrible_filename?
|
17
|
+
|
18
|
+
# does the image exist?
|
19
|
+
if missing_src?
|
20
|
+
add_failure("image has no src or srcset attribute", line: @img.line, content: @img.content)
|
21
|
+
elsif @img.url.remote?
|
22
|
+
add_to_external_urls(@img.url, @img.line)
|
23
|
+
elsif !@img.url.exists? && !@img.multiple_srcsets?
|
24
|
+
add_failure("internal image #{@img.url.raw_attribute} does not exist", line: @img.line,
|
25
|
+
content: @img.content)
|
26
|
+
elsif @img.multiple_srcsets?
|
27
|
+
srcsets = @img.srcset.split(",").map(&:strip)
|
28
|
+
srcsets.each do |srcset|
|
29
|
+
srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url)
|
30
|
+
|
31
|
+
if srcset_url.remote?
|
32
|
+
add_to_external_urls(srcset_url.url, @img.line)
|
33
|
+
elsif !srcset_url.exists?
|
34
|
+
add_failure("internal image #{srcset} does not exist", line: @img.line, content: @img.content)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
unless ignore_element?
|
40
|
+
if missing_alt_tag? && !ignore_missing_alt?
|
41
|
+
add_failure("image #{@img.url.raw_attribute} does not have an alt attribute", line: @img.line,
|
42
|
+
content: @img.content)
|
43
|
+
elsif (empty_alt_tag? || alt_all_spaces?) && !ignore_empty_alt?
|
44
|
+
add_failure("image #{@img.url.raw_attribute} has an alt attribute, but no content", line: @img.line,
|
45
|
+
content: @img.content)
|
46
|
+
end
|
31
47
|
end
|
48
|
+
|
49
|
+
add_failure("image #{@img.url.raw_attribute} uses the http scheme", line: @img.line,
|
50
|
+
content: @img.content) if @runner.enforce_https? && @img.url.http?
|
32
51
|
end
|
52
|
+
|
53
|
+
external_urls
|
33
54
|
end
|
34
55
|
|
35
|
-
|
56
|
+
def ignore_missing_alt?
|
57
|
+
@runner.options[:ignore_missing_alt]
|
58
|
+
end
|
36
59
|
|
37
|
-
|
38
|
-
|
60
|
+
def ignore_empty_alt?
|
61
|
+
@runner.options[:ignore_empty_alt]
|
62
|
+
end
|
39
63
|
|
40
|
-
|
41
|
-
|
64
|
+
def ignore_element?
|
65
|
+
@img.url.ignore? || @img.aria_hidden?
|
66
|
+
end
|
42
67
|
|
43
|
-
|
44
|
-
|
45
|
-
|
68
|
+
def missing_alt_tag?
|
69
|
+
@img.node["alt"].nil?
|
70
|
+
end
|
46
71
|
|
47
|
-
|
48
|
-
|
49
|
-
|
72
|
+
def empty_alt_tag?
|
73
|
+
!missing_alt_tag? && @img.node["alt"].empty?
|
74
|
+
end
|
50
75
|
|
51
|
-
|
52
|
-
|
53
|
-
|
76
|
+
def empty_whitespace_alt_tag?
|
77
|
+
!missing_alt_tag? && @img.node["alt"].strip.empty?
|
78
|
+
end
|
54
79
|
|
55
|
-
|
56
|
-
|
57
|
-
|
80
|
+
def alt_all_spaces?
|
81
|
+
!missing_alt_tag? && @img.node["alt"].split.all?(" ")
|
82
|
+
end
|
83
|
+
|
84
|
+
def terrible_filename?
|
85
|
+
@img.url.to_s =~ SCREEN_SHOT_REGEX
|
86
|
+
end
|
58
87
|
|
59
|
-
|
60
|
-
|
88
|
+
def missing_src?
|
89
|
+
blank?(@img.url.to_s)
|
90
|
+
end
|
91
|
+
end
|
61
92
|
end
|
62
93
|
end
|