html-proofer 4.0.0.rc3 → 4.0.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'date'
4
- require 'json'
5
- require 'uri'
3
+ require "date"
4
+ require "json"
5
+ require "uri"
6
6
 
7
7
  module HTMLProofer
8
8
  class Cache
@@ -10,8 +10,8 @@ module HTMLProofer
10
10
 
11
11
  CACHE_VERSION = 2
12
12
 
13
- DEFAULT_STORAGE_DIR = File.join('tmp', '.htmlproofer')
14
- DEFAULT_CACHE_FILE_NAME = 'cache.json'
13
+ DEFAULT_STORAGE_DIR = File.join("tmp", ".htmlproofer")
14
+ DEFAULT_CACHE_FILE_NAME = "cache.json"
15
15
 
16
16
  URI_REGEXP = URI::DEFAULT_PARSER.make_regexp
17
17
 
@@ -21,7 +21,7 @@ module HTMLProofer
21
21
  @runner = runner
22
22
  @logger = @runner.logger
23
23
 
24
- @cache_datetime = DateTime.now
24
+ @cache_datetime = Time.now
25
25
  @cache_time = @cache_datetime.to_time
26
26
 
27
27
  if blank?(options)
@@ -29,28 +29,25 @@ module HTMLProofer
29
29
  else
30
30
  define_singleton_method(:enabled?) { true }
31
31
  setup_cache!(options)
32
- @parsed_timeframe = parsed_timeframe(options[:timeframe])
33
- end
34
- end
35
32
 
36
- def within_timeframe?(time)
37
- return false if time.nil?
38
-
39
- time = Time.parse(time) if time.is_a?(String)
40
- (@parsed_timeframe..@cache_time).cover?(time)
33
+ @external_timeframe = parsed_timeframe(options[:timeframe][:external])
34
+ @internal_timeframe = parsed_timeframe(options[:timeframe][:internal])
35
+ end
41
36
  end
42
37
 
43
38
  def parsed_timeframe(timeframe)
39
+ return nil if timeframe.nil?
40
+
44
41
  time, date = timeframe.match(/(\d+)(\D)/).captures
45
42
  time = time.to_i
46
43
  case date
47
- when 'M'
44
+ when "M"
48
45
  time_ago(time, :months)
49
- when 'w'
46
+ when "w"
50
47
  time_ago(time, :weeks)
51
- when 'd'
48
+ when "d"
52
49
  time_ago(time, :days)
53
- when 'h'
50
+ when "h"
54
51
  time_ago(time, :hours)
55
52
  else
56
53
  raise ArgumentError, "#{date} is not a valid timeframe!"
@@ -71,7 +68,8 @@ module HTMLProofer
71
68
  found = status_code.between?(200, 299)
72
69
 
73
70
  clean_url = cleaned_url(url)
74
- @cache_log[:external][clean_url] = { time: @cache_time.to_s, found: found, status_code: status_code, message: msg, metadata: filenames }
71
+ @cache_log[:external][clean_url] =
72
+ { time: @cache_time.to_s, found: found, status_code: status_code, message: msg, metadata: filenames }
75
73
  end
76
74
 
77
75
  def detect_url_changes(urls_detected, type)
@@ -82,39 +80,104 @@ module HTMLProofer
82
80
  additions
83
81
  end
84
82
 
83
+ def write
84
+ return unless enabled?
85
+
86
+ File.write(@cache_file, @cache_log.to_json)
87
+ end
88
+
89
+ def retrieve_urls(urls_detected, type)
90
+ # if there are no urls, bail
91
+ return {} if urls_detected.empty?
92
+
93
+ urls_detected = urls_detected.transform_keys do |url|
94
+ cleaned_url(url)
95
+ end
96
+
97
+ urls_to_check = detect_url_changes(urls_detected, type)
98
+
99
+ @cache_log[type].each_pair do |url, cache|
100
+ within_timeframe = type == :external ? within_external_timeframe?(cache[:time]) : within_internal_timeframe?(cache[:time])
101
+ next if within_timeframe
102
+
103
+ urls_to_check[url] = cache[:metadata] # recheck expired links
104
+ end
105
+
106
+ urls_to_check
107
+ end
108
+
109
+ def within_external_timeframe?(time)
110
+ within_timeframe?(time, @external_timeframe)
111
+ end
112
+
113
+ def within_internal_timeframe?(time)
114
+ within_timeframe?(time, @internal_timeframe)
115
+ end
116
+
117
+ def empty?
118
+ blank?(@cache_log) || (@cache_log[:internal].empty? && @cache_log[:external].empty?)
119
+ end
120
+
121
+ def size(type)
122
+ @cache_log[type].size
123
+ end
124
+
85
125
  private def construct_internal_link_metadata(metadata, found)
86
126
  {
87
127
  source: metadata[:source],
88
- current_path: metadata[:current_path],
128
+ filename: metadata[:filename],
89
129
  line: metadata[:line],
90
130
  base_url: metadata[:base_url],
91
- found: found
131
+ found: found,
92
132
  }
93
133
  end
94
134
 
95
135
  # prepare to add new URLs detected
96
136
  private def determine_additions(urls_detected, type)
97
- additions = urls_detected.reject do |url, metadata|
98
- if @cache_log[type].include?(url)
99
- @cache_log[type][url][:metadata] = metadata
100
-
101
- # if this is false, we're trying again
102
- if type == :external
103
- @cache_log[type][url][:found]
104
- else
105
- @cache_log[type][url][:metadata].none? { |m| m[:found] }
106
- end
137
+ additions = type == :external ? determine_external_additions(urls_detected) : determine_internal_additions(urls_detected)
138
+
139
+ new_link_count = additions.length
140
+ new_link_text = pluralize(new_link_count, "new #{type} link", "new #{type} links")
141
+ @logger.log(:debug, "Adding #{new_link_text} to the cache")
142
+
143
+ additions
144
+ end
145
+
146
+ private def determine_external_additions(urls_detected)
147
+ urls_detected.reject do |url, _metadata|
148
+ if @cache_log[:external].include?(url)
149
+ @cache_log[:external][url][:found] # if this is false, we're trying again
107
150
  else
108
- @logger.log :debug, "Adding #{url} to #{type} cache"
151
+ @logger.log(:debug, "Adding #{url} to external cache")
109
152
  false
110
153
  end
111
154
  end
155
+ end
112
156
 
113
- new_link_count = additions.length
114
- new_link_text = pluralize(new_link_count, "new #{type} link", "new #{type} links")
115
- @logger.log :debug, "Adding #{new_link_text} to the cache"
157
+ private def determine_internal_additions(urls_detected)
158
+ urls_detected.each_with_object({}) do |(url, metadata), hsh|
159
+ # url is not even in cache
160
+ if @cache_log[:internal][url].nil?
161
+ hsh[url] = metadata
162
+ next
163
+ end
116
164
 
117
- additions
165
+ cache_metadata = @cache_log[:internal][url][:metadata]
166
+ incoming_metadata = urls_detected[url].each_with_object([]) do |incoming_url, arr|
167
+ existing_cache_metadata = cache_metadata.find { |k, _| k[:filename] == incoming_url[:filename] }
168
+
169
+ # cache for this url, from an existing path, exists as found
170
+ if !existing_cache_metadata.nil? && !existing_cache_metadata.empty? && existing_cache_metadata[:found]
171
+ metadata.find { |m| m[:filename] == existing_cache_metadata[:filename] }[:found] = true
172
+ next
173
+ end
174
+
175
+ @logger.log(:debug, "Adding #{incoming_url} to internal cache")
176
+ arr << incoming_url
177
+ end
178
+
179
+ hsh[url] = incoming_metadata
180
+ end
118
181
  end
119
182
 
120
183
  # remove from cache URLs that no longer exist
@@ -125,54 +188,21 @@ module HTMLProofer
125
188
  if urls_detected.include?(url)
126
189
  false
127
190
  elsif url_matches_type?(url, type)
128
- @logger.log :debug, "Removing #{url} from #{type} cache"
191
+ @logger.log(:debug, "Removing #{url} from #{type} cache")
129
192
  deletions += 1
130
193
  true
131
194
  end
132
195
  end
133
196
 
134
197
  del_link_text = pluralize(deletions, "outdated #{type} link", "outdated #{type} links")
135
- @logger.log :debug, "Removing #{del_link_text} from the cache"
136
- end
137
-
138
- def write
139
- return unless enabled?
140
-
141
- File.write(@cache_file, @cache_log.to_json)
142
- end
143
-
144
- def retrieve_urls(urls_detected, type)
145
- # if there are no urls, bail
146
- return {} if urls_detected.empty?
147
-
148
- urls_detected = urls_detected.transform_keys do |url|
149
- cleaned_url(url)
150
- end
151
-
152
- urls_to_check = detect_url_changes(urls_detected, type)
153
-
154
- @cache_log[type].each_pair do |url, cache|
155
- next if within_timeframe?(cache[:time])
156
-
157
- urls_to_check[url] = cache[:metadata] # recheck expired links
158
- end
159
-
160
- urls_to_check
161
- end
162
-
163
- def empty?
164
- blank?(@cache_log) || (@cache_log[:internal].empty? && @cache_log[:external].empty?)
165
- end
166
-
167
- def size(type)
168
- @cache_log[type].size
198
+ @logger.log(:debug, "Removing #{del_link_text} from the cache")
169
199
  end
170
200
 
171
201
  private def setup_cache!(options)
172
202
  default_structure = {
173
203
  version: CACHE_VERSION,
174
204
  internal: {},
175
- external: {}
205
+ external: {},
176
206
  }
177
207
 
178
208
  @storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR
@@ -193,26 +223,32 @@ module HTMLProofer
193
223
 
194
224
  old_cache = (cache_version = log[:version]).nil?
195
225
  @cache_log = if old_cache # previous cache version, create a new one
196
- default_structure
197
- elsif cache_version != CACHE_VERSION
198
- # if cache version is newer...do something
199
- else
200
- log[:internal] = log[:internal].transform_keys(&:to_s)
201
- log[:external] = log[:external].transform_keys(&:to_s)
202
- log
203
- end
226
+ default_structure
227
+ elsif cache_version != CACHE_VERSION
228
+ # if cache version is newer...do something
229
+ else
230
+ log[:internal] = log[:internal].transform_keys(&:to_s)
231
+ log[:external] = log[:external].transform_keys(&:to_s)
232
+ log
233
+ end
204
234
  end
205
235
 
236
+ # https://github.com/rails/rails/blob/3872bc0e54d32e8bf3a6299b0bfe173d94b072fc/activesupport/lib/active_support/duration.rb#L112-L117
237
+ SECONDS_PER_HOUR = 3600
238
+ SECONDS_PER_DAY = 86400
239
+ SECONDS_PER_WEEK = 604800
240
+ SECONDS_PER_MONTH = 2629746 # 1/12 of a gregorian year
241
+
206
242
  private def time_ago(measurement, unit)
207
243
  case unit
208
244
  when :months
209
- @cache_datetime >> -measurement
245
+ @cache_datetime - (SECONDS_PER_MONTH * measurement)
210
246
  when :weeks
211
- @cache_datetime - (measurement * 7)
247
+ @cache_datetime - (SECONDS_PER_WEEK * measurement)
212
248
  when :days
213
- @cache_datetime - measurement
249
+ @cache_datetime - (SECONDS_PER_DAY * measurement)
214
250
  when :hours
215
- @cache_datetime - Rational(measurement / 24.0)
251
+ @cache_datetime - Rational(SECONDS_PER_HOUR * measurement)
216
252
  end.to_time
217
253
  end
218
254
 
@@ -224,7 +260,7 @@ module HTMLProofer
224
260
  private def cleaned_url(url)
225
261
  cleaned_url = escape_unescape(url)
226
262
 
227
- return cleaned_url unless cleaned_url.end_with?('/', '#', '?') && cleaned_url.length > 1
263
+ return cleaned_url unless cleaned_url.end_with?("/", "#", "?") && cleaned_url.length > 1
228
264
 
229
265
  cleaned_url[0..-2]
230
266
  end
@@ -232,5 +268,12 @@ module HTMLProofer
232
268
  private def escape_unescape(url)
233
269
  Addressable::URI.parse(url).normalize.to_s
234
270
  end
271
+
272
+ private def within_timeframe?(current_time, parsed_timeframe)
273
+ return false if current_time.nil? || parsed_timeframe.nil?
274
+
275
+ current_time = Time.parse(current_time) if current_time.is_a?(String)
276
+ (parsed_timeframe..@cache_time).cover?(current_time)
277
+ end
235
278
  end
236
279
  end
@@ -1,35 +1,40 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class HTMLProofer::Check::Favicon < HTMLProofer::Check
4
- def run
5
- found = false
6
- @html.css('link').each do |node|
7
- @favicon = create_element(node)
3
+ module HTMLProofer
4
+ class Check
5
+ class Favicon < HTMLProofer::Check
6
+ def run
7
+ found = false
8
+ @html.css("link").each do |node|
9
+ @favicon = create_element(node)
8
10
 
9
- next if @favicon.ignore?
11
+ next if @favicon.ignore?
10
12
 
11
- break if (found = @favicon.node['rel'].split.last.eql? 'icon')
12
- end
13
+ break if (found = @favicon.node["rel"].split.last.eql?("icon"))
14
+ end
13
15
 
14
- return if immediate_redirect?
16
+ return if immediate_redirect?
15
17
 
16
- if found
17
- if @favicon.url.remote?
18
- add_to_external_urls(@favicon.url, @favicon.line)
19
- elsif !@favicon.url.exists?
20
- add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line, content: @favicon.content)
18
+ if found
19
+ if @favicon.url.remote?
20
+ add_to_external_urls(@favicon.url, @favicon.line)
21
+ elsif !@favicon.url.exists?
22
+ add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line,
23
+ content: @favicon.content)
24
+ end
25
+ else
26
+ add_failure("no favicon provided")
27
+ end
21
28
  end
22
- else
23
- add_failure('no favicon provided')
24
- end
25
- end
26
29
 
27
- private
30
+ private
28
31
 
29
- # allow any instant-redirect meta tag
30
- def immediate_redirect?
31
- @html.xpath("//meta[@http-equiv='refresh']").attribute('content').value.start_with? '0;'
32
- rescue StandardError
33
- false
32
+ # allow any instant-redirect meta tag
33
+ def immediate_redirect?
34
+ @html.xpath("//meta[@http-equiv='refresh']").attribute("content").value.start_with?("0;")
35
+ rescue StandardError
36
+ false
37
+ end
38
+ end
34
39
  end
35
40
  end
@@ -1,62 +1,93 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class HTMLProofer::Check::Images < HTMLProofer::Check
4
- SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/.freeze
5
-
6
- def run
7
- @html.css('img').each do |node|
8
- @img = create_element(node)
9
-
10
- next if @img.ignore?
11
-
12
- # screenshot filenames should return because of terrible names
13
- add_failure("image has a terrible filename (#{@img.url.raw_attribute})", line: @img.line, content: @img.content) if terrible_filename?
14
-
15
- # does the image exist?
16
- if missing_src?
17
- add_failure('image has no src or srcset attribute', line: @img.line, content: @img.content)
18
- elsif @img.url.remote?
19
- add_to_external_urls(@img.url, @img.line)
20
- elsif !@img.url.exists? && !@img.multiple_srcsets?
21
- add_failure("internal image #{@img.url.raw_attribute} does not exist", line: @img.line, content: @img.content)
22
- elsif @img.multiple_srcsets?
23
- srcsets = @img.srcset.split(',').map(&:strip)
24
- srcsets.each do |srcset|
25
- srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url)
26
-
27
- if srcset_url.remote?
28
- add_to_external_urls(srcset_url.url, @img.line)
29
- elsif !srcset_url.exists?
30
- add_failure("internal image #{srcset} does not exist", line: @img.line, content: @img.content)
3
+ module HTMLProofer
4
+ class Check
5
+ class Images < HTMLProofer::Check
6
+ SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/.freeze
7
+
8
+ def run
9
+ @html.css("img").each do |node|
10
+ @img = create_element(node)
11
+
12
+ next if @img.ignore?
13
+
14
+ # screenshot filenames should return because of terrible names
15
+ add_failure("image has a terrible filename (#{@img.url.raw_attribute})", line: @img.line,
16
+ content: @img.content) if terrible_filename?
17
+
18
+ # does the image exist?
19
+ if missing_src?
20
+ add_failure("image has no src or srcset attribute", line: @img.line, content: @img.content)
21
+ elsif @img.url.remote?
22
+ add_to_external_urls(@img.url, @img.line)
23
+ elsif !@img.url.exists? && !@img.multiple_srcsets?
24
+ add_failure("internal image #{@img.url.raw_attribute} does not exist", line: @img.line,
25
+ content: @img.content)
26
+ elsif @img.multiple_srcsets?
27
+ srcsets = @img.srcset.split(",").map(&:strip)
28
+ srcsets.each do |srcset|
29
+ srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url)
30
+
31
+ if srcset_url.remote?
32
+ add_to_external_urls(srcset_url.url, @img.line)
33
+ elsif !srcset_url.exists?
34
+ add_failure("internal image #{srcset} does not exist", line: @img.line, content: @img.content)
35
+ end
36
+ end
37
+ end
38
+
39
+ unless ignore_element?
40
+ if missing_alt_tag? && !ignore_missing_alt?
41
+ add_failure("image #{@img.url.raw_attribute} does not have an alt attribute", line: @img.line,
42
+ content: @img.content)
43
+ elsif (empty_alt_tag? || alt_all_spaces?) && !ignore_empty_alt?
44
+ add_failure("image #{@img.url.raw_attribute} has an alt attribute, but no content", line: @img.line,
45
+ content: @img.content)
46
+ end
31
47
  end
48
+
49
+ add_failure("image #{@img.url.raw_attribute} uses the http scheme", line: @img.line,
50
+ content: @img.content) if @runner.enforce_https? && @img.url.http?
32
51
  end
52
+
53
+ external_urls
33
54
  end
34
55
 
35
- add_failure("image #{@img.url.raw_attribute} does not have an alt attribute", line: @img.line, content: @img.content) if empty_alt_tag? && !ignore_missing_alt? && !ignore_alt?
56
+ def ignore_missing_alt?
57
+ @runner.options[:ignore_missing_alt]
58
+ end
36
59
 
37
- add_failure("image #{@img.url.raw_attribute} uses the http scheme", line: @img.line, content: @img.content) if @runner.enforce_https? && @img.url.http?
38
- end
60
+ def ignore_empty_alt?
61
+ @runner.options[:ignore_empty_alt]
62
+ end
39
63
 
40
- external_urls
41
- end
64
+ def ignore_element?
65
+ @img.url.ignore? || @img.aria_hidden?
66
+ end
42
67
 
43
- def ignore_missing_alt?
44
- @runner.options[:ignore_missing_alt]
45
- end
68
+ def missing_alt_tag?
69
+ @img.node["alt"].nil?
70
+ end
46
71
 
47
- def ignore_alt?
48
- @img.url.ignore? || @img.aria_hidden?
49
- end
72
+ def empty_alt_tag?
73
+ !missing_alt_tag? && @img.node["alt"].empty?
74
+ end
50
75
 
51
- def empty_alt_tag?
52
- @img.node['alt'].nil? || @img.node['alt'].strip.empty?
53
- end
76
+ def empty_whitespace_alt_tag?
77
+ !missing_alt_tag? && @img.node["alt"].strip.empty?
78
+ end
54
79
 
55
- def terrible_filename?
56
- @img.url.to_s =~ SCREEN_SHOT_REGEX
57
- end
80
+ def alt_all_spaces?
81
+ !missing_alt_tag? && @img.node["alt"].split.all?(" ")
82
+ end
83
+
84
+ def terrible_filename?
85
+ @img.url.to_s =~ SCREEN_SHOT_REGEX
86
+ end
58
87
 
59
- def missing_src?
60
- blank?(@img.url.to_s)
88
+ def missing_src?
89
+ blank?(@img.url.to_s)
90
+ end
91
+ end
61
92
  end
62
93
  end