html-proofer 4.0.0.rc2 → 4.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'date'
4
- require 'json'
5
- require 'uri'
3
+ require "date"
4
+ require "json"
5
+ require "uri"
6
6
 
7
7
  module HTMLProofer
8
8
  class Cache
@@ -10,8 +10,8 @@ module HTMLProofer
10
10
 
11
11
  CACHE_VERSION = 2
12
12
 
13
- DEFAULT_STORAGE_DIR = File.join('tmp', '.htmlproofer')
14
- DEFAULT_CACHE_FILE_NAME = 'cache.json'
13
+ DEFAULT_STORAGE_DIR = File.join("tmp", ".htmlproofer")
14
+ DEFAULT_CACHE_FILE_NAME = "cache.json"
15
15
 
16
16
  URI_REGEXP = URI::DEFAULT_PARSER.make_regexp
17
17
 
@@ -21,7 +21,7 @@ module HTMLProofer
21
21
  @runner = runner
22
22
  @logger = @runner.logger
23
23
 
24
- @cache_datetime = DateTime.now
24
+ @cache_datetime = Time.now
25
25
  @cache_time = @cache_datetime.to_time
26
26
 
27
27
  if blank?(options)
@@ -29,28 +29,25 @@ module HTMLProofer
29
29
  else
30
30
  define_singleton_method(:enabled?) { true }
31
31
  setup_cache!(options)
32
- @parsed_timeframe = parsed_timeframe(options[:timeframe])
33
- end
34
- end
35
32
 
36
- def within_timeframe?(time)
37
- return false if time.nil?
38
-
39
- time = Time.parse(time) if time.is_a?(String)
40
- (@parsed_timeframe..@cache_time).cover?(time)
33
+ @external_timeframe = parsed_timeframe(options[:timeframe][:external])
34
+ @internal_timeframe = parsed_timeframe(options[:timeframe][:internal])
35
+ end
41
36
  end
42
37
 
43
38
  def parsed_timeframe(timeframe)
39
+ return nil if timeframe.nil?
40
+
44
41
  time, date = timeframe.match(/(\d+)(\D)/).captures
45
42
  time = time.to_i
46
43
  case date
47
- when 'M'
44
+ when "M"
48
45
  time_ago(time, :months)
49
- when 'w'
46
+ when "w"
50
47
  time_ago(time, :weeks)
51
- when 'd'
48
+ when "d"
52
49
  time_ago(time, :days)
53
- when 'h'
50
+ when "h"
54
51
  time_ago(time, :hours)
55
52
  else
56
53
  raise ArgumentError, "#{date} is not a valid timeframe!"
@@ -71,7 +68,8 @@ module HTMLProofer
71
68
  found = status_code.between?(200, 299)
72
69
 
73
70
  clean_url = cleaned_url(url)
74
- @cache_log[:external][clean_url] = { time: @cache_time.to_s, found: found, status_code: status_code, message: msg, metadata: filenames }
71
+ @cache_log[:external][clean_url] =
72
+ { time: @cache_time.to_s, found: found, status_code: status_code, message: msg, metadata: filenames }
75
73
  end
76
74
 
77
75
  def detect_url_changes(urls_detected, type)
@@ -82,39 +80,104 @@ module HTMLProofer
82
80
  additions
83
81
  end
84
82
 
83
+ def write
84
+ return unless enabled?
85
+
86
+ File.write(@cache_file, @cache_log.to_json)
87
+ end
88
+
89
+ def retrieve_urls(urls_detected, type)
90
+ # if there are no urls, bail
91
+ return {} if urls_detected.empty?
92
+
93
+ urls_detected = urls_detected.transform_keys do |url|
94
+ cleaned_url(url)
95
+ end
96
+
97
+ urls_to_check = detect_url_changes(urls_detected, type)
98
+
99
+ @cache_log[type].each_pair do |url, cache|
100
+ within_timeframe = type == :external ? within_external_timeframe?(cache[:time]) : within_internal_timeframe?(cache[:time])
101
+ next if within_timeframe
102
+
103
+ urls_to_check[url] = cache[:metadata] # recheck expired links
104
+ end
105
+
106
+ urls_to_check
107
+ end
108
+
109
+ def within_external_timeframe?(time)
110
+ within_timeframe?(time, @external_timeframe)
111
+ end
112
+
113
+ def within_internal_timeframe?(time)
114
+ within_timeframe?(time, @internal_timeframe)
115
+ end
116
+
117
+ def empty?
118
+ blank?(@cache_log) || (@cache_log[:internal].empty? && @cache_log[:external].empty?)
119
+ end
120
+
121
+ def size(type)
122
+ @cache_log[type].size
123
+ end
124
+
85
125
  private def construct_internal_link_metadata(metadata, found)
86
126
  {
87
127
  source: metadata[:source],
88
- current_path: metadata[:current_path],
128
+ filename: metadata[:filename],
89
129
  line: metadata[:line],
90
130
  base_url: metadata[:base_url],
91
- found: found
131
+ found: found,
92
132
  }
93
133
  end
94
134
 
95
135
  # prepare to add new URLs detected
96
136
  private def determine_additions(urls_detected, type)
97
- additions = urls_detected.reject do |url, metadata|
98
- if @cache_log[type].include?(url)
99
- @cache_log[type][url][:metadata] = metadata
100
-
101
- # if this is false, we're trying again
102
- if type == :external
103
- @cache_log[type][url][:found]
104
- else
105
- @cache_log[type][url][:metadata].none? { |m| m[:found] }
106
- end
137
+ additions = type == :external ? determine_external_additions(urls_detected) : determine_internal_additions(urls_detected)
138
+
139
+ new_link_count = additions.length
140
+ new_link_text = pluralize(new_link_count, "new #{type} link", "new #{type} links")
141
+ @logger.log(:debug, "Adding #{new_link_text} to the cache")
142
+
143
+ additions
144
+ end
145
+
146
+ private def determine_external_additions(urls_detected)
147
+ urls_detected.reject do |url, _metadata|
148
+ if @cache_log[:external].include?(url)
149
+ @cache_log[:external][url][:found] # if this is false, we're trying again
107
150
  else
108
- @logger.log :debug, "Adding #{url} to #{type} cache"
151
+ @logger.log(:debug, "Adding #{url} to external cache")
109
152
  false
110
153
  end
111
154
  end
155
+ end
112
156
 
113
- new_link_count = additions.length
114
- new_link_text = pluralize(new_link_count, "new #{type} link", "new #{type} links")
115
- @logger.log :debug, "Adding #{new_link_text} to the cache"
157
+ private def determine_internal_additions(urls_detected)
158
+ urls_detected.each_with_object({}) do |(url, metadata), hsh|
159
+ # url is not even in cache
160
+ if @cache_log[:internal][url].nil?
161
+ hsh[url] = metadata
162
+ next
163
+ end
116
164
 
117
- additions
165
+ cache_metadata = @cache_log[:internal][url][:metadata]
166
+ incoming_metadata = urls_detected[url].each_with_object([]) do |incoming_url, arr|
167
+ existing_cache_metadata = cache_metadata.find { |k, _| k[:filename] == incoming_url[:filename] }
168
+
169
+ # cache for this url, from an existing path, exists as found
170
+ if !existing_cache_metadata.nil? && !existing_cache_metadata.empty? && existing_cache_metadata[:found]
171
+ metadata.find { |m| m[:filename] == existing_cache_metadata[:filename] }[:found] = true
172
+ next
173
+ end
174
+
175
+ @logger.log(:debug, "Adding #{incoming_url} to internal cache")
176
+ arr << incoming_url
177
+ end
178
+
179
+ hsh[url] = incoming_metadata
180
+ end
118
181
  end
119
182
 
120
183
  # remove from cache URLs that no longer exist
@@ -125,54 +188,21 @@ module HTMLProofer
125
188
  if urls_detected.include?(url)
126
189
  false
127
190
  elsif url_matches_type?(url, type)
128
- @logger.log :debug, "Removing #{url} from #{type} cache"
191
+ @logger.log(:debug, "Removing #{url} from #{type} cache")
129
192
  deletions += 1
130
193
  true
131
194
  end
132
195
  end
133
196
 
134
197
  del_link_text = pluralize(deletions, "outdated #{type} link", "outdated #{type} links")
135
- @logger.log :debug, "Removing #{del_link_text} from the cache"
136
- end
137
-
138
- def write
139
- return unless enabled?
140
-
141
- File.write(@cache_file, @cache_log.to_json)
142
- end
143
-
144
- def retrieve_urls(urls_detected, type)
145
- # if there are no urls, bail
146
- return {} if urls_detected.empty?
147
-
148
- urls_detected = urls_detected.transform_keys do |url|
149
- cleaned_url(url)
150
- end
151
-
152
- urls_to_check = detect_url_changes(urls_detected, type)
153
-
154
- @cache_log[type].each_pair do |url, cache|
155
- next if within_timeframe?(cache[:time])
156
-
157
- urls_to_check[url] = cache[:metadata] # recheck expired links
158
- end
159
-
160
- urls_to_check
161
- end
162
-
163
- def empty?
164
- blank?(@cache_log) || (@cache_log[:internal].empty? && @cache_log[:external].empty?)
165
- end
166
-
167
- def size(type)
168
- @cache_log[type].size
198
+ @logger.log(:debug, "Removing #{del_link_text} from the cache")
169
199
  end
170
200
 
171
201
  private def setup_cache!(options)
172
202
  default_structure = {
173
203
  version: CACHE_VERSION,
174
204
  internal: {},
175
- external: {}
205
+ external: {},
176
206
  }
177
207
 
178
208
  @storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR
@@ -193,26 +223,32 @@ module HTMLProofer
193
223
 
194
224
  old_cache = (cache_version = log[:version]).nil?
195
225
  @cache_log = if old_cache # previous cache version, create a new one
196
- default_structure
197
- elsif cache_version != CACHE_VERSION
198
- # if cache version is newer...do something
199
- else
200
- log[:internal] = log[:internal].transform_keys(&:to_s)
201
- log[:external] = log[:external].transform_keys(&:to_s)
202
- log
203
- end
226
+ default_structure
227
+ elsif cache_version != CACHE_VERSION
228
+ # if cache version is newer...do something
229
+ else
230
+ log[:internal] = log[:internal].transform_keys(&:to_s)
231
+ log[:external] = log[:external].transform_keys(&:to_s)
232
+ log
233
+ end
204
234
  end
205
235
 
236
+ # https://github.com/rails/rails/blob/3872bc0e54d32e8bf3a6299b0bfe173d94b072fc/activesupport/lib/active_support/duration.rb#L112-L117
237
+ SECONDS_PER_HOUR = 3600
238
+ SECONDS_PER_DAY = 86400
239
+ SECONDS_PER_WEEK = 604800
240
+ SECONDS_PER_MONTH = 2629746 # 1/12 of a gregorian year
241
+
206
242
  private def time_ago(measurement, unit)
207
243
  case unit
208
244
  when :months
209
- @cache_datetime >> -measurement
245
+ @cache_datetime - (SECONDS_PER_MONTH * measurement)
210
246
  when :weeks
211
- @cache_datetime - (measurement * 7)
247
+ @cache_datetime - (SECONDS_PER_WEEK * measurement)
212
248
  when :days
213
- @cache_datetime - measurement
249
+ @cache_datetime - (SECONDS_PER_DAY * measurement)
214
250
  when :hours
215
- @cache_datetime - Rational(measurement / 24.0)
251
+ @cache_datetime - Rational(SECONDS_PER_HOUR * measurement)
216
252
  end.to_time
217
253
  end
218
254
 
@@ -224,7 +260,7 @@ module HTMLProofer
224
260
  private def cleaned_url(url)
225
261
  cleaned_url = escape_unescape(url)
226
262
 
227
- return cleaned_url unless cleaned_url.end_with?('/', '#', '?') && cleaned_url.length > 1
263
+ return cleaned_url unless cleaned_url.end_with?("/", "#", "?") && cleaned_url.length > 1
228
264
 
229
265
  cleaned_url[0..-2]
230
266
  end
@@ -232,5 +268,12 @@ module HTMLProofer
232
268
  private def escape_unescape(url)
233
269
  Addressable::URI.parse(url).normalize.to_s
234
270
  end
271
+
272
+ private def within_timeframe?(current_time, parsed_timeframe)
273
+ return false if current_time.nil? || parsed_timeframe.nil?
274
+
275
+ current_time = Time.parse(current_time) if current_time.is_a?(String)
276
+ (parsed_timeframe..@cache_time).cover?(current_time)
277
+ end
235
278
  end
236
279
  end
@@ -1,35 +1,40 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class HTMLProofer::Check::Favicon < HTMLProofer::Check
4
- def run
5
- found = false
6
- @html.css('link').each do |node|
7
- @favicon = create_element(node)
3
+ module HTMLProofer
4
+ class Check
5
+ class Favicon < HTMLProofer::Check
6
+ def run
7
+ found = false
8
+ @html.css("link").each do |node|
9
+ @favicon = create_element(node)
8
10
 
9
- next if @favicon.ignore?
11
+ next if @favicon.ignore?
10
12
 
11
- break if (found = @favicon.node['rel'].split.last.eql? 'icon')
12
- end
13
+ break if (found = @favicon.node["rel"].split.last.eql?("icon"))
14
+ end
13
15
 
14
- return if immediate_redirect?
16
+ return if immediate_redirect?
15
17
 
16
- if found
17
- if @favicon.url.remote?
18
- add_to_external_urls(@favicon.url, @favicon.line)
19
- elsif !@favicon.url.exists?
20
- add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line, content: @favicon.content)
18
+ if found
19
+ if @favicon.url.remote?
20
+ add_to_external_urls(@favicon.url, @favicon.line)
21
+ elsif !@favicon.url.exists?
22
+ add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line,
23
+ content: @favicon.content)
24
+ end
25
+ else
26
+ add_failure("no favicon provided")
27
+ end
21
28
  end
22
- else
23
- add_failure('no favicon provided')
24
- end
25
- end
26
29
 
27
- private
30
+ private
28
31
 
29
- # allow any instant-redirect meta tag
30
- def immediate_redirect?
31
- @html.xpath("//meta[@http-equiv='refresh']").attribute('content').value.start_with? '0;'
32
- rescue StandardError
33
- false
32
+ # allow any instant-redirect meta tag
33
+ def immediate_redirect?
34
+ @html.xpath("//meta[@http-equiv='refresh']").attribute("content").value.start_with?("0;")
35
+ rescue StandardError
36
+ false
37
+ end
38
+ end
34
39
  end
35
40
  end
@@ -1,62 +1,93 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class HTMLProofer::Check::Images < HTMLProofer::Check
4
- SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/.freeze
5
-
6
- def run
7
- @html.css('img').each do |node|
8
- @img = create_element(node)
9
-
10
- next if @img.ignore?
11
-
12
- # screenshot filenames should return because of terrible names
13
- add_failure("image has a terrible filename (#{@img.url.raw_attribute})", line: @img.line, content: @img.content) if terrible_filename?
14
-
15
- # does the image exist?
16
- if missing_src?
17
- add_failure('image has no src or srcset attribute', line: @img.line, content: @img.content)
18
- elsif @img.url.remote?
19
- add_to_external_urls(@img.url, @img.line)
20
- elsif !@img.url.exists? && !@img.multiple_srcsets?
21
- add_failure("internal image #{@img.url.raw_attribute} does not exist", line: @img.line, content: @img.content)
22
- elsif @img.multiple_srcsets?
23
- srcsets = @img.srcset.split(',').map(&:strip)
24
- srcsets.each do |srcset|
25
- srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url)
26
-
27
- if srcset_url.remote?
28
- add_to_external_urls(srcset_url.url, @img.line)
29
- elsif !srcset_url.exists?
30
- add_failure("internal image #{srcset} does not exist", line: @img.line, content: @img.content)
3
+ module HTMLProofer
4
+ class Check
5
+ class Images < HTMLProofer::Check
6
+ SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/.freeze
7
+
8
+ def run
9
+ @html.css("img").each do |node|
10
+ @img = create_element(node)
11
+
12
+ next if @img.ignore?
13
+
14
+ # screenshot filenames should return because of terrible names
15
+ add_failure("image has a terrible filename (#{@img.url.raw_attribute})", line: @img.line,
16
+ content: @img.content) if terrible_filename?
17
+
18
+ # does the image exist?
19
+ if missing_src?
20
+ add_failure("image has no src or srcset attribute", line: @img.line, content: @img.content)
21
+ elsif @img.url.remote?
22
+ add_to_external_urls(@img.url, @img.line)
23
+ elsif !@img.url.exists? && !@img.multiple_srcsets?
24
+ add_failure("internal image #{@img.url.raw_attribute} does not exist", line: @img.line,
25
+ content: @img.content)
26
+ elsif @img.multiple_srcsets?
27
+ srcsets = @img.srcset.split(",").map(&:strip)
28
+ srcsets.each do |srcset|
29
+ srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url)
30
+
31
+ if srcset_url.remote?
32
+ add_to_external_urls(srcset_url.url, @img.line)
33
+ elsif !srcset_url.exists?
34
+ add_failure("internal image #{srcset} does not exist", line: @img.line, content: @img.content)
35
+ end
36
+ end
37
+ end
38
+
39
+ unless ignore_element?
40
+ if missing_alt_tag? && !ignore_missing_alt?
41
+ add_failure("image #{@img.url.raw_attribute} does not have an alt attribute", line: @img.line,
42
+ content: @img.content)
43
+ elsif (empty_alt_tag? || alt_all_spaces?) && !ignore_empty_alt?
44
+ add_failure("image #{@img.url.raw_attribute} has an alt attribute, but no content", line: @img.line,
45
+ content: @img.content)
46
+ end
31
47
  end
48
+
49
+ add_failure("image #{@img.url.raw_attribute} uses the http scheme", line: @img.line,
50
+ content: @img.content) if @runner.enforce_https? && @img.url.http?
32
51
  end
52
+
53
+ external_urls
33
54
  end
34
55
 
35
- add_failure("image #{@img.url.raw_attribute} does not have an alt attribute", line: @img.line, content: @img.content) if empty_alt_tag? && !ignore_missing_alt? && !ignore_alt?
56
+ def ignore_missing_alt?
57
+ @runner.options[:ignore_missing_alt]
58
+ end
36
59
 
37
- add_failure("image #{@img.url.raw_attribute} uses the http scheme", line: @img.line, content: @img.content) if @runner.enforce_https? && @img.url.http?
38
- end
60
+ def ignore_empty_alt?
61
+ @runner.options[:ignore_empty_alt]
62
+ end
39
63
 
40
- external_urls
41
- end
64
+ def ignore_element?
65
+ @img.url.ignore? || @img.aria_hidden?
66
+ end
42
67
 
43
- def ignore_missing_alt?
44
- @runner.options[:ignore_missing_alt]
45
- end
68
+ def missing_alt_tag?
69
+ @img.node["alt"].nil?
70
+ end
46
71
 
47
- def ignore_alt?
48
- @img.url.ignore? || @img.aria_hidden?
49
- end
72
+ def empty_alt_tag?
73
+ !missing_alt_tag? && @img.node["alt"].empty?
74
+ end
50
75
 
51
- def empty_alt_tag?
52
- @img.node['alt'].nil? || @img.node['alt'].strip.empty?
53
- end
76
+ def empty_whitespace_alt_tag?
77
+ !missing_alt_tag? && @img.node["alt"].strip.empty?
78
+ end
54
79
 
55
- def terrible_filename?
56
- @img.url.to_s =~ SCREEN_SHOT_REGEX
57
- end
80
+ def alt_all_spaces?
81
+ !missing_alt_tag? && @img.node["alt"].split.all?(" ")
82
+ end
83
+
84
+ def terrible_filename?
85
+ @img.url.to_s =~ SCREEN_SHOT_REGEX
86
+ end
58
87
 
59
- def missing_src?
60
- blank?(@img.url.to_s)
88
+ def missing_src?
89
+ blank?(@img.url.to_s)
90
+ end
91
+ end
61
92
  end
62
93
  end