html-proofer 3.19.4 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +44 -59
- data/lib/html-proofer.rb +1 -54
- data/lib/html_proofer/attribute/url.rb +251 -0
- data/lib/html_proofer/attribute.rb +15 -0
- data/lib/html_proofer/cache.rb +292 -0
- data/lib/html_proofer/check/favicon.rb +43 -0
- data/lib/html_proofer/check/images.rb +99 -0
- data/lib/html_proofer/check/links.rb +135 -0
- data/lib/html_proofer/check/open_graph.rb +42 -0
- data/lib/html_proofer/check/scripts.rb +49 -0
- data/lib/html_proofer/check.rb +94 -0
- data/lib/html_proofer/configuration.rb +91 -0
- data/lib/html_proofer/element.rb +144 -0
- data/lib/html_proofer/failure.rb +17 -0
- data/lib/{html-proofer → html_proofer}/log.rb +19 -19
- data/lib/html_proofer/reporter/cli.rb +33 -0
- data/lib/html_proofer/reporter.rb +23 -0
- data/lib/html_proofer/runner.rb +244 -0
- data/lib/html_proofer/url_validator/external.rb +193 -0
- data/lib/html_proofer/url_validator/internal.rb +97 -0
- data/lib/html_proofer/url_validator.rb +16 -0
- data/lib/{html-proofer → html_proofer}/utils.rb +9 -12
- data/lib/{html-proofer → html_proofer}/version.rb +1 -1
- data/lib/html_proofer/xpath_functions.rb +10 -0
- data/lib/html_proofer.rb +59 -0
- metadata +42 -22
- data/lib/html-proofer/cache.rb +0 -194
- data/lib/html-proofer/check/favicon.rb +0 -29
- data/lib/html-proofer/check/html.rb +0 -37
- data/lib/html-proofer/check/images.rb +0 -48
- data/lib/html-proofer/check/links.rb +0 -182
- data/lib/html-proofer/check/opengraph.rb +0 -46
- data/lib/html-proofer/check/scripts.rb +0 -42
- data/lib/html-proofer/check.rb +0 -75
- data/lib/html-proofer/configuration.rb +0 -88
- data/lib/html-proofer/element.rb +0 -265
- data/lib/html-proofer/issue.rb +0 -65
- data/lib/html-proofer/middleware.rb +0 -82
- data/lib/html-proofer/runner.rb +0 -249
- data/lib/html-proofer/url_validator.rb +0 -237
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "date"
require "fileutils"
require "json"
require "time"
require "uri"
|
|
6
|
+
|
|
7
|
+
module HTMLProofer
|
|
8
|
+
class Cache
|
|
9
|
+
include HTMLProofer::Utils
|
|
10
|
+
|
|
11
|
+
CACHE_VERSION = 2
|
|
12
|
+
|
|
13
|
+
DEFAULT_STORAGE_DIR = File.join("tmp", ".htmlproofer")
|
|
14
|
+
DEFAULT_CACHE_FILE_NAME = "cache.json"
|
|
15
|
+
|
|
16
|
+
URI_REGEXP = URI::DEFAULT_PARSER.make_regexp
|
|
17
|
+
|
|
18
|
+
attr_reader :exists, :cache_log, :storage_dir, :cache_file
|
|
19
|
+
|
|
20
|
+
# Builds the cache for one run.
#
# runner  - object whose #logger receives the cache's debug output.
# options - cache options Hash. When blank, the cache reports itself as
#           disabled for every link type. Otherwise it is handed to
#           setup_cache!, and options[:timeframe] may carry :external and
#           :internal timeframe strings (see parsed_timeframe).
def initialize(runner, options)
  @runner = runner
  @logger = @runner.logger

  @cache_datetime = Time.now
  @cache_time = @cache_datetime.to_time

  if blank?(options)
    define_singleton_method(:enabled?) { false }
    define_singleton_method(:external_enabled?) { false }
    define_singleton_method(:internal_enabled?) { false }
  else
    # we still consider the cache as enabled, regardless of the specific timeframes
    define_singleton_method(:enabled?) { true }
    setup_cache!(options)

    # options without a :timeframe section leave both link types uncached
    # instead of raising NoMethodError on nil
    timeframes = options[:timeframe] || {}

    @external_timeframe = parsed_timeframe(timeframes[:external])
    define_singleton_method(:external_enabled?) { !@external_timeframe.nil? }
    @internal_timeframe = parsed_timeframe(timeframes[:internal])
    define_singleton_method(:internal_enabled?) { !@internal_timeframe.nil? }
  end
end
|
|
42
|
+
|
|
43
|
+
# Converts a timeframe string such as "30d" into the point in time that far
# before this run started.
#
# timeframe - String holding a count followed by a unit letter: "M" (months),
#             "w" (weeks), "d" (days) or "h" (hours). nil means "no timeframe".
#
# Returns the value of time_ago for the parsed count and unit, or nil when
# timeframe is nil.
# Raises ArgumentError when the string holds no count/unit pair or the unit
# letter is not one of the four above.
def parsed_timeframe(timeframe)
  return nil if timeframe.nil?

  matched = timeframe.match(/(\d+)(\D)/)
  # without this guard a string like "soon" failed with NoMethodError on nil
  raise ArgumentError, "#{timeframe} is not a valid timeframe!" if matched.nil?

  time, date = matched.captures
  time = time.to_i
  case date
  when "M"
    time_ago(time, :months)
  when "w"
    time_ago(time, :weeks)
  when "d"
    time_ago(time, :days)
  when "h"
    time_ago(time, :hours)
  else
    raise ArgumentError, "#{date} is not a valid timeframe!"
  end
end
|
|
61
|
+
|
|
62
|
+
# Appends the outcome of one internal link check to the cache log.
# Does nothing unless internal caching is enabled.
#
# url      - the internal URL used as the cache key.
# metadata - Hash describing where the link was seen.
# found    - whether the link target was found.
def add_internal(url, metadata, found)
  return unless internal_enabled?

  internal_log = @cache_log[:internal]
  # the first sighting of a URL creates its entry, stamped with this run's time
  internal_log[url] = { time: @cache_time, metadata: [] } if internal_log[url].nil?

  internal_log[url][:metadata] << construct_internal_link_metadata(metadata, found)
end
|
|
69
|
+
|
|
70
|
+
# Stores the outcome of one external link check under its cleaned URL,
# replacing any previous entry. Does nothing unless external caching is enabled.
#
# url         - the external URL that was checked.
# filenames   - stored as the entry's :metadata.
# status_code - stored as :status_code.
# msg         - stored as :message.
# found       - stored as :found.
def add_external(url, filenames, status_code, msg, found)
  return unless external_enabled?

  cache_key = cleaned_url(url)
  entry = {
    time: @cache_time.to_s,
    found: found,
    status_code: status_code,
    message: msg,
    metadata: filenames,
  }
  @cache_log[:external][cache_key] = entry
end
|
|
77
|
+
|
|
78
|
+
# Reconciles the cache with the URLs seen in this run: entries are pruned
# first, then the URLs that still need checking are worked out.
#
# Returns the result of determine_additions.
def detect_url_changes(urls_detected, type)
  determine_deletions(urls_detected, type)
  determine_additions(urls_detected, type)
end
|
|
85
|
+
|
|
86
|
+
# Serializes the cache log as JSON into the cache file.
# Does nothing (and returns nil) when the cache is disabled.
def write
  File.write(@cache_file, @cache_log.to_json) if enabled?
end
|
|
91
|
+
|
|
92
|
+
# Works out which of the detected URLs still need to be checked.
#
# urls_detected - Hash keyed by URL; keys are passed through cleaned_url
#                 before being compared with the cache.
# type          - forwarded to detect_url_changes.
#
# Returns {} when nothing was detected, otherwise the result of
# detect_url_changes.
def retrieve_urls(urls_detected, type)
  # if there are no urls, bail
  return {} if urls_detected.empty?

  normalized = urls_detected.transform_keys { |url| cleaned_url(url) }
  detect_url_changes(normalized, type)
end
|
|
104
|
+
|
|
105
|
+
# Tells whether time falls inside the external cache timeframe.
def within_external_timeframe?(time)
  cutoff = @external_timeframe
  within_timeframe?(time, cutoff)
end
|
|
108
|
+
|
|
109
|
+
# Tells whether time falls inside the internal cache timeframe.
def within_internal_timeframe?(time)
  cutoff = @internal_timeframe
  within_timeframe?(time, cutoff)
end
|
|
112
|
+
|
|
113
|
+
# True when there is no cache log at all, or when both its internal and
# external sections hold no entries.
def empty?
  return true if blank?(@cache_log)

  [:internal, :external].all? { |section| @cache_log[section].empty? }
end
|
|
116
|
+
|
|
117
|
+
# Number of cached entries in the given section of the cache log.
#
# type - the section key, e.g. :internal or :external.
def size(type)
  entries = @cache_log[type]
  entries.size
end
|
|
120
|
+
|
|
121
|
+
# Builds the record stored for one internal link occurrence: the :source,
# :filename, :line and :base_url values copied from metadata, plus the
# check result under :found.
private def construct_internal_link_metadata(metadata, found)
  record = [:source, :filename, :line, :base_url].each_with_object({}) do |key, copied|
    copied[key] = metadata[key]
  end
  record[:found] = found
  record
end
|
|
130
|
+
|
|
131
|
+
# prepare to add new URLs detected
|
|
132
|
+
# Picks the URLs that have to be (re)checked for the given link type and
# logs how many there are.
#
# Returns the additions produced by the type-specific helper.
private def determine_additions(urls_detected, type)
  additions =
    if type == :external
      determine_external_additions(urls_detected)
    else
      determine_internal_additions(urls_detected)
    end

  summary = pluralize(additions.length, "new #{type} link", "new #{type} links")
  @logger.log(:debug, "Adding #{summary} to the cache")

  additions
end
|
|
141
|
+
|
|
142
|
+
# Filters the detected external URLs down to those that must be checked:
# URLs missing from the cache, and cached URLs whose last check did not
# find them.
private def determine_external_additions(urls_detected)
  external_log = @cache_log[:external]

  urls_detected.reject do |url, _metadata|
    unless external_log.include?(url)
      @logger.log(:debug, "Adding #{url} to external cache")
      next false
    end

    # if this is false, we're trying again
    previously_found = external_log[url][:found]
    @logger.log(:debug, "Adding #{url} to external cache (not found)") unless previously_found
    previously_found
  end
end
|
|
156
|
+
|
|
157
|
+
# Works out which detected internal URL occurrences must be (re)checked.
#
# A URL absent from the cache is taken over with all of its detected
# metadata. For a cached URL, only the occurrences whose filename has no
# cached record marked as found are kept.
#
# Returns a Hash of url => metadata to check.
private def determine_internal_additions(urls_detected)
  internal_log = @cache_log[:internal]
  additions = {}

  urls_detected.each do |url, detected_metadata|
    cached_entry = internal_log[url]

    # url is not even in cache
    if cached_entry.nil?
      @logger.log(:debug, "Adding #{url} to internal cache")
      additions[url] = detected_metadata
      next
    end

    # detect metadata additions
    # NOTE: the time-stamp for the whole url key will not be updated,
    # so that it reflects the earliest time any of the metadata was checked
    cached_metadata = cached_entry[:metadata]
    unverified = detected_metadata.reject do |detected|
      match = cached_metadata.find { |cached, _ignored| cached[:filename] == detected[:filename] }
      # cache for this url, from an existing path, exists as found
      already_found = !match.nil? && !match.empty? && match[:found]
      @logger.log(:debug, "Adding #{detected} to internal cache for #{url}") unless already_found
      already_found
    end

    next if unverified.empty?

    additions[url] = unverified
    # remove from the cache the detected metadata additions as they correspond to failures to be rechecked
    # (this works assuming the detected url metadata have "found" set to false)
    cached_entry[:metadata] = cached_metadata.difference(unverified)
  end

  additions
end
|
|
190
|
+
|
|
191
|
+
# remove from cache URLs that no longer exist
|
|
192
|
+
# Drops cache entries of the given type that are outside their timeframe,
# or that were not detected in this run while still matching the type,
# then logs how many entries were removed.
private def determine_deletions(urls_detected, type)
  removed = 0

  @cache_log[type].delete_if do |url, cache|
    fresh = type == :external ? within_external_timeframe?(cache[:time]) : within_internal_timeframe?(cache[:time])

    reason =
      if !fresh
        "expired timeframe"
      elsif urls_detected.include?(url)
        nil
      elsif url_matches_type?(url, type)
        "not detected anymore"
      end
    next false if reason.nil?

    @logger.log(:debug, "Removing #{url} from #{type} cache (#{reason})")
    removed += 1
    true
  end

  summary = pluralize(removed, "outdated #{type} link", "outdated #{type} links")
  @logger.log(:debug, "Removing #{summary} from the cache")
end
|
|
213
|
+
|
|
214
|
+
# Resolves the cache location and loads the cache log from disk.
#
# options - Hash that may carry :storage_dir (directory, created when
#           missing) and :cache_file (file name inside that directory);
#           both fall back to the class defaults.
#
# Sets @storage_dir, @cache_file and @cache_log. The log starts from an
# empty structure when the file is missing or empty, when it carries no
# version, or when its version differs from CACHE_VERSION.
# A file holding invalid JSON still raises JSON::ParserError.
private def setup_cache!(options)
  default_structure = {
    version: CACHE_VERSION,
    internal: {},
    external: {},
  }

  @storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR

  FileUtils.mkdir_p(@storage_dir) unless Dir.exist?(@storage_dir)

  cache_file_name = options[:cache_file] || DEFAULT_CACHE_FILE_NAME

  @cache_file = File.join(@storage_dir, cache_file_name)

  return (@cache_log = default_structure) unless File.exist?(@cache_file)

  contents = File.read(@cache_file)

  return (@cache_log = default_structure) if blank?(contents)

  log = JSON.parse(contents, symbolize_names: true)

  # Only a log written with the current format is reused. Previously a
  # version mismatch left @cache_log as nil, which crashed every later lookup.
  @cache_log = if log[:version] == CACHE_VERSION
    # symbolize_names also symbolized the URL keys; they must be Strings again.
    # A section missing from the file is treated as empty.
    log[:internal] = (log[:internal] || {}).transform_keys(&:to_s)
    log[:external] = (log[:external] || {}).transform_keys(&:to_s)
    log
  else
    default_structure
  end
end
|
|
248
|
+
|
|
249
|
+
# https://github.com/rails/rails/blob/3872bc0e54d32e8bf3a6299b0bfe173d94b072fc/activesupport/lib/active_support/duration.rb#L112-L117
|
|
250
|
+
SECONDS_PER_HOUR = 3600
SECONDS_PER_DAY = 86400
SECONDS_PER_WEEK = 604800
SECONDS_PER_MONTH = 2629746 # 1/12 of a gregorian year

# Returns the Time lying `measurement` units before the moment this cache
# was created (@cache_datetime).
#
# measurement - Integer count of units.
# unit        - one of :months, :weeks, :days or :hours.
#
# Raises ArgumentError for any other unit (this used to surface as a
# NoMethodError on nil).
private def time_ago(measurement, unit)
  seconds_per_unit =
    case unit
    when :months then SECONDS_PER_MONTH
    when :weeks then SECONDS_PER_WEEK
    when :days then SECONDS_PER_DAY
    when :hours then SECONDS_PER_HOUR
    else raise ArgumentError, "#{unit} is not a valid time unit!"
    end

  (@cache_datetime - (seconds_per_unit * measurement)).to_time
end
|
|
267
|
+
|
|
268
|
+
# Tells whether a cached URL belongs to the given cache type: an :internal
# URL must not match URI_REGEXP, an :external URL must match it.
#
# Returns true on a match and nil otherwise.
private def url_matches_type?(url, type)
  case type
  when :internal
    true unless url =~ URI_REGEXP
  when :external
    true if url =~ URI_REGEXP
  end
end
|
|
272
|
+
|
|
273
|
+
# Normalizes a URL for use as a cache key: it is run through
# escape_unescape, and a single trailing "/", "#" or "?" is dropped unless
# that character is the whole URL.
private def cleaned_url(url)
  normalized = escape_unescape(url)
  strippable = normalized.end_with?("/", "#", "?") && normalized.length > 1

  strippable ? normalized[0..-2] : normalized
end
|
|
280
|
+
|
|
281
|
+
# Returns the String form of the URL after Addressable's normalization.
private def escape_unescape(url)
  parsed = Addressable::URI.parse(url)
  parsed.normalize.to_s
end
|
|
284
|
+
|
|
285
|
+
# Tells whether a cached timestamp lies between the timeframe cutoff and
# the moment this cache was created (@cache_time), both ends included.
#
# current_time     - Time, or a String parseable by Time.parse, as read
#                    back from the cache file.
# parsed_timeframe - the cutoff Time; nil means no timeframe is configured.
#
# Returns false when either argument is nil or when a String timestamp
# cannot be parsed, so that a corrupt entry counts as expired instead of
# aborting the run.
private def within_timeframe?(current_time, parsed_timeframe)
  return false if current_time.nil? || parsed_timeframe.nil?

  if current_time.is_a?(String)
    begin
      current_time = Time.parse(current_time)
    rescue ArgumentError
      return false
    end
  end

  (parsed_timeframe..@cache_time).cover?(current_time)
end
|
|
291
|
+
end
|
|
292
|
+
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTMLProofer
|
|
4
|
+
class Check
|
|
5
|
+
# Checks that a page declares a favicon and that the favicon reference is usable.
class Favicon < HTMLProofer::Check
  # Looks for the first non-ignored <link> whose last rel token is "icon".
  # Pages with an immediate meta refresh are skipped. Otherwise a failure is
  # recorded when no favicon exists, when it uses a protocol-relative URL, or
  # when an internal favicon does not exist; remote favicons are queued as
  # external URLs.
  def run
    found = false
    @html.css("link").each do |node|
      @favicon = create_element(node)

      next if @favicon.ignore?

      # to_s: a <link> without a rel attribute is simply not a favicon,
      # rather than a NoMethodError on nil
      break if (found = @favicon.node["rel"].to_s.split.last.eql?("icon"))
    end

    return if immediate_redirect?

    if found
      if @favicon.url.protocol_relative?
        add_failure("favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead",
          line: @favicon.line, content: @favicon.content)
      elsif @favicon.url.remote?
        add_to_external_urls(@favicon.url, @favicon.line)
      elsif !@favicon.url.exists?
        add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line,
          content: @favicon.content)
      end
    else
      add_failure("no favicon provided")
    end
  end

  private

  # allow any instant-redirect meta tag
  #
  # Any error raised while reading the refresh tag (for instance when the
  # page has none) is treated as "no immediate redirect".
  def immediate_redirect?
    @html.xpath("//meta[@http-equiv='refresh']").attribute("content").value.start_with?("0;")
  rescue StandardError
    false
  end
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTMLProofer
|
|
4
|
+
class Check
|
|
5
|
+
# Validates <img> and <source> elements: file names, presence and
# reachability of the image reference, alt attributes and (optionally) the
# use of https.
class Images < HTMLProofer::Check
  SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/.freeze

  # Runs every image check on each non-ignored element.
  #
  # Returns the external URLs collected by this check.
  def run
    @html.css("img, source").each do |node|
      @img = create_element(node)

      next if @img.ignore?

      # screenshot filenames should return because of terrible names
      if terrible_filename?
        add_failure("image has a terrible filename (#{@img.url.raw_attribute})", line: @img.line,
          content: @img.content)
      end

      # does the image exist?
      if missing_src?
        add_failure("image has no src or srcset attribute", line: @img.line, content: @img.content)
      elsif @img.url.protocol_relative?
        add_failure("image link #{@img.url} is a protocol-relative URL, use explicit https:// instead",
          line: @img.line, content: @img.content)
      elsif @img.url.remote?
        add_to_external_urls(@img.url, @img.line)
      elsif !@img.url.exists? && !@img.multiple_srcsets? && !@img.multiple_sizes?
        add_failure("internal image #{@img.url.raw_attribute} does not exist", line: @img.line,
          content: @img.content)
      elsif @img.multiple_srcsets? || @img.multiple_sizes?
        # each srcset candidate is validated on its own
        @img.srcsets_wo_sizes.each do |srcset|
          srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)

          if srcset_url.protocol_relative?
            add_failure("image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead",
              line: @img.line, content: @img.content)
          elsif srcset_url.remote?
            add_to_external_urls(srcset_url.url, @img.line)
          elsif !srcset_url.exists?
            add_failure("internal image #{srcset} does not exist", line: @img.line, content: @img.content)
          end
        end
      end

      # if this is an img element, check that the alt attribute is present
      if @img.img_tag? && !ignore_element?
        if missing_alt_tag? && !ignore_missing_alt?
          add_failure("image #{@img.url.raw_attribute} does not have an alt attribute", line: @img.line,
            content: @img.content)
        elsif (empty_alt_tag? || alt_all_spaces?) && !ignore_empty_alt?
          add_failure("image #{@img.url.raw_attribute} has an alt attribute, but no content", line: @img.line,
            content: @img.content)
        end
      end

      if @runner.enforce_https? && @img.url.http?
        add_failure("image #{@img.url.raw_attribute} uses the http scheme", line: @img.line,
          content: @img.content)
      end
    end

    external_urls
  end

  # Value of the :ignore_missing_alt runner option.
  def ignore_missing_alt?
    @runner.options[:ignore_missing_alt]
  end

  # Value of the :ignore_empty_alt runner option.
  def ignore_empty_alt?
    @runner.options[:ignore_empty_alt]
  end

  # True when the alt checks should be skipped for the current element.
  def ignore_element?
    @img.url.ignore? || @img.aria_hidden?
  end

  # True when the element has no alt attribute at all.
  def missing_alt_tag?
    @img.node["alt"].nil?
  end

  # True when the alt attribute is present but is the empty string.
  def empty_alt_tag?
    !missing_alt_tag? && @img.node["alt"].empty?
  end

  # True when the alt attribute is present but holds nothing besides whitespace.
  def empty_whitespace_alt_tag?
    !missing_alt_tag? && @img.node["alt"].strip.empty?
  end

  # True when the alt attribute is present and consists of whitespace only.
  # The previous `split.all?(" ")` form only worked because an empty array
  # satisfies all?; this states the same test directly.
  def alt_all_spaces?
    empty_whitespace_alt_tag?
  end

  # Truthy when the image URL looks like an unrenamed screenshot.
  def terrible_filename?
    @img.url.to_s =~ SCREEN_SHOT_REGEX
  end

  # True when the element resolves to no URL at all.
  def missing_src?
    blank?(@img.url.to_s)
  end
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTMLProofer
|
|
4
|
+
class Check
|
|
5
|
+
# Validates <a> and <link> elements: presence and validity of the
# reference, mailto/tel/http scheme rules, subresource integrity for
# remote stylesheets, and queues URLs for the internal and external checks.
class Links < HTMLProofer::Check
  # Allowed elements from Subresource Integrity specification
  # https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets
  #
  # An Array, so that membership is an exact match on the rel value and a
  # missing rel attribute (nil) is simply "not included". The former String
  # literal made include? a substring test that raised TypeError on nil.
  SRI_REL_TYPES = ["stylesheet"].freeze

  # Runs every link check on each non-ignored element.
  def run
    @html.css("a, link").each do |node|
      @link = create_element(node)

      next if @link.ignore?

      if !allow_hash_href? && @link.node["href"] == "#"
        add_failure("linking to internal hash #, which points to nowhere", line: @link.line, content: @link.content)
        next
      end

      # is there even an href?
      if blank?(@link.url.raw_attribute)
        next if allow_missing_href?

        add_failure("'#{@link.node.name}' tag is missing a reference", line: @link.line, content: @link.content)
        next
      end

      # is it even a valid URL?
      unless @link.url.valid?
        add_failure("#{@link.href} is an invalid URL", line: @link.line, content: @link.content)
        next
      end

      if @link.url.protocol_relative?
        add_failure("#{@link.url} is a protocol-relative URL, use explicit https:// instead",
          line: @link.line, content: @link.content)
        next
      end

      check_schemes

      # intentionally down here because we still want valid? & missing_href? to execute
      next if @link.url.non_http_remote?

      if !@link.url.internal? && @link.url.remote?
        check_sri if @runner.check_sri? && @link.link_tag?

        # we need to skip these for now; although the domain may be valid,
        # curl/Typhoeus inaccurately return 404s for some links. cc https://git.io/vyCFx
        next if @link.node["rel"] == "dns-prefetch"

        unless @link.url.path?
          add_failure("#{@link.url.raw_attribute} is an invalid URL", line: @link.line, content: @link.content)
          next
        end

        add_to_external_urls(@link.url, @link.line)
      elsif @link.url.internal?
        # does the local directory have a trailing slash?
        if @link.url.unslashed_directory?(@link.url.absolute_path)
          add_failure("internally linking to a directory #{@link.url.raw_attribute} without trailing slash",
            line: @link.line, content: @link.content)
          next
        end

        add_to_internal_urls(@link.url, @link.line)
      end
    end
  end

  # Value of the :allow_missing_href runner option.
  def allow_missing_href?
    @runner.options[:allow_missing_href]
  end

  # Value of the :allow_hash_href runner option.
  def allow_hash_href?
    @runner.options[:allow_hash_href]
  end

  # Applies the scheme-specific rules for mailto:, tel: and http: links.
  def check_schemes
    case @link.url.scheme
    when "mailto"
      handle_mailto
    when "tel"
      handle_tel
    when "http"
      return unless @runner.options[:enforce_https]

      add_failure("#{@link.url.raw_attribute} is not an HTTPS link", line: @link.line, content: @link.content)
    end
  end

  # Fails a mailto: link that carries no address (unless that is ignored by
  # option) or an address not matching URI::MailTo::EMAIL_REGEXP.
  def handle_mailto
    if @link.url.path.empty?
      unless ignore_empty_mailto?
        add_failure("#{@link.url.raw_attribute} contains no email address", line: @link.line,
          content: @link.content)
      end
    elsif !URI::MailTo::EMAIL_REGEXP.match?(@link.url.path)
      add_failure("#{@link.url.raw_attribute} contains an invalid email address", line: @link.line,
        content: @link.content)
    end
  end

  # Fails a tel: link that carries no number.
  def handle_tel
    return unless @link.url.path.empty?

    add_failure("#{@link.url.raw_attribute} contains no phone number", line: @link.line,
      content: @link.content)
  end

  # Value of the :ignore_empty_mailto runner option.
  def ignore_empty_mailto?
    @runner.options[:ignore_empty_mailto]
  end

  # Requires both the integrity and crossorigin attributes on links whose
  # rel is listed in SRI_REL_TYPES, reporting which of the two is missing.
  def check_sri
    return unless SRI_REL_TYPES.include?(@link.node["rel"])

    if blank?(@link.node["integrity"]) && blank?(@link.node["crossorigin"])
      add_failure("SRI and CORS not provided in: #{@link.url.raw_attribute}", line: @link.line,
        content: @link.content)
    elsif blank?(@link.node["integrity"])
      add_failure("Integrity is missing in: #{@link.url.raw_attribute}", line: @link.line, content: @link.content)
    elsif blank?(@link.node["crossorigin"])
      # was `@link.link.url`; the sibling branches read the URL from @link directly
      add_failure("CORS not provided for external resource in: #{@link.url.raw_attribute}", line: @link.line,
        content: @link.content)
    end
  end

  private def source_tag?
    @link.node.name == "source"
  end

  private def anchor_tag?
    @link.node.name == "a"
  end
end
|
|
134
|
+
end
|
|
135
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTMLProofer
|
|
4
|
+
class Check
|
|
5
|
+
# Validates the og:url and og:image <meta> tags: the content attribute
# must be present, non-empty and a usable URL.
class OpenGraph < HTMLProofer::Check
  # Runs the Open Graph checks on each non-ignored tag.
  #
  # Returns the external URLs collected by this check.
  def run
    @html.css('meta[property="og:url"], meta[property="og:image"]').each do |node|
      @open_graph = create_element(node)

      next if @open_graph.ignore?

      # does the open_graph exist?
      if missing_content?
        add_failure("open graph has no content attribute", line: @open_graph.line, content: @open_graph.content)
      elsif empty_content?
        add_failure("open graph content attribute is empty", line: @open_graph.line, content: @open_graph.content)
      elsif !@open_graph.url.valid?
        # reported like the other failures of this check: the offending value
        # is taken from the element's URL, and the element content is attached.
        # NOTE(review): this used to interpolate @open_graph.src, although these
        # are <meta content=...> tags - confirm against Element.
        add_failure("#{@open_graph.url.raw_attribute} is an invalid URL", line: @open_graph.line,
          content: @open_graph.content)
      elsif @open_graph.url.protocol_relative?
        add_failure("open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead",
          line: @open_graph.line, content: @open_graph.content)
      elsif @open_graph.url.remote?
        add_to_external_urls(@open_graph.url, @open_graph.line)
      elsif !@open_graph.url.exists?
        add_failure("internal open graph #{@open_graph.url.raw_attribute} does not exist", line: @open_graph.line,
          content: @open_graph.content)
      end
    end

    external_urls
  end

  # True when the tag has no content attribute.
  private def missing_content?
    @open_graph.node["content"].nil?
  end

  # True when the content attribute is the empty string.
  private def empty_content?
    @open_graph.node["content"].empty?
  end
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTMLProofer
|
|
4
|
+
class Check
|
|
5
|
+
# Validates <script> elements that have no inline body: the src must be
# present and usable, and remote scripts may be required to carry
# subresource-integrity attributes.
class Scripts < HTMLProofer::Check
  # Runs the script checks on each non-ignored, body-less element.
  #
  # Returns the external URLs collected by this check.
  def run
    @html.css("script").each do |node|
      @script = create_element(node)

      # ignored scripts and scripts with inline content are not checked
      next if @script.ignore? || !@script.content.strip.empty?

      # does the script exist?
      if missing_src?
        add_failure("script is empty and has no src attribute", line: @script.line, content: @script.content)
        next
      end

      if @script.url.protocol_relative?
        add_failure("script link #{@script.url} is a protocol-relative URL, use explicit https:// instead",
          line: @script.line, content: @script.content)
      elsif @script.url.remote?
        add_to_external_urls(@script.url, @script.line)
        check_sri if @runner.check_sri?
      elsif !@script.url.exists?
        add_failure("internal script reference #{@script.src} does not exist", line: @script.line,
          content: @script.content)
      end
    end

    external_urls
  end

  # True when the element has no src attribute.
  def missing_src?
    @script.node["src"].nil?
  end

  # Requires both the integrity and crossorigin attributes, reporting
  # which of the two is missing.
  def check_sri
    integrity_missing = blank?(@script.node["integrity"])
    cors_missing = blank?(@script.node["crossorigin"])

    problem =
      if integrity_missing && cors_missing
        "SRI and CORS not provided in: "
      elsif integrity_missing
        "Integrity is missing in: "
      elsif cors_missing
        "CORS not provided for external resource in: "
      end
    return if problem.nil?

    add_failure("#{problem}#{@script.url.raw_attribute}", line: @script.line, content: @script.content)
  end
end
|
|
48
|
+
end
|
|
49
|
+
end
|