html-proofer 3.19.4 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/bin/htmlproofer +44 -59
  3. data/lib/html-proofer.rb +1 -54
  4. data/lib/html_proofer/attribute/url.rb +251 -0
  5. data/lib/html_proofer/attribute.rb +15 -0
  6. data/lib/html_proofer/cache.rb +292 -0
  7. data/lib/html_proofer/check/favicon.rb +43 -0
  8. data/lib/html_proofer/check/images.rb +99 -0
  9. data/lib/html_proofer/check/links.rb +135 -0
  10. data/lib/html_proofer/check/open_graph.rb +42 -0
  11. data/lib/html_proofer/check/scripts.rb +49 -0
  12. data/lib/html_proofer/check.rb +94 -0
  13. data/lib/html_proofer/configuration.rb +91 -0
  14. data/lib/html_proofer/element.rb +144 -0
  15. data/lib/html_proofer/failure.rb +17 -0
  16. data/lib/{html-proofer → html_proofer}/log.rb +19 -19
  17. data/lib/html_proofer/reporter/cli.rb +33 -0
  18. data/lib/html_proofer/reporter.rb +23 -0
  19. data/lib/html_proofer/runner.rb +244 -0
  20. data/lib/html_proofer/url_validator/external.rb +193 -0
  21. data/lib/html_proofer/url_validator/internal.rb +97 -0
  22. data/lib/html_proofer/url_validator.rb +16 -0
  23. data/lib/{html-proofer → html_proofer}/utils.rb +9 -12
  24. data/lib/{html-proofer → html_proofer}/version.rb +1 -1
  25. data/lib/html_proofer/xpath_functions.rb +10 -0
  26. data/lib/html_proofer.rb +59 -0
  27. metadata +42 -22
  28. data/lib/html-proofer/cache.rb +0 -194
  29. data/lib/html-proofer/check/favicon.rb +0 -29
  30. data/lib/html-proofer/check/html.rb +0 -37
  31. data/lib/html-proofer/check/images.rb +0 -48
  32. data/lib/html-proofer/check/links.rb +0 -182
  33. data/lib/html-proofer/check/opengraph.rb +0 -46
  34. data/lib/html-proofer/check/scripts.rb +0 -42
  35. data/lib/html-proofer/check.rb +0 -75
  36. data/lib/html-proofer/configuration.rb +0 -88
  37. data/lib/html-proofer/element.rb +0 -265
  38. data/lib/html-proofer/issue.rb +0 -65
  39. data/lib/html-proofer/middleware.rb +0 -82
  40. data/lib/html-proofer/runner.rb +0 -249
  41. data/lib/html-proofer/url_validator.rb +0 -237
@@ -0,0 +1,292 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "date"
4
+ require "json"
5
+ require "uri"
6
+
7
module HTMLProofer
  # JSON-backed log of previously checked URLs.
  #
  # The log is a Hash with three keys: +:version+, +:internal+ and +:external+.
  # The latter two map a URL string to the data recorded for it. Entries older
  # than the configured timeframe, or no longer detected, are dropped by
  # #retrieve_urls, which returns the URLs that still need to be checked.
  class Cache
    include HTMLProofer::Utils

    CACHE_VERSION = 2

    DEFAULT_STORAGE_DIR = File.join("tmp", ".htmlproofer")
    DEFAULT_CACHE_FILE_NAME = "cache.json"

    URI_REGEXP = URI::DEFAULT_PARSER.make_regexp

    # https://github.com/rails/rails/blob/3872bc0e54d32e8bf3a6299b0bfe173d94b072fc/activesupport/lib/active_support/duration.rb#L112-L117
    SECONDS_PER_HOUR = 3600
    SECONDS_PER_DAY = 86400
    SECONDS_PER_WEEK = 604800
    SECONDS_PER_MONTH = 2629746 # 1/12 of a gregorian year

    attr_reader :exists, :cache_log, :storage_dir, :cache_file

    # @param runner [#logger] object whose logger receives debug output
    # @param options [Hash, nil] cache options; blank disables the cache.
    #   Recognised keys: :storage_dir, :cache_file and :timeframe (a Hash with
    #   optional :external / :internal entries such as "30d").
    # @raise [ArgumentError] if a timeframe string is malformed
    def initialize(runner, options)
      @runner = runner
      @logger = @runner.logger

      @cache_datetime = Time.now
      @cache_time = @cache_datetime

      @external_timeframe = nil
      @internal_timeframe = nil

      # we still consider the cache as enabled, regardless of the specific timeframes
      @enabled = !blank?(options)
      return unless @enabled

      setup_cache!(options)

      # a missing :timeframe simply leaves both link types uncached
      timeframes = options[:timeframe] || {}
      @external_timeframe = parsed_timeframe(timeframes[:external])
      @internal_timeframe = parsed_timeframe(timeframes[:internal])
    end

    # @return [Boolean] whether any cache options were supplied
    def enabled?
      @enabled
    end

    # @return [Boolean] whether an external timeframe was configured
    def external_enabled?
      !@external_timeframe.nil?
    end

    # @return [Boolean] whether an internal timeframe was configured
    def internal_enabled?
      !@internal_timeframe.nil?
    end

    # Converts a timeframe such as "2w" into the Time that far before this run.
    #
    # @param timeframe [String, nil] digits followed by M, w, d or h
    # @return [Time, nil] nil when no timeframe was given
    # @raise [ArgumentError] if the value is malformed or the unit is unknown
    def parsed_timeframe(timeframe)
      return if timeframe.nil?

      match = timeframe.to_s.match(/(\d+)(\D)/)
      # without this guard a malformed value surfaced as NoMethodError on nil
      raise ArgumentError, "#{timeframe} is not a valid timeframe!" if match.nil?

      time, date = match.captures
      time = time.to_i
      case date
      when "M"
        time_ago(time, :months)
      when "w"
        time_ago(time, :weeks)
      when "d"
        time_ago(time, :days)
      when "h"
        time_ago(time, :hours)
      else
        raise ArgumentError, "#{date} is not a valid timeframe!"
      end
    end

    # Records one occurrence of an internal URL. No-op unless internal caching is enabled.
    def add_internal(url, metadata, found)
      return unless internal_enabled?

      @cache_log[:internal][url] = { time: @cache_time, metadata: [] } if @cache_log[:internal][url].nil?

      @cache_log[:internal][url][:metadata] << construct_internal_link_metadata(metadata, found)
    end

    # Records the outcome of an external URL check under its normalized form.
    # No-op unless external caching is enabled.
    def add_external(url, filenames, status_code, msg, found)
      return unless external_enabled?

      clean_url = cleaned_url(url)
      @cache_log[:external][clean_url] =
        { time: @cache_time.to_s, found: found, status_code: status_code, message: msg, metadata: filenames }
    end

    # Prunes stale entries of the given type, then returns the detected URLs
    # that are not covered by the cache.
    def detect_url_changes(urls_detected, type)
      determine_deletions(urls_detected, type)
      determine_additions(urls_detected, type)
    end

    # Serializes the log to the cache file. No-op when the cache is disabled.
    def write
      return unless enabled?

      File.write(@cache_file, @cache_log.to_json)
    end

    # @param urls_detected [Hash] detected URLs mapped to their metadata
    # @param type [Symbol] :internal or :external
    # @return [Hash] the subset of URLs that still has to be checked
    def retrieve_urls(urls_detected, type)
      # if there are no urls, bail
      return {} if urls_detected.empty?

      normalized = urls_detected.transform_keys { |url| cleaned_url(url) }
      detect_url_changes(normalized, type)
    end

    def within_external_timeframe?(time)
      within_timeframe?(time, @external_timeframe)
    end

    def within_internal_timeframe?(time)
      within_timeframe?(time, @internal_timeframe)
    end

    def empty?
      blank?(@cache_log) || (@cache_log[:internal].empty? && @cache_log[:external].empty?)
    end

    def size(type)
      @cache_log[type].size
    end

    private def construct_internal_link_metadata(metadata, found)
      {
        source: metadata[:source],
        filename: metadata[:filename],
        line: metadata[:line],
        base_url: metadata[:base_url],
        found: found,
      }
    end

    # prepare to add new URLs detected
    private def determine_additions(urls_detected, type)
      additions = type == :external ? determine_external_additions(urls_detected) : determine_internal_additions(urls_detected)

      new_link_text = pluralize(additions.length, "new #{type} link", "new #{type} links")
      @logger.log(:debug, "Adding #{new_link_text} to the cache")

      additions
    end

    # Keeps every detected URL that is unknown, or cached as not found (those are retried).
    private def determine_external_additions(urls_detected)
      urls_detected.reject do |url, _metadata|
        cached = @cache_log[:external][url]

        if cached.nil?
          @logger.log(:debug, "Adding #{url} to external cache")
          next false
        end

        found = cached[:found] # if this is false, we're trying again
        @logger.log(:debug, "Adding #{url} to external cache (not found)") unless found
        found
      end
    end

    private def determine_internal_additions(urls_detected)
      urls_detected.each_with_object({}) do |(url, detected_metadata), additions|
        cached_entry = @cache_log[:internal][url]

        # url is not even in cache
        if cached_entry.nil?
          @logger.log(:debug, "Adding #{url} to internal cache")
          additions[url] = detected_metadata
          next
        end

        # detect metadata additions
        # NOTE: the time-stamp for the whole url key will not be updated,
        # so that it reflects the earliest time any of the metadata was checked
        cache_metadata = cached_entry[:metadata]
        metadata_additions = detected_metadata.reject do |detected|
          existing = cache_metadata.find { |cached| cached[:filename] == detected[:filename] }
          # cache for this url, from an existing path, exists as found
          found = !existing.nil? && !existing.empty? && existing[:found]
          @logger.log(:debug, "Adding #{detected} to internal cache for #{url}") unless found
          found
        end

        next if metadata_additions.empty?

        additions[url] = metadata_additions
        # remove from the cache the detected metadata additions as they correspond to failures to be rechecked
        # (this works assuming the detected url metadata have "found" set to false)
        cached_entry[:metadata] = cache_metadata.difference(metadata_additions)
      end
    end

    # remove from cache URLs that no longer exist
    private def determine_deletions(urls_detected, type)
      deletions = 0

      @cache_log[type].delete_if do |url, cache|
        expired = type == :external ? !within_external_timeframe?(cache[:time]) : !within_internal_timeframe?(cache[:time])

        reason =
          if expired
            "expired timeframe"
          elsif !urls_detected.include?(url) && url_matches_type?(url, type)
            "not detected anymore"
          end
        next false if reason.nil?

        @logger.log(:debug, "Removing #{url} from #{type} cache (#{reason})")
        deletions += 1
        true
      end

      del_link_text = pluralize(deletions, "outdated #{type} link", "outdated #{type} links")
      @logger.log(:debug, "Removing #{del_link_text} from the cache")
    end

    # Resolves the cache file location, creates the storage directory if
    # needed, and loads the existing log (or starts an empty one).
    private def setup_cache!(options)
      @storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR

      FileUtils.mkdir_p(storage_dir) unless Dir.exist?(storage_dir)

      cache_file_name = options[:cache_file] || DEFAULT_CACHE_FILE_NAME

      @cache_file = File.join(storage_dir, cache_file_name)

      @cache_log = load_cache_log
    end

    # Reads the cache file, returning an empty log when the file is missing,
    # blank, or written with a different cache version.
    private def load_cache_log
      return empty_cache_log unless File.exist?(@cache_file)

      contents = File.read(@cache_file)
      return empty_cache_log if blank?(contents)

      log = JSON.parse(contents, symbolize_names: true)

      # A log without a version predates versioning; any other mismatch cannot
      # be trusted either. Both start over instead of leaving the log nil.
      unless log.is_a?(Hash) && log[:version] == CACHE_VERSION
        @logger.log(:debug, "Ignoring cache file with incompatible version")
        return empty_cache_log
      end

      # symbolize_names also symbolized the URL keys; turn those back into strings
      log[:internal] = (log[:internal] || {}).transform_keys(&:to_s)
      log[:external] = (log[:external] || {}).transform_keys(&:to_s)
      log
    end

    private def empty_cache_log
      {
        version: CACHE_VERSION,
        internal: {},
        external: {},
      }
    end

    # @return [Time] the run's timestamp minus +measurement+ units
    private def time_ago(measurement, unit)
      seconds =
        case unit
        when :months then SECONDS_PER_MONTH
        when :weeks then SECONDS_PER_WEEK
        when :days then SECONDS_PER_DAY
        when :hours then SECONDS_PER_HOUR
        else raise ArgumentError, "#{unit} is not a valid unit of time!"
        end

      @cache_datetime - (seconds * measurement)
    end

    # @return [Boolean] true when the URL's shape (matches URI_REGEXP or not) agrees with +type+
    private def url_matches_type?(url, type)
      looks_like_uri = URI_REGEXP.match?(url)

      case type
      when :internal then !looks_like_uri
      when :external then looks_like_uri
      else false
      end
    end

    # Normalizes the URL and drops a single trailing "/", "#" or "?".
    private def cleaned_url(url)
      cleaned_url = escape_unescape(url)

      return cleaned_url unless cleaned_url.end_with?("/", "#", "?") && cleaned_url.length > 1

      cleaned_url[0..-2]
    end

    private def escape_unescape(url)
      Addressable::URI.parse(url).normalize.to_s
    end

    private def within_timeframe?(current_time, parsed_timeframe)
      return false if current_time.nil? || parsed_timeframe.nil?

      # times read back from the JSON file are strings
      current_time = Time.parse(current_time) if current_time.is_a?(String)
      (parsed_timeframe..@cache_time).cover?(current_time)
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module HTMLProofer
  class Check
    # Checks that the document declares a favicon and that the favicon URL is usable.
    class Favicon < HTMLProofer::Check
      # Scans <link> elements for one whose last +rel+ token is "icon", then
      # reports a failure when none exists or when its URL is
      # protocol-relative or points to a missing internal file. Remote
      # favicons are queued as external URLs. Pages with an instant
      # meta-refresh redirect are skipped.
      def run
        found = false
        @html.css("link").each do |node|
          @favicon = create_element(node)

          next if @favicon.ignore?

          # a <link> without a rel attribute yields nil; coerce so it counts as "not an icon"
          break if (found = @favicon.node["rel"].to_s.split.last.eql?("icon"))
        end

        return if immediate_redirect?

        if found
          if @favicon.url.protocol_relative?
            add_failure(
              "favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead",
              line: @favicon.line,
              content: @favicon.content,
            )
          elsif @favicon.url.remote?
            add_to_external_urls(@favicon.url, @favicon.line)
          elsif !@favicon.url.exists?
            add_failure(
              "internal favicon #{@favicon.url.raw_attribute} does not exist",
              line: @favicon.line,
              content: @favicon.content,
            )
          end
        else
          add_failure("no favicon provided")
        end
      end

      private

      # allow any instant-redirect meta tag
      # (any error while looking the tag up is treated as "no redirect")
      def immediate_redirect?
        @html.xpath("//meta[@http-equiv='refresh']").attribute("content").value.start_with?("0;")
      rescue StandardError
        false
      end
    end
  end
end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
module HTMLProofer
  class Check
    # Checks <img> and <source> elements: filename quality, presence and
    # reachability of the image URL(s), alt text, and (optionally) HTTPS usage.
    class Images < HTMLProofer::Check
      SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/.freeze

      # Runs every image check against each non-ignored element.
      #
      # @return the external URLs collected so far (whatever +external_urls+ yields)
      def run
        @html.css("img, source").each do |node|
          @img = create_element(node)

          next if @img.ignore?

          inspect_filename
          inspect_source
          inspect_alt_text
          inspect_scheme
        end

        external_urls
      end

      def ignore_missing_alt?
        @runner.options[:ignore_missing_alt]
      end

      def ignore_empty_alt?
        @runner.options[:ignore_empty_alt]
      end

      def ignore_element?
        @img.url.ignore? || @img.aria_hidden?
      end

      def missing_alt_tag?
        @img.node["alt"].nil?
      end

      def empty_alt_tag?
        !missing_alt_tag? && @img.node["alt"].empty?
      end

      def empty_whitespace_alt_tag?
        !missing_alt_tag? && @img.node["alt"].strip.empty?
      end

      def alt_all_spaces?
        !missing_alt_tag? && @img.node["alt"].split.all?(" ")
      end

      def terrible_filename?
        @img.url.to_s =~ SCREEN_SHOT_REGEX
      end

      def missing_src?
        blank?(@img.url.to_s)
      end

      # screenshot filenames should return because of terrible names
      private def inspect_filename
        return unless terrible_filename?

        add_failure(
          "image has a terrible filename (#{@img.url.raw_attribute})",
          line: @img.line,
          content: @img.content,
        )
      end

      # does the image exist?
      private def inspect_source
        if missing_src?
          add_failure("image has no src or srcset attribute", line: @img.line, content: @img.content)
        elsif @img.url.protocol_relative?
          add_failure(
            "image link #{@img.url} is a protocol-relative URL, use explicit https:// instead",
            line: @img.line,
            content: @img.content,
          )
        elsif @img.url.remote?
          add_to_external_urls(@img.url, @img.line)
        elsif !@img.url.exists? && !@img.multiple_srcsets? && !@img.multiple_sizes?
          add_failure(
            "internal image #{@img.url.raw_attribute} does not exist",
            line: @img.line,
            content: @img.content,
          )
        elsif @img.multiple_srcsets? || @img.multiple_sizes?
          inspect_srcset_candidates
        end
      end

      # Applies the same URL checks to every entry of the element's srcset list.
      private def inspect_srcset_candidates
        @img.srcsets_wo_sizes.each do |srcset|
          candidate = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)

          if candidate.protocol_relative?
            add_failure(
              "image link #{candidate.url} is a protocol-relative URL, use explicit https:// instead",
              line: @img.line,
              content: @img.content,
            )
          elsif candidate.remote?
            add_to_external_urls(candidate.url, @img.line)
          elsif !candidate.exists?
            add_failure("internal image #{srcset} does not exist", line: @img.line, content: @img.content)
          end
        end
      end

      # if this is an img element, check that the alt attribute is present
      private def inspect_alt_text
        return unless @img.img_tag? && !ignore_element?

        if missing_alt_tag? && !ignore_missing_alt?
          add_failure(
            "image #{@img.url.raw_attribute} does not have an alt attribute",
            line: @img.line,
            content: @img.content,
          )
        elsif (empty_alt_tag? || alt_all_spaces?) && !ignore_empty_alt?
          add_failure(
            "image #{@img.url.raw_attribute} has an alt attribute, but no content",
            line: @img.line,
            content: @img.content,
          )
        end
      end

      # Flags plain-http images when the runner enforces HTTPS.
      private def inspect_scheme
        return unless @runner.enforce_https? && @img.url.http?

        add_failure(
          "image #{@img.url.raw_attribute} uses the http scheme",
          line: @img.line,
          content: @img.content,
        )
      end
    end
  end
end
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
module HTMLProofer
  class Check
    # Checks <a> and <link> elements: presence and validity of the reference,
    # scheme-specific rules (mailto, tel, http), optional SRI/CORS attributes,
    # and queues the URL for the internal or external validator.
    class Links < HTMLProofer::Check
      # Allowed elements from Subresource Integrity specification
      # https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets
      # An Array so that membership is an exact match; a String here made
      # `include?` a substring test and raised TypeError for a missing rel.
      SRI_REL_TYPES = %w[stylesheet].freeze

      # Runs the link checks against every non-ignored <a> and <link> element.
      # Each failed check records a failure and moves on to the next element.
      def run
        @html.css("a, link").each do |node|
          @link = create_element(node)

          next if @link.ignore?

          if !allow_hash_href? && @link.node["href"] == "#"
            add_failure("linking to internal hash #, which points to nowhere", line: @link.line, content: @link.content)
            next
          end

          # is there even an href?
          if blank?(@link.url.raw_attribute)
            next if allow_missing_href?

            add_failure("'#{@link.node.name}' tag is missing a reference", line: @link.line, content: @link.content)
            next
          end

          # is it even a valid URL?
          unless @link.url.valid?
            add_failure("#{@link.href} is an invalid URL", line: @link.line, content: @link.content)
            next
          end

          if @link.url.protocol_relative?
            add_failure(
              "#{@link.url} is a protocol-relative URL, use explicit https:// instead",
              line: @link.line,
              content: @link.content,
            )
            next
          end

          check_schemes

          # intentionally down here because we still want the validity and
          # missing-reference checks above to execute
          next if @link.url.non_http_remote?

          if !@link.url.internal? && @link.url.remote?
            check_sri if @runner.check_sri? && @link.link_tag?

            # we need to skip these for now; although the domain may be valid,
            # curl/Typhoeus inaccurately return 404s for some links. cc https://git.io/vyCFx
            next if @link.node["rel"] == "dns-prefetch"

            unless @link.url.path?
              add_failure("#{@link.url.raw_attribute} is an invalid URL", line: @link.line, content: @link.content)
              next
            end

            add_to_external_urls(@link.url, @link.line)
          elsif @link.url.internal?
            # does the local directory have a trailing slash?
            if @link.url.unslashed_directory?(@link.url.absolute_path)
              add_failure(
                "internally linking to a directory #{@link.url.raw_attribute} without trailing slash",
                line: @link.line,
                content: @link.content,
              )
              next
            end

            add_to_internal_urls(@link.url, @link.line)
          end
        end
      end

      def allow_missing_href?
        @runner.options[:allow_missing_href]
      end

      def allow_hash_href?
        @runner.options[:allow_hash_href]
      end

      # Applies the scheme-specific rule for the current link, if there is one.
      def check_schemes
        case @link.url.scheme
        when "mailto"
          handle_mailto
        when "tel"
          handle_tel
        when "http"
          return unless @runner.options[:enforce_https]

          add_failure("#{@link.url.raw_attribute} is not an HTTPS link", line: @link.line, content: @link.content)
        end
      end

      # Fails on a mailto: link with no address (unless that is ignored) or an invalid one.
      def handle_mailto
        if @link.url.path.empty?
          return if ignore_empty_mailto?

          add_failure(
            "#{@link.url.raw_attribute} contains no email address",
            line: @link.line,
            content: @link.content,
          )
        elsif !URI::MailTo::EMAIL_REGEXP.match?(@link.url.path)
          add_failure(
            "#{@link.url.raw_attribute} contains an invalid email address",
            line: @link.line,
            content: @link.content,
          )
        end
      end

      # Fails on a tel: link with no number.
      def handle_tel
        return unless @link.url.path.empty?

        add_failure(
          "#{@link.url.raw_attribute} contains no phone number",
          line: @link.line,
          content: @link.content,
        )
      end

      def ignore_empty_mailto?
        @runner.options[:ignore_empty_mailto]
      end

      # Reports missing +integrity+ and/or +crossorigin+ attributes on links
      # whose +rel+ is one of SRI_REL_TYPES; other links are left alone.
      def check_sri
        return unless SRI_REL_TYPES.include?(@link.node["rel"])

        if blank?(@link.node["integrity"]) && blank?(@link.node["crossorigin"])
          add_failure(
            "SRI and CORS not provided in: #{@link.url.raw_attribute}",
            line: @link.line,
            content: @link.content,
          )
        elsif blank?(@link.node["integrity"])
          add_failure("Integrity is missing in: #{@link.url.raw_attribute}", line: @link.line, content: @link.content)
        elsif blank?(@link.node["crossorigin"])
          add_failure(
            "CORS not provided for external resource in: #{@link.url.raw_attribute}",
            line: @link.line,
            content: @link.content,
          )
        end
      end

      private def source_tag?
        @link.node.name == "source"
      end

      private def anchor_tag?
        @link.node.name == "a"
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
module HTMLProofer
  class Check
    # Checks the URLs declared by +og:url+ and +og:image+ <meta> tags.
    class OpenGraph < HTMLProofer::Check
      # Reports tags whose +content+ attribute is missing, empty, an invalid
      # or protocol-relative URL, or a missing internal file. Remote URLs are
      # queued as external URLs.
      #
      # @return the external URLs collected so far (whatever +external_urls+ yields)
      def run
        @html.css('meta[property="og:url"], meta[property="og:image"]').each do |node|
          @open_graph = create_element(node)

          next if @open_graph.ignore?

          # does the open_graph exist?
          if missing_content?
            add_failure("open graph has no content attribute", line: @open_graph.line, content: @open_graph.content)
          elsif empty_content?
            add_failure("open graph content attribute is empty", line: @open_graph.line, content: @open_graph.content)
          elsif !@open_graph.url.valid?
            # the URL of a <meta> tag lives in `content`, not `src`; report it
            # the same way as the other failures in this check
            add_failure(
              "#{@open_graph.url.raw_attribute} is an invalid URL",
              line: @open_graph.line,
              content: @open_graph.content,
            )
          elsif @open_graph.url.protocol_relative?
            add_failure(
              "open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead",
              line: @open_graph.line,
              content: @open_graph.content,
            )
          elsif @open_graph.url.remote?
            add_to_external_urls(@open_graph.url, @open_graph.line)
          elsif !@open_graph.url.exists?
            add_failure(
              "internal open graph #{@open_graph.url.raw_attribute} does not exist",
              line: @open_graph.line,
              content: @open_graph.content,
            )
          end
        end

        external_urls
      end

      private def missing_content?
        @open_graph.node["content"].nil?
      end

      private def empty_content?
        @open_graph.node["content"].empty?
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module HTMLProofer
  class Check
    # Checks <script> elements that have no inline content: the +src+ must be
    # present and usable, and remote scripts may be required to carry SRI/CORS
    # attributes.
    class Scripts < HTMLProofer::Check
      # Runs the script checks against every non-ignored, content-less <script>.
      #
      # @return the external URLs collected so far (whatever +external_urls+ yields)
      def run
        @html.css("script").each do |node|
          @script = create_element(node)

          # scripts with inline content have nothing to resolve
          next if @script.ignore? || !@script.content.strip.empty?

          inspect_source
        end

        external_urls
      end

      def missing_src?
        @script.node["src"].nil?
      end

      # Reports whichever of the +integrity+ / +crossorigin+ attributes is blank.
      def check_sri
        integrity_blank = blank?(@script.node["integrity"])
        cors_blank = blank?(@script.node["crossorigin"])
        return unless integrity_blank || cors_blank

        description =
          if integrity_blank && cors_blank
            "SRI and CORS not provided in: #{@script.url.raw_attribute}"
          elsif integrity_blank
            "Integrity is missing in: #{@script.url.raw_attribute}"
          else
            "CORS not provided for external resource in: #{@script.url.raw_attribute}"
          end

        add_failure(description, line: @script.line, content: @script.content)
      end

      # does the script exist?
      private def inspect_source
        if missing_src?
          add_failure("script is empty and has no src attribute", line: @script.line, content: @script.content)
        elsif @script.url.protocol_relative?
          add_failure(
            "script link #{@script.url} is a protocol-relative URL, use explicit https:// instead",
            line: @script.line,
            content: @script.content,
          )
        elsif @script.url.remote?
          add_to_external_urls(@script.url, @script.line)
          check_sri if @runner.check_sri?
        elsif !@script.url.exists?
          add_failure(
            "internal script reference #{@script.src} does not exist",
            line: @script.line,
            content: @script.content,
          )
        end
      end
    end
  end
end