html-proofer 3.19.4 → 4.0.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/htmlproofer +30 -57
- data/lib/html-proofer.rb +1 -54
- data/lib/html_proofer/attribute/url.rb +231 -0
- data/lib/html_proofer/attribute.rb +15 -0
- data/lib/html_proofer/cache.rb +234 -0
- data/lib/html_proofer/check/favicon.rb +35 -0
- data/lib/html_proofer/check/images.rb +62 -0
- data/lib/html_proofer/check/links.rb +118 -0
- data/lib/html_proofer/check/open_graph.rb +34 -0
- data/lib/html_proofer/check/scripts.rb +38 -0
- data/lib/html_proofer/check.rb +91 -0
- data/lib/{html-proofer → html_proofer}/configuration.rb +30 -31
- data/lib/html_proofer/element.rb +122 -0
- data/lib/html_proofer/failure.rb +17 -0
- data/lib/{html-proofer → html_proofer}/log.rb +0 -0
- data/lib/html_proofer/reporter/cli.rb +29 -0
- data/lib/html_proofer/reporter.rb +23 -0
- data/lib/html_proofer/runner.rb +245 -0
- data/lib/html_proofer/url_validator/external.rb +189 -0
- data/lib/html_proofer/url_validator/internal.rb +86 -0
- data/lib/html_proofer/url_validator.rb +16 -0
- data/lib/{html-proofer → html_proofer}/utils.rb +5 -8
- data/lib/{html-proofer → html_proofer}/version.rb +1 -1
- data/lib/html_proofer/xpath_functions.rb +10 -0
- data/lib/html_proofer.rb +56 -0
- metadata +46 -27
- data/lib/html-proofer/cache.rb +0 -194
- data/lib/html-proofer/check/favicon.rb +0 -29
- data/lib/html-proofer/check/html.rb +0 -37
- data/lib/html-proofer/check/images.rb +0 -48
- data/lib/html-proofer/check/links.rb +0 -182
- data/lib/html-proofer/check/opengraph.rb +0 -46
- data/lib/html-proofer/check/scripts.rb +0 -42
- data/lib/html-proofer/check.rb +0 -75
- data/lib/html-proofer/element.rb +0 -265
- data/lib/html-proofer/issue.rb +0 -65
- data/lib/html-proofer/middleware.rb +0 -82
- data/lib/html-proofer/runner.rb +0 -249
- data/lib/html-proofer/url_validator.rb +0 -237
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.19.4
|
4
|
+
version: 4.0.0.rc1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05
|
11
|
+
date: 2022-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '1.
|
47
|
+
version: '1.12'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '1.
|
54
|
+
version: '1.12'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: parallel
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '2.0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: zeitwerk
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '2.5'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '2.5'
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: awesome_print
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -123,7 +137,7 @@ dependencies:
|
|
123
137
|
- !ruby/object:Gem::Version
|
124
138
|
version: '0'
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
140
|
+
name: debug
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
128
142
|
requirements:
|
129
143
|
- - ">="
|
@@ -193,7 +207,7 @@ dependencies:
|
|
193
207
|
- !ruby/object:Gem::Version
|
194
208
|
version: '0'
|
195
209
|
- !ruby/object:Gem::Dependency
|
196
|
-
name: rubocop-
|
210
|
+
name: rubocop-rspec
|
197
211
|
requirement: !ruby/object:Gem::Requirement
|
198
212
|
requirements:
|
199
213
|
- - ">="
|
@@ -258,28 +272,33 @@ extra_rdoc_files: []
|
|
258
272
|
files:
|
259
273
|
- bin/htmlproofer
|
260
274
|
- lib/html-proofer.rb
|
261
|
-
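- lib/html-proofer/cache.rb
|
262
|
-
- lib/html-proofer/check.rb
|
263
|
-
- lib/html-proofer/check/favicon.rb
|
264
|
-
- lib/html-proofer/check/html.rb
|
265
|
-
- lib/html-proofer/check/images.rb
|
266
|
-
- lib/html-proofer/check/links.rb
|
267
|
-
- lib/html-proofer/check/opengraph.rb
|
268
|
-
- lib/html-proofer/check/scripts.rb
|
269
|
-
- lib/html-proofer/configuration.rb
|
270
|
-
- lib/html-proofer/element.rb
|
271
|
-
- lib/html-proofer/issue.rb
|
272
|
-
- lib/html-proofer/log.rb
|
273
|
-
- lib/html-proofer/middleware.rb
|
274
|
-
- lib/html-proofer/runner.rb
|
275
|
-
- lib/html-proofer/url_validator.rb
|
276
|
-
- lib/html-proofer/utils.rb
|
277
|
-
- lib/html-proofer/version.rb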
|
275
|
+
- lib/html_proofer.rb
|
276
|
+
- lib/html_proofer/attribute.rb
|
277
|
+
- lib/html_proofer/attribute/url.rb
|
278
|
+
- lib/html_proofer/cache.rb
|
279
|
+
- lib/html_proofer/check.rb
|
280
|
+
- lib/html_proofer/check/favicon.rb
|
281
|
+
- lib/html_proofer/check/images.rb
|
282
|
+
- lib/html_proofer/check/links.rb
|
283
|
+
- lib/html_proofer/check/open_graph.rb
|
284
|
+
- lib/html_proofer/check/scripts.rb
|
285
|
+
- lib/html_proofer/configuration.rb
|
286
|
+
- lib/html_proofer/element.rb
|
287
|
+
- lib/html_proofer/failure.rb
|
288
|
+
- lib/html_proofer/log.rb
|
289
|
+
- lib/html_proofer/reporter.rb
|
290
|
+
- lib/html_proofer/reporter/cli.rb
|
291
|
+
- lib/html_proofer/runner.rb
|
292
|
+
- lib/html_proofer/url_validator.rb
|
293
|
+
- lib/html_proofer/url_validator/external.rb
|
294
|
+
- lib/html_proofer/url_validator/internal.rb
|
295
|
+
- lib/html_proofer/utils.rb
|
296
|
+
- lib/html_proofer/version.rb
|
297
|
+
- lib/html_proofer/xpath_functions.rb
|
278
298
|
homepage: https://github.com/gjtorikian/html-proofer
|
279
299
|
licenses:
|
280
300
|
- MIT
|
281
301
|
metadata:
|
282
|
-
funding_uri: https://github.com/sponsors/gjtorikian/
|
283
302
|
rubygems_mfa_required: 'true'
|
284
303
|
post_install_message:
|
285
304
|
rdoc_options: []
|
@@ -295,11 +314,11 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
295
314
|
version: '4.0'
|
296
315
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
297
316
|
requirements:
|
298
|
-
- - ">="
|
317
|
+
- - ">"
|
299
318
|
- !ruby/object:Gem::Version
|
300
|
-
version: '0'
|
319
|
+
version: 1.3.1
|
301
320
|
requirements: []
|
302
|
-
rubygems_version: 3.3.
|
321
|
+
rubygems_version: 3.3.3
|
303
322
|
signing_key:
|
304
323
|
specification_version: 4
|
305
324
|
summary: A set of tests to validate your HTML output. These tests check if your image
|
data/lib/html-proofer/cache.rb
DELETED
@@ -1,194 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'utils'
|
4
|
-
require 'date'
|
5
|
-
require 'json'
|
6
|
-
require 'uri'
|
7
|
-
|
8
|
-
module HTMLProofer
|
9
|
-
class Cache
|
10
|
-
include HTMLProofer::Utils
|
11
|
-
|
12
|
-
DEFAULT_STORAGE_DIR = File.join('tmp', '.htmlproofer')
|
13
|
-
DEFAULT_CACHE_FILE_NAME = 'cache.log'
|
14
|
-
|
15
|
-
URI_REGEXP = URI::DEFAULT_PARSER.make_regexp
|
16
|
-
|
17
|
-
attr_reader :exists, :cache_log, :storage_dir, :cache_file
|
18
|
-
|
19
|
-
def initialize(logger, options)
|
20
|
-
@logger = logger
|
21
|
-
@cache_log = {}
|
22
|
-
|
23
|
-
@cache_datetime = DateTime.now
|
24
|
-
@cache_time = @cache_datetime.to_time
|
25
|
-
|
26
|
-
if options.nil? || options.empty?
|
27
|
-
define_singleton_method('use_cache?') { false }
|
28
|
-
else
|
29
|
-
define_singleton_method('use_cache?') { true }
|
30
|
-
setup_cache!(options)
|
31
|
-
@parsed_timeframe = parsed_timeframe(options[:timeframe])
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
def within_timeframe?(time)
|
36
|
-
return false if time.nil?
|
37
|
-
|
38
|
-
(@parsed_timeframe..@cache_time).cover?(Time.parse(time))
|
39
|
-
end
|
40
|
-
|
41
|
-
def urls
|
42
|
-
@cache_log['urls'] || []
|
43
|
-
end
|
44
|
-
|
45
|
-
def size
|
46
|
-
@cache_log.length
|
47
|
-
end
|
48
|
-
|
49
|
-
def parsed_timeframe(timeframe)
|
50
|
-
time, date = timeframe.match(/(\d+)(\D)/).captures
|
51
|
-
time = time.to_i
|
52
|
-
case date
|
53
|
-
when 'M'
|
54
|
-
time_ago(time, :months)
|
55
|
-
when 'w'
|
56
|
-
time_ago(time, :weeks)
|
57
|
-
when 'd'
|
58
|
-
time_ago(time, :days)
|
59
|
-
when 'h'
|
60
|
-
time_ago(time, :hours)
|
61
|
-
else
|
62
|
-
raise ArgumentError, "#{date} is not a valid timeframe!"
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def add(url, filenames, status, msg = '')
|
67
|
-
return unless use_cache?
|
68
|
-
|
69
|
-
data = {
|
70
|
-
time: @cache_time,
|
71
|
-
filenames: filenames,
|
72
|
-
status: status,
|
73
|
-
message: msg
|
74
|
-
}
|
75
|
-
|
76
|
-
@cache_log[clean_url(url)] = data
|
77
|
-
end
|
78
|
-
|
79
|
-
def detect_url_changes(found, type)
|
80
|
-
found_urls = found.keys.map { |url| clean_url(url) }
|
81
|
-
|
82
|
-
# if there were no urls, bail
|
83
|
-
return {} if found_urls.empty?
|
84
|
-
|
85
|
-
existing_urls = @cache_log.keys.map { |url| clean_url(url) }
|
86
|
-
|
87
|
-
# prepare to add new URLs detected
|
88
|
-
additions = found.reject do |url, _|
|
89
|
-
url = clean_url(url)
|
90
|
-
if existing_urls.include?(url)
|
91
|
-
true
|
92
|
-
else
|
93
|
-
@logger.log :debug, "Adding #{url} to cache check"
|
94
|
-
false
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
new_link_count = additions.length
|
99
|
-
new_link_text = pluralize(new_link_count, 'link', 'links')
|
100
|
-
@logger.log :info, "Adding #{new_link_text} to the cache..."
|
101
|
-
|
102
|
-
# remove from cache URLs that no longer exist
|
103
|
-
deletions = 0
|
104
|
-
@cache_log.delete_if do |url, _|
|
105
|
-
url = clean_url(url)
|
106
|
-
|
107
|
-
if found_urls.include?(url)
|
108
|
-
false
|
109
|
-
elsif url_matches_type?(url, type)
|
110
|
-
@logger.log :debug, "Removing #{url} from cache check"
|
111
|
-
deletions += 1
|
112
|
-
true
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
del_link_text = pluralize(deletions, 'link', 'links')
|
117
|
-
@logger.log :info, "Removing #{del_link_text} from the cache..."
|
118
|
-
|
119
|
-
additions
|
120
|
-
end
|
121
|
-
|
122
|
-
# TODO: Garbage performance--both the external and internal
|
123
|
-
# caches need access to this file. Write a proper versioned
|
124
|
-
# schema in the future
|
125
|
-
def write
|
126
|
-
File.write(cache_file, @cache_log.to_json)
|
127
|
-
end
|
128
|
-
|
129
|
-
def load?
|
130
|
-
@load.nil?
|
131
|
-
end
|
132
|
-
|
133
|
-
def retrieve_urls(urls, type)
|
134
|
-
urls_to_check = detect_url_changes(urls, type)
|
135
|
-
|
136
|
-
@cache_log.each_pair do |url, cache|
|
137
|
-
next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
|
138
|
-
|
139
|
-
if url_matches_type?(url, type)
|
140
|
-
urls_to_check[url] = cache['filenames'] # recheck expired links
|
141
|
-
end
|
142
|
-
end
|
143
|
-
urls_to_check
|
144
|
-
end
|
145
|
-
|
146
|
-
# FIXME: it seems that Typhoeus actually acts on escaped URLs,
|
147
|
-
# but there's no way to get at that information, and the cache
|
148
|
-
# stores unescaped URLs. Because of this, some links, such as
|
149
|
-
# github.com/search/issues?q=is:open+is:issue+fig are not matched
|
150
|
-
# as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig
|
151
|
-
def unescape_url(url)
|
152
|
-
Addressable::URI.unescape(url)
|
153
|
-
end
|
154
|
-
|
155
|
-
def clean_url(url)
|
156
|
-
unescape_url(url)
|
157
|
-
end
|
158
|
-
|
159
|
-
def setup_cache!(options)
|
160
|
-
@storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR
|
161
|
-
|
162
|
-
FileUtils.mkdir_p(storage_dir) unless Dir.exist?(storage_dir)
|
163
|
-
|
164
|
-
cache_file_name = options[:cache_file] || DEFAULT_CACHE_FILE_NAME
|
165
|
-
|
166
|
-
@cache_file = File.join(storage_dir, cache_file_name)
|
167
|
-
|
168
|
-
return unless File.exist?(@cache_file)
|
169
|
-
|
170
|
-
contents = File.read(@cache_file)
|
171
|
-
@cache_log = contents.empty? ? {} : JSON.parse(contents)
|
172
|
-
end
|
173
|
-
|
174
|
-
private
|
175
|
-
|
176
|
-
def time_ago(measurement, unit)
|
177
|
-
case unit
|
178
|
-
when :months
|
179
|
-
@cache_datetime >> -measurement
|
180
|
-
when :weeks
|
181
|
-
@cache_datetime - (measurement * 7)
|
182
|
-
when :days
|
183
|
-
@cache_datetime - measurement
|
184
|
-
when :hours
|
185
|
-
@cache_datetime - Rational(measurement / 24.0)
|
186
|
-
end.to_time
|
187
|
-
end
|
188
|
-
|
189
|
-
def url_matches_type?(url, type)
|
190
|
-
return true if type == :internal && url !~ URI_REGEXP
|
191
|
-
return true if type == :external && url =~ URI_REGEXP
|
192
|
-
end
|
193
|
-
end
|
194
|
-
end
|
data/lib/html-proofer/check/favicon.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
class FaviconCheck < ::HTMLProofer::Check
|
4
|
-
def run
|
5
|
-
found = false
|
6
|
-
@html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |node|
|
7
|
-
favicon = create_element(node)
|
8
|
-
next if favicon.ignore?
|
9
|
-
|
10
|
-
found = true if favicon.rel.split.last.eql? 'icon'
|
11
|
-
break if found
|
12
|
-
end
|
13
|
-
|
14
|
-
return if found
|
15
|
-
|
16
|
-
return if immediate_redirect?
|
17
|
-
|
18
|
-
add_issue('no favicon specified')
|
19
|
-
end
|
20
|
-
|
21
|
-
private
|
22
|
-
|
23
|
-
# allow any instant-redirect meta tag
|
24
|
-
def immediate_redirect?
|
25
|
-
@html.xpath("//meta[@http-equiv='refresh']").attribute('content').value.start_with? '0;'
|
26
|
-
rescue StandardError
|
27
|
-
false
|
28
|
-
end
|
29
|
-
end
|
data/lib/html-proofer/check/html.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
class HtmlCheck < ::HTMLProofer::Check
|
4
|
-
# tags embedded in scripts are used in templating languages: http://git.io/vOovv
|
5
|
-
SCRIPT_EMBEDS_MSG = /Element script embeds close tag/.freeze
|
6
|
-
INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/.freeze
|
7
|
-
INVALID_PREFIX = /Namespace prefix/.freeze
|
8
|
-
PARSE_ENTITY_REF = /htmlParseEntityRef: no name/.freeze
|
9
|
-
DOCTYPE_MSG = /Expected a doctype token/.freeze
|
10
|
-
EOF_IN_TAG = /End of input in tag/.freeze
|
11
|
-
MISMATCHED_TAGS = /That tag isn't allowed here/.freeze
|
12
|
-
|
13
|
-
def run
|
14
|
-
@html.errors.each do |error|
|
15
|
-
add_issue(error.message, line: error.line) if report?(error.message)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
def report?(message)
|
20
|
-
case message
|
21
|
-
when SCRIPT_EMBEDS_MSG
|
22
|
-
options[:validation][:report_script_embeds]
|
23
|
-
when INVALID_TAG_MSG, INVALID_PREFIX
|
24
|
-
options[:validation][:report_invalid_tags]
|
25
|
-
when PARSE_ENTITY_REF
|
26
|
-
options[:validation][:report_missing_names]
|
27
|
-
when DOCTYPE_MSG
|
28
|
-
options[:validation][:report_missing_doctype]
|
29
|
-
when EOF_IN_TAG
|
30
|
-
options[:validation][:report_eof_tags]
|
31
|
-
when MISMATCHED_TAGS
|
32
|
-
options[:validation][:report_mismatched_tags]
|
33
|
-
else
|
34
|
-
true
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
data/lib/html-proofer/check/images.rb
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
class ImageCheck < ::HTMLProofer::Check
|
4
|
-
SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/.freeze
|
5
|
-
|
6
|
-
def empty_alt_tag?
|
7
|
-
@img.alt.nil? || @img.alt.strip.empty?
|
8
|
-
end
|
9
|
-
|
10
|
-
def terrible_filename?
|
11
|
-
@img.url =~ SCREEN_SHOT_REGEX
|
12
|
-
end
|
13
|
-
|
14
|
-
def missing_src?
|
15
|
-
blank?(@img.url)
|
16
|
-
end
|
17
|
-
|
18
|
-
def run
|
19
|
-
@html.css('img').each do |node|
|
20
|
-
@img = create_element(node)
|
21
|
-
line = node.line
|
22
|
-
content = node.content
|
23
|
-
|
24
|
-
next if @img.ignore?
|
25
|
-
|
26
|
-
# screenshot filenames should return because of terrible names
|
27
|
-
if terrible_filename?
|
28
|
-
add_issue("image has a terrible filename (#{@img.url})", line: line, content: content)
|
29
|
-
next
|
30
|
-
end
|
31
|
-
|
32
|
-
# does the image exist?
|
33
|
-
if missing_src?
|
34
|
-
add_issue('image has no src or srcset attribute', line: line, content: content)
|
35
|
-
elsif @img.remote?
|
36
|
-
add_to_external_urls(@img.url)
|
37
|
-
elsif !@img.exists?
|
38
|
-
add_issue("internal image #{@img.url} does not exist", line: line, content: content)
|
39
|
-
end
|
40
|
-
|
41
|
-
add_issue("image #{@img.url} does not have an alt attribute", line: line, content: content) if empty_alt_tag? && !@img.ignore_empty_alt? && !@img.ignore_alt?
|
42
|
-
|
43
|
-
add_issue("image #{@img.url} uses the http scheme", line: line, content: content) if @img.check_img_http? && @img.scheme == 'http'
|
44
|
-
end
|
45
|
-
|
46
|
-
external_urls
|
47
|
-
end
|
48
|
-
end
|
data/lib/html-proofer/check/links.rb
DELETED
@@ -1,182 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
class LinkCheck < ::HTMLProofer::Check
|
4
|
-
include HTMLProofer::Utils
|
5
|
-
|
6
|
-
def missing_href?
|
7
|
-
return blank?(@link.src) if @node.name == 'source'
|
8
|
-
|
9
|
-
blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
|
10
|
-
end
|
11
|
-
|
12
|
-
def placeholder?
|
13
|
-
(!blank?(@link.id) || !blank?(@link.name)) && @link.href.nil?
|
14
|
-
end
|
15
|
-
|
16
|
-
def run
|
17
|
-
@html.css('a, link, source').each do |node|
|
18
|
-
@link = create_element(node)
|
19
|
-
line = node.line
|
20
|
-
content = node.to_s
|
21
|
-
|
22
|
-
next if @link.ignore?
|
23
|
-
|
24
|
-
next if placeholder?
|
25
|
-
next if @link.allow_hash_href? && @link.href == '#'
|
26
|
-
|
27
|
-
# is it even a valid URL?
|
28
|
-
unless @link.valid?
|
29
|
-
add_issue("#{@link.href} is an invalid URL", line: line, content: content)
|
30
|
-
next
|
31
|
-
end
|
32
|
-
|
33
|
-
check_schemes(@link, line, content)
|
34
|
-
|
35
|
-
# is there even an href?
|
36
|
-
if missing_href?
|
37
|
-
next if @link.allow_missing_href?
|
38
|
-
# HTML5 allows dropping the href: http://git.io/vBX0z
|
39
|
-
next if @html.internal_subset.nil? || (@html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?)
|
40
|
-
|
41
|
-
add_issue('anchor has no href attribute', line: line, content: content)
|
42
|
-
next
|
43
|
-
end
|
44
|
-
|
45
|
-
# intentionally here because we still want valid? & missing_href? to execute
|
46
|
-
next if @link.non_http_remote?
|
47
|
-
|
48
|
-
if !@link.href&.start_with?('#') && !@link.internal? && @link.remote?
|
49
|
-
check_sri(line, content) if @link.check_sri? && node.name == 'link'
|
50
|
-
# we need to skip these for now; although the domain main be valid,
|
51
|
-
# curl/Typheous inaccurately return 404s for some links. cc https://git.io/vyCFx
|
52
|
-
next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
|
53
|
-
|
54
|
-
unless @link.path?
|
55
|
-
add_issue("#{@link.href} is an invalid URL", line: line, content: content)
|
56
|
-
next
|
57
|
-
end
|
58
|
-
|
59
|
-
add_to_external_urls(@link.href || @link.src)
|
60
|
-
next
|
61
|
-
elsif @link.internal?
|
62
|
-
add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
|
63
|
-
add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content) if !@link.exists? && !@link.hash
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
external_urls
|
68
|
-
end
|
69
|
-
|
70
|
-
def check_internal_link(link, path, line, content)
|
71
|
-
# does the local directory have a trailing slash?
|
72
|
-
if link.unslashed_directory?(link.absolute_path)
|
73
|
-
add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", path: path, line: line, content: content)
|
74
|
-
return false
|
75
|
-
end
|
76
|
-
|
77
|
-
return true unless link.hash
|
78
|
-
|
79
|
-
# verify the target hash
|
80
|
-
handle_hash(link, path, line, content)
|
81
|
-
end
|
82
|
-
|
83
|
-
def check_schemes(link, line, content)
|
84
|
-
case link.scheme
|
85
|
-
when 'mailto'
|
86
|
-
handle_mailto(link, line, content)
|
87
|
-
when 'tel'
|
88
|
-
handle_tel(link, line, content)
|
89
|
-
when 'http'
|
90
|
-
return unless @options[:enforce_https]
|
91
|
-
|
92
|
-
add_issue("#{link.href} is not an HTTPS link", line: line, content: content)
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
def handle_mailto(link, line, content)
|
97
|
-
if link.path.empty?
|
98
|
-
add_issue("#{link.href} contains no email address", line: line, content: content) unless link.ignore_empty_mailto?
|
99
|
-
elsif !link.path.include?('@')
|
100
|
-
add_issue("#{link.href} contains an invalid email address", line: line, content: content)
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
def handle_tel(link, line, content)
|
105
|
-
add_issue("#{link.href} contains no phone number", line: line, content: content) if link.path.empty?
|
106
|
-
end
|
107
|
-
|
108
|
-
def handle_hash(link, path, line, content)
|
109
|
-
if link.internal? && !hash_exists?(link.html, link.hash) # rubocop:disable Style/GuardClause
|
110
|
-
return add_issue("linking to internal hash ##{link.hash} that does not exist", path: path, line: line, content: content)
|
111
|
-
elsif link.external?
|
112
|
-
return external_link_check(link, line, content)
|
113
|
-
end
|
114
|
-
|
115
|
-
true
|
116
|
-
end
|
117
|
-
|
118
|
-
def external_link_check(link, line, content)
|
119
|
-
if link.exists? # rubocop:disable Style/GuardClause
|
120
|
-
target_html = create_nokogiri(link.absolute_path)
|
121
|
-
return add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_exists?(target_html, link.hash)
|
122
|
-
else
|
123
|
-
return add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
|
124
|
-
end
|
125
|
-
|
126
|
-
true
|
127
|
-
end
|
128
|
-
|
129
|
-
def hash_exists?(html, href_hash)
|
130
|
-
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
131
|
-
fragment_ids = [href_hash, decoded_href_hash]
|
132
|
-
# https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
|
133
|
-
fragment_ids.include?('top') || !find_fragments(html, fragment_ids).empty?
|
134
|
-
end
|
135
|
-
|
136
|
-
def find_fragments(html, fragment_ids)
|
137
|
-
xpaths = fragment_ids.flat_map do |frag_id|
|
138
|
-
escaped_frag_id = "'#{frag_id.split("'").join("', \"'\", '")}', ''"
|
139
|
-
[
|
140
|
-
"//*[case_sensitive_equals(@id, concat(#{escaped_frag_id}))]",
|
141
|
-
"//*[case_sensitive_equals(@name, concat(#{escaped_frag_id}))]"
|
142
|
-
]
|
143
|
-
end
|
144
|
-
xpaths << XpathFunctions.new
|
145
|
-
|
146
|
-
html.xpath(*xpaths)
|
147
|
-
end
|
148
|
-
|
149
|
-
# Whitelist for affected elements from Subresource Integrity specification
|
150
|
-
# https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets
|
151
|
-
SRI_REL_TYPES = %(stylesheet)
|
152
|
-
|
153
|
-
def check_sri(line, content)
|
154
|
-
return unless SRI_REL_TYPES.include?(@link.rel)
|
155
|
-
|
156
|
-
if !defined?(@link.integrity) && !defined?(@link.crossorigin)
|
157
|
-
add_issue("SRI and CORS not provided in: #{@link.src}", line: line, content: content)
|
158
|
-
elsif !defined?(@link.integrity)
|
159
|
-
add_issue("Integrity is missing in: #{@link.src}", line: line, content: content)
|
160
|
-
elsif !defined?(@link.crossorigin)
|
161
|
-
add_issue("CORS not provided for external resource in: #{@link.src}", line: line, content: content)
|
162
|
-
end
|
163
|
-
end
|
164
|
-
|
165
|
-
class XpathFunctions
|
166
|
-
def case_sensitive_equals(node_set, str_to_match)
|
167
|
-
node_set.find_all { |node| node.to_s.== str_to_match.to_s }
|
168
|
-
end
|
169
|
-
end
|
170
|
-
|
171
|
-
class InternalLink
|
172
|
-
attr_reader :link, :href, :path, :line, :content
|
173
|
-
|
174
|
-
def initialize(link, path, line, content)
|
175
|
-
@link = link
|
176
|
-
@href = @link.href
|
177
|
-
@path = path
|
178
|
-
@line = line
|
179
|
-
@content = content
|
180
|
-
end
|
181
|
-
end
|
182
|
-
end
|
data/lib/html-proofer/check/opengraph.rb
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
class OpenGraphElement < ::HTMLProofer::Element
|
4
|
-
attr_reader :src
|
5
|
-
|
6
|
-
def initialize(obj, check, logger)
|
7
|
-
super(obj, check, logger)
|
8
|
-
# Fake up src from the content attribute
|
9
|
-
instance_variable_set('@src', @content)
|
10
|
-
|
11
|
-
@src.insert 0, 'http:' if %r{^//}.match?(@src)
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
class OpenGraphCheck < ::HTMLProofer::Check
|
16
|
-
def missing_src?
|
17
|
-
!@opengraph.src
|
18
|
-
end
|
19
|
-
|
20
|
-
def empty_src?
|
21
|
-
blank?(@opengraph.src)
|
22
|
-
end
|
23
|
-
|
24
|
-
def run
|
25
|
-
@html.css('meta[property="og:url"], meta[property="og:image"]').each do |m|
|
26
|
-
@opengraph = OpenGraphElement.new(m, self, @logger)
|
27
|
-
|
28
|
-
next if @opengraph.ignore?
|
29
|
-
|
30
|
-
# does the opengraph exist?
|
31
|
-
if missing_src?
|
32
|
-
add_issue('open graph has no content attribute', line: m.line, content: m.content)
|
33
|
-
elsif empty_src?
|
34
|
-
add_issue('open graph content attribute is empty', line: m.line, content: m.content)
|
35
|
-
elsif !@opengraph.valid?
|
36
|
-
add_issue("#{@opengraph.src} is an invalid URL", line: m.line)
|
37
|
-
elsif @opengraph.remote?
|
38
|
-
add_to_external_urls(@opengraph.url)
|
39
|
-
else
|
40
|
-
add_issue("internal open graph #{@opengraph.url} does not exist", line: m.line, content: m.content) unless @opengraph.exists?
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
external_urls
|
45
|
-
end
|
46
|
-
end
|