html-proofer 3.19.4 → 4.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +30 -57
- data/lib/html-proofer.rb +1 -54
- data/lib/html_proofer/attribute/url.rb +231 -0
- data/lib/html_proofer/attribute.rb +15 -0
- data/lib/html_proofer/cache.rb +234 -0
- data/lib/html_proofer/check/favicon.rb +35 -0
- data/lib/html_proofer/check/images.rb +62 -0
- data/lib/html_proofer/check/links.rb +118 -0
- data/lib/html_proofer/check/open_graph.rb +34 -0
- data/lib/html_proofer/check/scripts.rb +38 -0
- data/lib/html_proofer/check.rb +91 -0
- data/lib/{html-proofer → html_proofer}/configuration.rb +30 -31
- data/lib/html_proofer/element.rb +122 -0
- data/lib/html_proofer/failure.rb +17 -0
- data/lib/{html-proofer → html_proofer}/log.rb +0 -0
- data/lib/html_proofer/reporter/cli.rb +29 -0
- data/lib/html_proofer/reporter.rb +23 -0
- data/lib/html_proofer/runner.rb +245 -0
- data/lib/html_proofer/url_validator/external.rb +189 -0
- data/lib/html_proofer/url_validator/internal.rb +86 -0
- data/lib/html_proofer/url_validator.rb +16 -0
- data/lib/{html-proofer → html_proofer}/utils.rb +5 -8
- data/lib/{html-proofer → html_proofer}/version.rb +1 -1
- data/lib/html_proofer/xpath_functions.rb +10 -0
- data/lib/html_proofer.rb +56 -0
- metadata +46 -27
- data/lib/html-proofer/cache.rb +0 -194
- data/lib/html-proofer/check/favicon.rb +0 -29
- data/lib/html-proofer/check/html.rb +0 -37
- data/lib/html-proofer/check/images.rb +0 -48
- data/lib/html-proofer/check/links.rb +0 -182
- data/lib/html-proofer/check/opengraph.rb +0 -46
- data/lib/html-proofer/check/scripts.rb +0 -42
- data/lib/html-proofer/check.rb +0 -75
- data/lib/html-proofer/element.rb +0 -265
- data/lib/html-proofer/issue.rb +0 -65
- data/lib/html-proofer/middleware.rb +0 -82
- data/lib/html-proofer/runner.rb +0 -249
- data/lib/html-proofer/url_validator.rb +0 -237
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.0.0.rc1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05
|
11
|
+
date: 2022-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '1.
|
47
|
+
version: '1.12'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '1.
|
54
|
+
version: '1.12'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: parallel
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '2.0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: zeitwerk
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '2.5'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '2.5'
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: awesome_print
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -123,7 +137,7 @@ dependencies:
|
|
123
137
|
- !ruby/object:Gem::Version
|
124
138
|
version: '0'
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
140
|
+
name: debug
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
128
142
|
requirements:
|
129
143
|
- - ">="
|
@@ -193,7 +207,7 @@ dependencies:
|
|
193
207
|
- !ruby/object:Gem::Version
|
194
208
|
version: '0'
|
195
209
|
- !ruby/object:Gem::Dependency
|
196
|
-
name: rubocop-
|
210
|
+
name: rubocop-rspec
|
197
211
|
requirement: !ruby/object:Gem::Requirement
|
198
212
|
requirements:
|
199
213
|
- - ">="
|
@@ -258,28 +272,33 @@ extra_rdoc_files: []
|
|
258
272
|
files:
|
259
273
|
- bin/htmlproofer
|
260
274
|
- lib/html-proofer.rb
|
261
|
-
- lib/
|
262
|
-
- lib/
|
263
|
-
- lib/
|
264
|
-
- lib/
|
265
|
-
- lib/
|
266
|
-
- lib/
|
267
|
-
- lib/
|
268
|
-
- lib/
|
269
|
-
- lib/
|
270
|
-
- lib/
|
271
|
-
- lib/
|
272
|
-
- lib/
|
273
|
-
- lib/
|
274
|
-
- lib/
|
275
|
-
- lib/
|
276
|
-
- lib/
|
277
|
-
- lib/
|
275
|
+
- lib/html_proofer.rb
|
276
|
+
- lib/html_proofer/attribute.rb
|
277
|
+
- lib/html_proofer/attribute/url.rb
|
278
|
+
- lib/html_proofer/cache.rb
|
279
|
+
- lib/html_proofer/check.rb
|
280
|
+
- lib/html_proofer/check/favicon.rb
|
281
|
+
- lib/html_proofer/check/images.rb
|
282
|
+
- lib/html_proofer/check/links.rb
|
283
|
+
- lib/html_proofer/check/open_graph.rb
|
284
|
+
- lib/html_proofer/check/scripts.rb
|
285
|
+
- lib/html_proofer/configuration.rb
|
286
|
+
- lib/html_proofer/element.rb
|
287
|
+
- lib/html_proofer/failure.rb
|
288
|
+
- lib/html_proofer/log.rb
|
289
|
+
- lib/html_proofer/reporter.rb
|
290
|
+
- lib/html_proofer/reporter/cli.rb
|
291
|
+
- lib/html_proofer/runner.rb
|
292
|
+
- lib/html_proofer/url_validator.rb
|
293
|
+
- lib/html_proofer/url_validator/external.rb
|
294
|
+
- lib/html_proofer/url_validator/internal.rb
|
295
|
+
- lib/html_proofer/utils.rb
|
296
|
+
- lib/html_proofer/version.rb
|
297
|
+
- lib/html_proofer/xpath_functions.rb
|
278
298
|
homepage: https://github.com/gjtorikian/html-proofer
|
279
299
|
licenses:
|
280
300
|
- MIT
|
281
301
|
metadata:
|
282
|
-
funding_uri: https://github.com/sponsors/gjtorikian/
|
283
302
|
rubygems_mfa_required: 'true'
|
284
303
|
post_install_message:
|
285
304
|
rdoc_options: []
|
@@ -295,11 +314,11 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
295
314
|
version: '4.0'
|
296
315
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
297
316
|
requirements:
|
298
|
-
- - "
|
317
|
+
- - ">"
|
299
318
|
- !ruby/object:Gem::Version
|
300
|
-
version:
|
319
|
+
version: 1.3.1
|
301
320
|
requirements: []
|
302
|
-
rubygems_version: 3.3.
|
321
|
+
rubygems_version: 3.3.3
|
303
322
|
signing_key:
|
304
323
|
specification_version: 4
|
305
324
|
summary: A set of tests to validate your HTML output. These tests check if your image
|
data/lib/html-proofer/cache.rb
DELETED
@@ -1,194 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'utils'
|
4
|
-
require 'date'
|
5
|
-
require 'json'
|
6
|
-
require 'uri'
|
7
|
-
|
8
|
-
module HTMLProofer
|
9
|
-
class Cache
|
10
|
-
include HTMLProofer::Utils
|
11
|
-
|
12
|
-
DEFAULT_STORAGE_DIR = File.join('tmp', '.htmlproofer')
|
13
|
-
DEFAULT_CACHE_FILE_NAME = 'cache.log'
|
14
|
-
|
15
|
-
URI_REGEXP = URI::DEFAULT_PARSER.make_regexp
|
16
|
-
|
17
|
-
attr_reader :exists, :cache_log, :storage_dir, :cache_file
|
18
|
-
|
19
|
-
def initialize(logger, options)
|
20
|
-
@logger = logger
|
21
|
-
@cache_log = {}
|
22
|
-
|
23
|
-
@cache_datetime = DateTime.now
|
24
|
-
@cache_time = @cache_datetime.to_time
|
25
|
-
|
26
|
-
if options.nil? || options.empty?
|
27
|
-
define_singleton_method('use_cache?') { false }
|
28
|
-
else
|
29
|
-
define_singleton_method('use_cache?') { true }
|
30
|
-
setup_cache!(options)
|
31
|
-
@parsed_timeframe = parsed_timeframe(options[:timeframe])
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
def within_timeframe?(time)
|
36
|
-
return false if time.nil?
|
37
|
-
|
38
|
-
(@parsed_timeframe..@cache_time).cover?(Time.parse(time))
|
39
|
-
end
|
40
|
-
|
41
|
-
def urls
|
42
|
-
@cache_log['urls'] || []
|
43
|
-
end
|
44
|
-
|
45
|
-
def size
|
46
|
-
@cache_log.length
|
47
|
-
end
|
48
|
-
|
49
|
-
def parsed_timeframe(timeframe)
|
50
|
-
time, date = timeframe.match(/(\d+)(\D)/).captures
|
51
|
-
time = time.to_i
|
52
|
-
case date
|
53
|
-
when 'M'
|
54
|
-
time_ago(time, :months)
|
55
|
-
when 'w'
|
56
|
-
time_ago(time, :weeks)
|
57
|
-
when 'd'
|
58
|
-
time_ago(time, :days)
|
59
|
-
when 'h'
|
60
|
-
time_ago(time, :hours)
|
61
|
-
else
|
62
|
-
raise ArgumentError, "#{date} is not a valid timeframe!"
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def add(url, filenames, status, msg = '')
|
67
|
-
return unless use_cache?
|
68
|
-
|
69
|
-
data = {
|
70
|
-
time: @cache_time,
|
71
|
-
filenames: filenames,
|
72
|
-
status: status,
|
73
|
-
message: msg
|
74
|
-
}
|
75
|
-
|
76
|
-
@cache_log[clean_url(url)] = data
|
77
|
-
end
|
78
|
-
|
79
|
-
def detect_url_changes(found, type)
|
80
|
-
found_urls = found.keys.map { |url| clean_url(url) }
|
81
|
-
|
82
|
-
# if there were no urls, bail
|
83
|
-
return {} if found_urls.empty?
|
84
|
-
|
85
|
-
existing_urls = @cache_log.keys.map { |url| clean_url(url) }
|
86
|
-
|
87
|
-
# prepare to add new URLs detected
|
88
|
-
additions = found.reject do |url, _|
|
89
|
-
url = clean_url(url)
|
90
|
-
if existing_urls.include?(url)
|
91
|
-
true
|
92
|
-
else
|
93
|
-
@logger.log :debug, "Adding #{url} to cache check"
|
94
|
-
false
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
new_link_count = additions.length
|
99
|
-
new_link_text = pluralize(new_link_count, 'link', 'links')
|
100
|
-
@logger.log :info, "Adding #{new_link_text} to the cache..."
|
101
|
-
|
102
|
-
# remove from cache URLs that no longer exist
|
103
|
-
deletions = 0
|
104
|
-
@cache_log.delete_if do |url, _|
|
105
|
-
url = clean_url(url)
|
106
|
-
|
107
|
-
if found_urls.include?(url)
|
108
|
-
false
|
109
|
-
elsif url_matches_type?(url, type)
|
110
|
-
@logger.log :debug, "Removing #{url} from cache check"
|
111
|
-
deletions += 1
|
112
|
-
true
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
del_link_text = pluralize(deletions, 'link', 'links')
|
117
|
-
@logger.log :info, "Removing #{del_link_text} from the cache..."
|
118
|
-
|
119
|
-
additions
|
120
|
-
end
|
121
|
-
|
122
|
-
# TODO: Garbage performance--both the external and internal
|
123
|
-
# caches need access to this file. Write a proper versioned
|
124
|
-
# schema in the future
|
125
|
-
def write
|
126
|
-
File.write(cache_file, @cache_log.to_json)
|
127
|
-
end
|
128
|
-
|
129
|
-
def load?
|
130
|
-
@load.nil?
|
131
|
-
end
|
132
|
-
|
133
|
-
def retrieve_urls(urls, type)
|
134
|
-
urls_to_check = detect_url_changes(urls, type)
|
135
|
-
|
136
|
-
@cache_log.each_pair do |url, cache|
|
137
|
-
next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
|
138
|
-
|
139
|
-
if url_matches_type?(url, type)
|
140
|
-
urls_to_check[url] = cache['filenames'] # recheck expired links
|
141
|
-
end
|
142
|
-
end
|
143
|
-
urls_to_check
|
144
|
-
end
|
145
|
-
|
146
|
-
# FIXME: it seems that Typhoeus actually acts on escaped URLs,
|
147
|
-
# but there's no way to get at that information, and the cache
|
148
|
-
# stores unescaped URLs. Because of this, some links, such as
|
149
|
-
# github.com/search/issues?q=is:open+is:issue+fig are not matched
|
150
|
-
# as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig
|
151
|
-
def unescape_url(url)
|
152
|
-
Addressable::URI.unescape(url)
|
153
|
-
end
|
154
|
-
|
155
|
-
def clean_url(url)
|
156
|
-
unescape_url(url)
|
157
|
-
end
|
158
|
-
|
159
|
-
def setup_cache!(options)
|
160
|
-
@storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR
|
161
|
-
|
162
|
-
FileUtils.mkdir_p(storage_dir) unless Dir.exist?(storage_dir)
|
163
|
-
|
164
|
-
cache_file_name = options[:cache_file] || DEFAULT_CACHE_FILE_NAME
|
165
|
-
|
166
|
-
@cache_file = File.join(storage_dir, cache_file_name)
|
167
|
-
|
168
|
-
return unless File.exist?(@cache_file)
|
169
|
-
|
170
|
-
contents = File.read(@cache_file)
|
171
|
-
@cache_log = contents.empty? ? {} : JSON.parse(contents)
|
172
|
-
end
|
173
|
-
|
174
|
-
private
|
175
|
-
|
176
|
-
def time_ago(measurement, unit)
|
177
|
-
case unit
|
178
|
-
when :months
|
179
|
-
@cache_datetime >> -measurement
|
180
|
-
when :weeks
|
181
|
-
@cache_datetime - (measurement * 7)
|
182
|
-
when :days
|
183
|
-
@cache_datetime - measurement
|
184
|
-
when :hours
|
185
|
-
@cache_datetime - Rational(measurement / 24.0)
|
186
|
-
end.to_time
|
187
|
-
end
|
188
|
-
|
189
|
-
def url_matches_type?(url, type)
|
190
|
-
return true if type == :internal && url !~ URI_REGEXP
|
191
|
-
return true if type == :external && url =~ URI_REGEXP
|
192
|
-
end
|
193
|
-
end
|
194
|
-
end
|
@@ -1,29 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
class FaviconCheck < ::HTMLProofer::Check
|
4
|
-
def run
|
5
|
-
found = false
|
6
|
-
@html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |node|
|
7
|
-
favicon = create_element(node)
|
8
|
-
next if favicon.ignore?
|
9
|
-
|
10
|
-
found = true if favicon.rel.split.last.eql? 'icon'
|
11
|
-
break if found
|
12
|
-
end
|
13
|
-
|
14
|
-
return if found
|
15
|
-
|
16
|
-
return if immediate_redirect?
|
17
|
-
|
18
|
-
add_issue('no favicon specified')
|
19
|
-
end
|
20
|
-
|
21
|
-
private
|
22
|
-
|
23
|
-
# allow any instant-redirect meta tag
|
24
|
-
def immediate_redirect?
|
25
|
-
@html.xpath("//meta[@http-equiv='refresh']").attribute('content').value.start_with? '0;'
|
26
|
-
rescue StandardError
|
27
|
-
false
|
28
|
-
end
|
29
|
-
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
class HtmlCheck < ::HTMLProofer::Check
|
4
|
-
# tags embedded in scripts are used in templating languages: http://git.io/vOovv
|
5
|
-
SCRIPT_EMBEDS_MSG = /Element script embeds close tag/.freeze
|
6
|
-
INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/.freeze
|
7
|
-
INVALID_PREFIX = /Namespace prefix/.freeze
|
8
|
-
PARSE_ENTITY_REF = /htmlParseEntityRef: no name/.freeze
|
9
|
-
DOCTYPE_MSG = /Expected a doctype token/.freeze
|
10
|
-
EOF_IN_TAG = /End of input in tag/.freeze
|
11
|
-
MISMATCHED_TAGS = /That tag isn't allowed here/.freeze
|
12
|
-
|
13
|
-
def run
|
14
|
-
@html.errors.each do |error|
|
15
|
-
add_issue(error.message, line: error.line) if report?(error.message)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
def report?(message)
|
20
|
-
case message
|
21
|
-
when SCRIPT_EMBEDS_MSG
|
22
|
-
options[:validation][:report_script_embeds]
|
23
|
-
when INVALID_TAG_MSG, INVALID_PREFIX
|
24
|
-
options[:validation][:report_invalid_tags]
|
25
|
-
when PARSE_ENTITY_REF
|
26
|
-
options[:validation][:report_missing_names]
|
27
|
-
when DOCTYPE_MSG
|
28
|
-
options[:validation][:report_missing_doctype]
|
29
|
-
when EOF_IN_TAG
|
30
|
-
options[:validation][:report_eof_tags]
|
31
|
-
when MISMATCHED_TAGS
|
32
|
-
options[:validation][:report_mismatched_tags]
|
33
|
-
else
|
34
|
-
true
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
class ImageCheck < ::HTMLProofer::Check
|
4
|
-
SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/.freeze
|
5
|
-
|
6
|
-
def empty_alt_tag?
|
7
|
-
@img.alt.nil? || @img.alt.strip.empty?
|
8
|
-
end
|
9
|
-
|
10
|
-
def terrible_filename?
|
11
|
-
@img.url =~ SCREEN_SHOT_REGEX
|
12
|
-
end
|
13
|
-
|
14
|
-
def missing_src?
|
15
|
-
blank?(@img.url)
|
16
|
-
end
|
17
|
-
|
18
|
-
def run
|
19
|
-
@html.css('img').each do |node|
|
20
|
-
@img = create_element(node)
|
21
|
-
line = node.line
|
22
|
-
content = node.content
|
23
|
-
|
24
|
-
next if @img.ignore?
|
25
|
-
|
26
|
-
# screenshot filenames should return because of terrible names
|
27
|
-
if terrible_filename?
|
28
|
-
add_issue("image has a terrible filename (#{@img.url})", line: line, content: content)
|
29
|
-
next
|
30
|
-
end
|
31
|
-
|
32
|
-
# does the image exist?
|
33
|
-
if missing_src?
|
34
|
-
add_issue('image has no src or srcset attribute', line: line, content: content)
|
35
|
-
elsif @img.remote?
|
36
|
-
add_to_external_urls(@img.url)
|
37
|
-
elsif !@img.exists?
|
38
|
-
add_issue("internal image #{@img.url} does not exist", line: line, content: content)
|
39
|
-
end
|
40
|
-
|
41
|
-
add_issue("image #{@img.url} does not have an alt attribute", line: line, content: content) if empty_alt_tag? && !@img.ignore_empty_alt? && !@img.ignore_alt?
|
42
|
-
|
43
|
-
add_issue("image #{@img.url} uses the http scheme", line: line, content: content) if @img.check_img_http? && @img.scheme == 'http'
|
44
|
-
end
|
45
|
-
|
46
|
-
external_urls
|
47
|
-
end
|
48
|
-
end
|
@@ -1,182 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
class LinkCheck < ::HTMLProofer::Check
|
4
|
-
include HTMLProofer::Utils
|
5
|
-
|
6
|
-
def missing_href?
|
7
|
-
return blank?(@link.src) if @node.name == 'source'
|
8
|
-
|
9
|
-
blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
|
10
|
-
end
|
11
|
-
|
12
|
-
def placeholder?
|
13
|
-
(!blank?(@link.id) || !blank?(@link.name)) && @link.href.nil?
|
14
|
-
end
|
15
|
-
|
16
|
-
def run
|
17
|
-
@html.css('a, link, source').each do |node|
|
18
|
-
@link = create_element(node)
|
19
|
-
line = node.line
|
20
|
-
content = node.to_s
|
21
|
-
|
22
|
-
next if @link.ignore?
|
23
|
-
|
24
|
-
next if placeholder?
|
25
|
-
next if @link.allow_hash_href? && @link.href == '#'
|
26
|
-
|
27
|
-
# is it even a valid URL?
|
28
|
-
unless @link.valid?
|
29
|
-
add_issue("#{@link.href} is an invalid URL", line: line, content: content)
|
30
|
-
next
|
31
|
-
end
|
32
|
-
|
33
|
-
check_schemes(@link, line, content)
|
34
|
-
|
35
|
-
# is there even an href?
|
36
|
-
if missing_href?
|
37
|
-
next if @link.allow_missing_href?
|
38
|
-
# HTML5 allows dropping the href: http://git.io/vBX0z
|
39
|
-
next if @html.internal_subset.nil? || (@html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?)
|
40
|
-
|
41
|
-
add_issue('anchor has no href attribute', line: line, content: content)
|
42
|
-
next
|
43
|
-
end
|
44
|
-
|
45
|
-
# intentionally here because we still want valid? & missing_href? to execute
|
46
|
-
next if @link.non_http_remote?
|
47
|
-
|
48
|
-
if !@link.href&.start_with?('#') && !@link.internal? && @link.remote?
|
49
|
-
check_sri(line, content) if @link.check_sri? && node.name == 'link'
|
50
|
-
# we need to skip these for now; although the domain main be valid,
|
51
|
-
# curl/Typheous inaccurately return 404s for some links. cc https://git.io/vyCFx
|
52
|
-
next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
|
53
|
-
|
54
|
-
unless @link.path?
|
55
|
-
add_issue("#{@link.href} is an invalid URL", line: line, content: content)
|
56
|
-
next
|
57
|
-
end
|
58
|
-
|
59
|
-
add_to_external_urls(@link.href || @link.src)
|
60
|
-
next
|
61
|
-
elsif @link.internal?
|
62
|
-
add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
|
63
|
-
add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content) if !@link.exists? && !@link.hash
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
external_urls
|
68
|
-
end
|
69
|
-
|
70
|
-
def check_internal_link(link, path, line, content)
|
71
|
-
# does the local directory have a trailing slash?
|
72
|
-
if link.unslashed_directory?(link.absolute_path)
|
73
|
-
add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", path: path, line: line, content: content)
|
74
|
-
return false
|
75
|
-
end
|
76
|
-
|
77
|
-
return true unless link.hash
|
78
|
-
|
79
|
-
# verify the target hash
|
80
|
-
handle_hash(link, path, line, content)
|
81
|
-
end
|
82
|
-
|
83
|
-
def check_schemes(link, line, content)
|
84
|
-
case link.scheme
|
85
|
-
when 'mailto'
|
86
|
-
handle_mailto(link, line, content)
|
87
|
-
when 'tel'
|
88
|
-
handle_tel(link, line, content)
|
89
|
-
when 'http'
|
90
|
-
return unless @options[:enforce_https]
|
91
|
-
|
92
|
-
add_issue("#{link.href} is not an HTTPS link", line: line, content: content)
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
def handle_mailto(link, line, content)
|
97
|
-
if link.path.empty?
|
98
|
-
add_issue("#{link.href} contains no email address", line: line, content: content) unless link.ignore_empty_mailto?
|
99
|
-
elsif !link.path.include?('@')
|
100
|
-
add_issue("#{link.href} contains an invalid email address", line: line, content: content)
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
def handle_tel(link, line, content)
|
105
|
-
add_issue("#{link.href} contains no phone number", line: line, content: content) if link.path.empty?
|
106
|
-
end
|
107
|
-
|
108
|
-
def handle_hash(link, path, line, content)
|
109
|
-
if link.internal? && !hash_exists?(link.html, link.hash) # rubocop:disable Style/GuardClause
|
110
|
-
return add_issue("linking to internal hash ##{link.hash} that does not exist", path: path, line: line, content: content)
|
111
|
-
elsif link.external?
|
112
|
-
return external_link_check(link, line, content)
|
113
|
-
end
|
114
|
-
|
115
|
-
true
|
116
|
-
end
|
117
|
-
|
118
|
-
def external_link_check(link, line, content)
|
119
|
-
if link.exists? # rubocop:disable Style/GuardClause
|
120
|
-
target_html = create_nokogiri(link.absolute_path)
|
121
|
-
return add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_exists?(target_html, link.hash)
|
122
|
-
else
|
123
|
-
return add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
|
124
|
-
end
|
125
|
-
|
126
|
-
true
|
127
|
-
end
|
128
|
-
|
129
|
-
def hash_exists?(html, href_hash)
|
130
|
-
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
131
|
-
fragment_ids = [href_hash, decoded_href_hash]
|
132
|
-
# https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
|
133
|
-
fragment_ids.include?('top') || !find_fragments(html, fragment_ids).empty?
|
134
|
-
end
|
135
|
-
|
136
|
-
def find_fragments(html, fragment_ids)
|
137
|
-
xpaths = fragment_ids.flat_map do |frag_id|
|
138
|
-
escaped_frag_id = "'#{frag_id.split("'").join("', \"'\", '")}', ''"
|
139
|
-
[
|
140
|
-
"//*[case_sensitive_equals(@id, concat(#{escaped_frag_id}))]",
|
141
|
-
"//*[case_sensitive_equals(@name, concat(#{escaped_frag_id}))]"
|
142
|
-
]
|
143
|
-
end
|
144
|
-
xpaths << XpathFunctions.new
|
145
|
-
|
146
|
-
html.xpath(*xpaths)
|
147
|
-
end
|
148
|
-
|
149
|
-
# Whitelist for affected elements from Subresource Integrity specification
|
150
|
-
# https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets
|
151
|
-
SRI_REL_TYPES = %(stylesheet)
|
152
|
-
|
153
|
-
def check_sri(line, content)
|
154
|
-
return unless SRI_REL_TYPES.include?(@link.rel)
|
155
|
-
|
156
|
-
if !defined?(@link.integrity) && !defined?(@link.crossorigin)
|
157
|
-
add_issue("SRI and CORS not provided in: #{@link.src}", line: line, content: content)
|
158
|
-
elsif !defined?(@link.integrity)
|
159
|
-
add_issue("Integrity is missing in: #{@link.src}", line: line, content: content)
|
160
|
-
elsif !defined?(@link.crossorigin)
|
161
|
-
add_issue("CORS not provided for external resource in: #{@link.src}", line: line, content: content)
|
162
|
-
end
|
163
|
-
end
|
164
|
-
|
165
|
-
class XpathFunctions
|
166
|
-
def case_sensitive_equals(node_set, str_to_match)
|
167
|
-
node_set.find_all { |node| node.to_s.== str_to_match.to_s }
|
168
|
-
end
|
169
|
-
end
|
170
|
-
|
171
|
-
class InternalLink
|
172
|
-
attr_reader :link, :href, :path, :line, :content
|
173
|
-
|
174
|
-
def initialize(link, path, line, content)
|
175
|
-
@link = link
|
176
|
-
@href = @link.href
|
177
|
-
@path = path
|
178
|
-
@line = line
|
179
|
-
@content = content
|
180
|
-
end
|
181
|
-
end
|
182
|
-
end
|
@@ -1,46 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
class OpenGraphElement < ::HTMLProofer::Element
|
4
|
-
attr_reader :src
|
5
|
-
|
6
|
-
def initialize(obj, check, logger)
|
7
|
-
super(obj, check, logger)
|
8
|
-
# Fake up src from the content attribute
|
9
|
-
instance_variable_set('@src', @content)
|
10
|
-
|
11
|
-
@src.insert 0, 'http:' if %r{^//}.match?(@src)
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
class OpenGraphCheck < ::HTMLProofer::Check
|
16
|
-
def missing_src?
|
17
|
-
!@opengraph.src
|
18
|
-
end
|
19
|
-
|
20
|
-
def empty_src?
|
21
|
-
blank?(@opengraph.src)
|
22
|
-
end
|
23
|
-
|
24
|
-
def run
|
25
|
-
@html.css('meta[property="og:url"], meta[property="og:image"]').each do |m|
|
26
|
-
@opengraph = OpenGraphElement.new(m, self, @logger)
|
27
|
-
|
28
|
-
next if @opengraph.ignore?
|
29
|
-
|
30
|
-
# does the opengraph exist?
|
31
|
-
if missing_src?
|
32
|
-
add_issue('open graph has no content attribute', line: m.line, content: m.content)
|
33
|
-
elsif empty_src?
|
34
|
-
add_issue('open graph content attribute is empty', line: m.line, content: m.content)
|
35
|
-
elsif !@opengraph.valid?
|
36
|
-
add_issue("#{@opengraph.src} is an invalid URL", line: m.line)
|
37
|
-
elsif @opengraph.remote?
|
38
|
-
add_to_external_urls(@opengraph.url)
|
39
|
-
else
|
40
|
-
add_issue("internal open graph #{@opengraph.url} does not exist", line: m.line, content: m.content) unless @opengraph.exists?
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
external_urls
|
45
|
-
end
|
46
|
-
end
|