html-proofer 3.19.4 → 4.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +30 -57
- data/lib/html-proofer.rb +1 -54
- data/lib/html_proofer/attribute/url.rb +231 -0
- data/lib/html_proofer/attribute.rb +15 -0
- data/lib/html_proofer/cache.rb +234 -0
- data/lib/html_proofer/check/favicon.rb +35 -0
- data/lib/html_proofer/check/images.rb +62 -0
- data/lib/html_proofer/check/links.rb +118 -0
- data/lib/html_proofer/check/open_graph.rb +34 -0
- data/lib/html_proofer/check/scripts.rb +38 -0
- data/lib/html_proofer/check.rb +91 -0
- data/lib/{html-proofer → html_proofer}/configuration.rb +30 -31
- data/lib/html_proofer/element.rb +122 -0
- data/lib/html_proofer/failure.rb +17 -0
- data/lib/{html-proofer → html_proofer}/log.rb +0 -0
- data/lib/html_proofer/reporter/cli.rb +29 -0
- data/lib/html_proofer/reporter.rb +23 -0
- data/lib/html_proofer/runner.rb +245 -0
- data/lib/html_proofer/url_validator/external.rb +189 -0
- data/lib/html_proofer/url_validator/internal.rb +86 -0
- data/lib/html_proofer/url_validator.rb +16 -0
- data/lib/{html-proofer → html_proofer}/utils.rb +5 -8
- data/lib/{html-proofer → html_proofer}/version.rb +1 -1
- data/lib/html_proofer/xpath_functions.rb +10 -0
- data/lib/html_proofer.rb +56 -0
- metadata +46 -27
- data/lib/html-proofer/cache.rb +0 -194
- data/lib/html-proofer/check/favicon.rb +0 -29
- data/lib/html-proofer/check/html.rb +0 -37
- data/lib/html-proofer/check/images.rb +0 -48
- data/lib/html-proofer/check/links.rb +0 -182
- data/lib/html-proofer/check/opengraph.rb +0 -46
- data/lib/html-proofer/check/scripts.rb +0 -42
- data/lib/html-proofer/check.rb +0 -75
- data/lib/html-proofer/element.rb +0 -265
- data/lib/html-proofer/issue.rb +0 -65
- data/lib/html-proofer/middleware.rb +0 -82
- data/lib/html-proofer/runner.rb +0 -249
- data/lib/html-proofer/url_validator.rb +0 -237
data/lib/html-proofer/check/scripts.rb
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
class ScriptCheck < ::HTMLProofer::Check
|
4
|
-
attr_reader :src
|
5
|
-
|
6
|
-
def missing_src?
|
7
|
-
!@script.src
|
8
|
-
end
|
9
|
-
|
10
|
-
def run
|
11
|
-
@html.css('script').each do |node|
|
12
|
-
@script = create_element(node)
|
13
|
-
line = node.line
|
14
|
-
content = node.content
|
15
|
-
|
16
|
-
next if @script.ignore?
|
17
|
-
next unless node.text.strip.empty?
|
18
|
-
|
19
|
-
# does the script exist?
|
20
|
-
if missing_src?
|
21
|
-
add_issue('script is empty and has no src attribute', line: line, content: content)
|
22
|
-
elsif @script.remote?
|
23
|
-
add_to_external_urls(@script.src)
|
24
|
-
check_sri(line, content) if @script.check_sri?
|
25
|
-
elsif !@script.exists?
|
26
|
-
add_issue("internal script #{@script.src} does not exist", line: line, content: content)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
external_urls
|
31
|
-
end
|
32
|
-
|
33
|
-
def check_sri(line, content)
|
34
|
-
if !defined?(@script.integrity) && !defined?(@script.crossorigin)
|
35
|
-
add_issue("SRI and CORS not provided in: #{@script.src}", line: line, content: content)
|
36
|
-
elsif !defined?(@script.integrity)
|
37
|
-
add_issue("Integrity is missing in: #{@script.src}", line: line, content: content)
|
38
|
-
elsif !defined?(@script.crossorigin)
|
39
|
-
add_issue("CORS not provided for external resource in: #{@script.src}", line: line, content: content)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
data/lib/html-proofer/check.rb
DELETED
@@ -1,75 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTMLProofer
|
4
|
-
# Mostly handles issue management and collecting of external URLs.
|
5
|
-
class Check
|
6
|
-
attr_reader :node, :html, :element, :src, :path, :options, :issues, :internal_urls, :external_urls
|
7
|
-
|
8
|
-
def initialize(src, path, html, logger, cache, options)
|
9
|
-
@src = src
|
10
|
-
@path = path
|
11
|
-
@html = remove_ignored(html)
|
12
|
-
@logger = logger
|
13
|
-
@cache = cache
|
14
|
-
@options = options
|
15
|
-
@issues = []
|
16
|
-
@internal_urls = {}
|
17
|
-
@external_urls = {}
|
18
|
-
end
|
19
|
-
|
20
|
-
def create_element(node)
|
21
|
-
@node = node
|
22
|
-
Element.new(node, self, @logger)
|
23
|
-
end
|
24
|
-
|
25
|
-
def run
|
26
|
-
raise NotImplementedError, 'HTMLProofer::Check subclasses must implement #run'
|
27
|
-
end
|
28
|
-
|
29
|
-
def add_issue(desc, line: nil, path: nil, status: -1, content: nil)
|
30
|
-
@issues << Issue.new(path || @path, desc, line: line, status: status, content: content)
|
31
|
-
false
|
32
|
-
end
|
33
|
-
|
34
|
-
def add_to_internal_urls(url, internal_url)
|
35
|
-
if @internal_urls[url]
|
36
|
-
@internal_urls[url] << internal_url
|
37
|
-
else
|
38
|
-
@internal_urls[url] = [internal_url]
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
def add_to_external_urls(url)
|
43
|
-
return if @external_urls[url]
|
44
|
-
|
45
|
-
if @external_urls[url]
|
46
|
-
@external_urls[url] << @path
|
47
|
-
else
|
48
|
-
@external_urls[url] = [@path]
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
def self.subchecks
|
53
|
-
classes = []
|
54
|
-
|
55
|
-
ObjectSpace.each_object(Class) do |c|
|
56
|
-
next unless c.superclass == self
|
57
|
-
|
58
|
-
classes << c
|
59
|
-
end
|
60
|
-
|
61
|
-
classes
|
62
|
-
end
|
63
|
-
|
64
|
-
def blank?(attr)
|
65
|
-
attr.nil? || attr.empty?
|
66
|
-
end
|
67
|
-
|
68
|
-
private
|
69
|
-
|
70
|
-
def remove_ignored(html)
|
71
|
-
html.css('code, pre, tt').each(&:unlink)
|
72
|
-
html
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
data/lib/html-proofer/element.rb
DELETED
@@ -1,265 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'addressable/uri'
|
4
|
-
require_relative './utils'
|
5
|
-
|
6
|
-
module HTMLProofer
|
7
|
-
# Represents the element currently being processed
|
8
|
-
class Element
|
9
|
-
include HTMLProofer::Utils
|
10
|
-
|
11
|
-
attr_reader :id, :name, :alt, :href, :link, :src, :line, :data_proofer_ignore
|
12
|
-
|
13
|
-
def initialize(obj, check, logger)
|
14
|
-
@logger = logger
|
15
|
-
# Construct readable ivars for every element
|
16
|
-
begin
|
17
|
-
obj.attributes.each_pair do |attribute, value|
|
18
|
-
name = attribute.tr('-:.;@', '_').to_s.to_sym
|
19
|
-
(class << self; self; end).send(:attr_reader, name)
|
20
|
-
instance_variable_set("@#{name}", value.value)
|
21
|
-
end
|
22
|
-
rescue NameError => e
|
23
|
-
@logger.log :error, "Attribute set `#{obj}` contains an error!"
|
24
|
-
raise e
|
25
|
-
end
|
26
|
-
|
27
|
-
@aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true'
|
28
|
-
|
29
|
-
@data_proofer_ignore = defined?(@data_proofer_ignore)
|
30
|
-
|
31
|
-
@text = obj.content
|
32
|
-
@check = check
|
33
|
-
@checked_paths = {}
|
34
|
-
@type = check.class.name
|
35
|
-
@line = obj.line
|
36
|
-
|
37
|
-
@html = check.html
|
38
|
-
|
39
|
-
parent_attributes = obj.ancestors.map { |a| a.respond_to?(:attributes) && a.attributes }
|
40
|
-
parent_attributes.pop # remove document at the end
|
41
|
-
@parent_ignorable = parent_attributes.any? { |a| !a['data-proofer-ignore'].nil? }
|
42
|
-
|
43
|
-
# fix up missing protocols
|
44
|
-
if defined?(@href)
|
45
|
-
@href.insert(0, 'http:') if %r{^//}.match?(@href)
|
46
|
-
else
|
47
|
-
@href = nil
|
48
|
-
end
|
49
|
-
|
50
|
-
if defined?(@src)
|
51
|
-
@src.insert(0, 'http:') if %r{^//}.match?(@src)
|
52
|
-
else
|
53
|
-
@src = nil
|
54
|
-
end
|
55
|
-
|
56
|
-
if defined?(@srcset)
|
57
|
-
@srcset.insert(0, 'http:') if %r{^//}.match?(@srcset)
|
58
|
-
else
|
59
|
-
@srcset = nil
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
def url
|
64
|
-
return @url if defined?(@url)
|
65
|
-
|
66
|
-
@url = (@src || @srcset || @href || '').delete("\u200b").strip
|
67
|
-
@url = Addressable::URI.join(base.attr('href') || '', url).to_s if base
|
68
|
-
return @url if @check.options[:url_swap].empty?
|
69
|
-
|
70
|
-
@url = swap(@url, @check.options[:url_swap])
|
71
|
-
end
|
72
|
-
|
73
|
-
def valid?
|
74
|
-
!parts.nil?
|
75
|
-
end
|
76
|
-
|
77
|
-
def path?
|
78
|
-
!parts.host.nil? && !parts.path.nil?
|
79
|
-
end
|
80
|
-
|
81
|
-
def parts
|
82
|
-
@parts ||= Addressable::URI.parse url
|
83
|
-
rescue URI::Error, Addressable::URI::InvalidURIError
|
84
|
-
@parts = nil
|
85
|
-
end
|
86
|
-
|
87
|
-
def path
|
88
|
-
Addressable::URI.unencode parts.path unless parts.nil?
|
89
|
-
end
|
90
|
-
|
91
|
-
def hash
|
92
|
-
parts&.fragment
|
93
|
-
end
|
94
|
-
|
95
|
-
def scheme
|
96
|
-
parts&.scheme
|
97
|
-
end
|
98
|
-
|
99
|
-
# path is to an external server
|
100
|
-
def remote?
|
101
|
-
%w[http https].include? scheme
|
102
|
-
end
|
103
|
-
|
104
|
-
def non_http_remote?
|
105
|
-
!scheme.nil? && !remote?
|
106
|
-
end
|
107
|
-
|
108
|
-
def ignore?
|
109
|
-
return true if @data_proofer_ignore
|
110
|
-
return true if @parent_ignorable
|
111
|
-
|
112
|
-
return true if /^javascript:/.match?(url)
|
113
|
-
|
114
|
-
# ignore base64 encoded images
|
115
|
-
return true if %w[ImageCheck FaviconCheck].include?(@type) && /^data:image/.match?(url)
|
116
|
-
|
117
|
-
# ignore user defined URLs
|
118
|
-
return true if ignores_pattern_check(@check.options[:url_ignore])
|
119
|
-
end
|
120
|
-
|
121
|
-
def ignore_alt?
|
122
|
-
return true if ignores_pattern_check(@check.options[:alt_ignore]) || @aria_hidden
|
123
|
-
end
|
124
|
-
|
125
|
-
def ignore_empty_alt?
|
126
|
-
@check.options[:empty_alt_ignore]
|
127
|
-
end
|
128
|
-
|
129
|
-
def allow_missing_href?
|
130
|
-
@check.options[:allow_missing_href]
|
131
|
-
end
|
132
|
-
|
133
|
-
def allow_hash_href?
|
134
|
-
@check.options[:allow_hash_href]
|
135
|
-
end
|
136
|
-
|
137
|
-
def check_img_http?
|
138
|
-
@check.options[:check_img_http]
|
139
|
-
end
|
140
|
-
|
141
|
-
def check_sri?
|
142
|
-
@check.options[:check_sri]
|
143
|
-
end
|
144
|
-
|
145
|
-
def ignore_empty_mailto?
|
146
|
-
@check.options[:ignore_empty_mailto]
|
147
|
-
end
|
148
|
-
|
149
|
-
# path is external to the file
|
150
|
-
def external?
|
151
|
-
!internal?
|
152
|
-
end
|
153
|
-
|
154
|
-
def internal?
|
155
|
-
relative_link? || internal_absolute_link?
|
156
|
-
end
|
157
|
-
|
158
|
-
def internal_absolute_link?
|
159
|
-
url.start_with?('/')
|
160
|
-
end
|
161
|
-
|
162
|
-
def relative_link?
|
163
|
-
return false if remote?
|
164
|
-
|
165
|
-
hash_link || param_link || url.start_with?('.') || url =~ /^\S/
|
166
|
-
end
|
167
|
-
|
168
|
-
def link_points_to_same_page?
|
169
|
-
hash_link || param_link
|
170
|
-
end
|
171
|
-
|
172
|
-
def hash_link
|
173
|
-
url.start_with?('#')
|
174
|
-
end
|
175
|
-
|
176
|
-
def param_link
|
177
|
-
url.start_with?('?')
|
178
|
-
end
|
179
|
-
|
180
|
-
def absolute_path?(path)
|
181
|
-
path.start_with?('/')
|
182
|
-
end
|
183
|
-
|
184
|
-
def file_path
|
185
|
-
return if path.nil? || path.empty?
|
186
|
-
|
187
|
-
path_dot_ext = ''
|
188
|
-
|
189
|
-
path_dot_ext = path + @check.options[:extension] if @check.options[:assume_extension]
|
190
|
-
|
191
|
-
base = if absolute_path?(path) # path relative to root
|
192
|
-
# either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
|
193
|
-
@check.options[:root_dir] || (File.directory?(@check.src) ? @check.src : File.dirname(@check.src))
|
194
|
-
elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
|
195
|
-
File.dirname(@check.path)
|
196
|
-
elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # rubocop:disable Lint/DuplicateBranch; relative links in nested dir, path is a file
|
197
|
-
File.dirname(@check.path)
|
198
|
-
else # relative link, path is a directory
|
199
|
-
@check.path
|
200
|
-
end
|
201
|
-
|
202
|
-
file = File.join(base, path)
|
203
|
-
|
204
|
-
if @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
|
205
|
-
file = "#{file}#{@check.options[:extension]}"
|
206
|
-
elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
|
207
|
-
file = File.join file, @check.options[:directory_index_file]
|
208
|
-
end
|
209
|
-
|
210
|
-
file
|
211
|
-
end
|
212
|
-
|
213
|
-
# checks if a file exists relative to the current pwd
|
214
|
-
def exists?
|
215
|
-
return @checked_paths[absolute_path] if @checked_paths.key?(absolute_path)
|
216
|
-
|
217
|
-
@checked_paths[absolute_path] = File.exist?(absolute_path)
|
218
|
-
end
|
219
|
-
|
220
|
-
def absolute_path
|
221
|
-
path = file_path || @check.path
|
222
|
-
|
223
|
-
File.expand_path(path, Dir.pwd)
|
224
|
-
end
|
225
|
-
|
226
|
-
def ignores_pattern_check(links)
|
227
|
-
return false unless links.is_a?(Array)
|
228
|
-
|
229
|
-
links.each do |ignore|
|
230
|
-
case ignore
|
231
|
-
when String
|
232
|
-
return true if ignore == url
|
233
|
-
when Regexp
|
234
|
-
return true if ignore&.match?(url)
|
235
|
-
end
|
236
|
-
end
|
237
|
-
|
238
|
-
false
|
239
|
-
end
|
240
|
-
|
241
|
-
def unslashed_directory?(file)
|
242
|
-
File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location?
|
243
|
-
end
|
244
|
-
|
245
|
-
def follow_location?
|
246
|
-
@check.options[:typhoeus] && @check.options[:typhoeus][:followlocation]
|
247
|
-
end
|
248
|
-
|
249
|
-
def base
|
250
|
-
@base ||= @html.at_css('base')
|
251
|
-
end
|
252
|
-
|
253
|
-
def html
|
254
|
-
# If link is on the same page, then URL is on the current page. use the same HTML as for current page
|
255
|
-
if link_points_to_same_page?
|
256
|
-
@html
|
257
|
-
elsif internal?
|
258
|
-
# link on another page, e.g. /about#Team - need to get HTML from the other page
|
259
|
-
create_nokogiri(absolute_path)
|
260
|
-
else
|
261
|
-
raise NotImplementedError, 'HTMLProofer should not have gotten here. Please report this as a bug.'
|
262
|
-
end
|
263
|
-
end
|
264
|
-
end
|
265
|
-
end
|
data/lib/html-proofer/issue.rb
DELETED
@@ -1,65 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTMLProofer
|
4
|
-
class Issue
|
5
|
-
attr_reader :path, :desc, :status, :line, :content
|
6
|
-
|
7
|
-
def initialize(path, desc, line: nil, status: -1, content: nil)
|
8
|
-
@line = line.nil? ? '' : " (line #{line})"
|
9
|
-
@path = path
|
10
|
-
@desc = desc
|
11
|
-
@status = status
|
12
|
-
@content = content
|
13
|
-
end
|
14
|
-
|
15
|
-
def to_s
|
16
|
-
"#{@path}: #{@desc}#{@line}"
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
class SortedIssues
|
21
|
-
attr_reader :issues
|
22
|
-
|
23
|
-
def initialize(issues, error_sort, logger)
|
24
|
-
@issues = issues
|
25
|
-
@error_sort = error_sort
|
26
|
-
@logger = logger
|
27
|
-
end
|
28
|
-
|
29
|
-
def sort_and_report
|
30
|
-
case @error_sort
|
31
|
-
when :path
|
32
|
-
sorted_issues = sort(:path, :desc)
|
33
|
-
report(sorted_issues, :path, :desc)
|
34
|
-
when :desc
|
35
|
-
sorted_issues = sort(:desc, :path)
|
36
|
-
report(sorted_issues, :desc, :path)
|
37
|
-
when :status
|
38
|
-
sorted_issues = sort(:status, :path)
|
39
|
-
report(sorted_issues, :status, :path)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
def sort(first_sort, second_sort)
|
44
|
-
issues.sort_by { |t| [t.send(first_sort), t.send(second_sort)] }
|
45
|
-
end
|
46
|
-
|
47
|
-
def report(sorted_issues, first_report, second_report)
|
48
|
-
matcher = nil
|
49
|
-
|
50
|
-
sorted_issues.each do |issue|
|
51
|
-
if matcher != issue.send(first_report)
|
52
|
-
@logger.log :error, "- #{issue.send(first_report)}"
|
53
|
-
matcher = issue.send(first_report)
|
54
|
-
end
|
55
|
-
if first_report == :status
|
56
|
-
@logger.log :error, " * #{issue}"
|
57
|
-
else
|
58
|
-
msg = " * #{issue.send(second_report)}#{issue.line}"
|
59
|
-
msg = "#{msg}\n #{issue.content}" if !issue.content.nil? && !issue.content.empty?
|
60
|
-
@logger.log(:error, msg)
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
data/lib/html-proofer/middleware.rb
DELETED
@@ -1,82 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTMLProofer
|
4
|
-
class Middleware
|
5
|
-
include HTMLProofer::Utils
|
6
|
-
|
7
|
-
class InvalidHtmlError < StandardError
|
8
|
-
def initialize(failures)
|
9
|
-
super
|
10
|
-
@failures = failures
|
11
|
-
end
|
12
|
-
|
13
|
-
def message
|
14
|
-
"HTML Validation errors (skip by adding `?proofer-ignore` to URL): \n#{@failures.join("\n")}"
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.options
|
19
|
-
@options ||= {
|
20
|
-
type: :file,
|
21
|
-
allow_missing_href: true, # Permitted in html5
|
22
|
-
allow_hash_href: true,
|
23
|
-
check_external_hash: true,
|
24
|
-
check_html: true,
|
25
|
-
url_ignore: [%r{^/}], # Don't try to check if local files exist
|
26
|
-
validation: { report_eof_tags: true }
|
27
|
-
}
|
28
|
-
end
|
29
|
-
|
30
|
-
def initialize(app)
|
31
|
-
@app = app
|
32
|
-
end
|
33
|
-
|
34
|
-
HTML_SIGNATURE = [
|
35
|
-
'<!DOCTYPE HTML',
|
36
|
-
'<HTML',
|
37
|
-
'<HEAD',
|
38
|
-
'<SCRIPT',
|
39
|
-
'<IFRAME',
|
40
|
-
'<H1',
|
41
|
-
'<DIV',
|
42
|
-
'<FONT',
|
43
|
-
'<TABLE',
|
44
|
-
'<A',
|
45
|
-
'<STYLE',
|
46
|
-
'<TITLE',
|
47
|
-
'<B',
|
48
|
-
'<BODY',
|
49
|
-
'<BR',
|
50
|
-
'<P',
|
51
|
-
'<!--'
|
52
|
-
].freeze
|
53
|
-
|
54
|
-
def call(env)
|
55
|
-
result = @app.call(env)
|
56
|
-
return result if env['REQUEST_METHOD'] != 'GET'
|
57
|
-
return result if /proofer-ignore/.match?(env['QUERY_STRING'])
|
58
|
-
return result if result.first != 200
|
59
|
-
|
60
|
-
body = []
|
61
|
-
result.last.each { |e| body << e }
|
62
|
-
|
63
|
-
body = body.join
|
64
|
-
begin
|
65
|
-
html = body.lstrip
|
66
|
-
rescue StandardError
|
67
|
-
return result # Invalid encoding; it's not gonna be html.
|
68
|
-
end
|
69
|
-
if HTML_SIGNATURE.any? { |sig| html.upcase.start_with? sig }
|
70
|
-
parsed = HTMLProofer::Runner.new(
|
71
|
-
'response',
|
72
|
-
Middleware.options
|
73
|
-
).check_parsed(
|
74
|
-
Nokogiri::HTML5(html, max_errors: -1), 'response'
|
75
|
-
)
|
76
|
-
|
77
|
-
raise InvalidHtmlError, parsed[:failures] unless parsed[:failures].empty?
|
78
|
-
end
|
79
|
-
result
|
80
|
-
end
|
81
|
-
end
|
82
|
-
end
|