html-proofer 3.19.4 → 4.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/bin/htmlproofer +30 -57
  3. data/lib/html-proofer.rb +1 -54
  4. data/lib/html_proofer/attribute/url.rb +231 -0
  5. data/lib/html_proofer/attribute.rb +15 -0
  6. data/lib/html_proofer/cache.rb +234 -0
  7. data/lib/html_proofer/check/favicon.rb +35 -0
  8. data/lib/html_proofer/check/images.rb +62 -0
  9. data/lib/html_proofer/check/links.rb +118 -0
  10. data/lib/html_proofer/check/open_graph.rb +34 -0
  11. data/lib/html_proofer/check/scripts.rb +38 -0
  12. data/lib/html_proofer/check.rb +91 -0
  13. data/lib/{html-proofer → html_proofer}/configuration.rb +30 -31
  14. data/lib/html_proofer/element.rb +122 -0
  15. data/lib/html_proofer/failure.rb +17 -0
  16. data/lib/{html-proofer → html_proofer}/log.rb +0 -0
  17. data/lib/html_proofer/reporter/cli.rb +29 -0
  18. data/lib/html_proofer/reporter.rb +23 -0
  19. data/lib/html_proofer/runner.rb +245 -0
  20. data/lib/html_proofer/url_validator/external.rb +189 -0
  21. data/lib/html_proofer/url_validator/internal.rb +86 -0
  22. data/lib/html_proofer/url_validator.rb +16 -0
  23. data/lib/{html-proofer → html_proofer}/utils.rb +5 -8
  24. data/lib/{html-proofer → html_proofer}/version.rb +1 -1
  25. data/lib/html_proofer/xpath_functions.rb +10 -0
  26. data/lib/html_proofer.rb +56 -0
  27. metadata +46 -27
  28. data/lib/html-proofer/cache.rb +0 -194
  29. data/lib/html-proofer/check/favicon.rb +0 -29
  30. data/lib/html-proofer/check/html.rb +0 -37
  31. data/lib/html-proofer/check/images.rb +0 -48
  32. data/lib/html-proofer/check/links.rb +0 -182
  33. data/lib/html-proofer/check/opengraph.rb +0 -46
  34. data/lib/html-proofer/check/scripts.rb +0 -42
  35. data/lib/html-proofer/check.rb +0 -75
  36. data/lib/html-proofer/element.rb +0 -265
  37. data/lib/html-proofer/issue.rb +0 -65
  38. data/lib/html-proofer/middleware.rb +0 -82
  39. data/lib/html-proofer/runner.rb +0 -249
  40. data/lib/html-proofer/url_validator.rb +0 -237
@@ -1,42 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class ScriptCheck < ::HTMLProofer::Check
4
- attr_reader :src
5
-
6
- def missing_src?
7
- !@script.src
8
- end
9
-
10
- def run
11
- @html.css('script').each do |node|
12
- @script = create_element(node)
13
- line = node.line
14
- content = node.content
15
-
16
- next if @script.ignore?
17
- next unless node.text.strip.empty?
18
-
19
- # does the script exist?
20
- if missing_src?
21
- add_issue('script is empty and has no src attribute', line: line, content: content)
22
- elsif @script.remote?
23
- add_to_external_urls(@script.src)
24
- check_sri(line, content) if @script.check_sri?
25
- elsif !@script.exists?
26
- add_issue("internal script #{@script.src} does not exist", line: line, content: content)
27
- end
28
- end
29
-
30
- external_urls
31
- end
32
-
33
- def check_sri(line, content)
34
- if !defined?(@script.integrity) && !defined?(@script.crossorigin)
35
- add_issue("SRI and CORS not provided in: #{@script.src}", line: line, content: content)
36
- elsif !defined?(@script.integrity)
37
- add_issue("Integrity is missing in: #{@script.src}", line: line, content: content)
38
- elsif !defined?(@script.crossorigin)
39
- add_issue("CORS not provided for external resource in: #{@script.src}", line: line, content: content)
40
- end
41
- end
42
- end
@@ -1,75 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module HTMLProofer
4
- # Mostly handles issue management and collecting of external URLs.
5
- class Check
6
- attr_reader :node, :html, :element, :src, :path, :options, :issues, :internal_urls, :external_urls
7
-
8
- def initialize(src, path, html, logger, cache, options)
9
- @src = src
10
- @path = path
11
- @html = remove_ignored(html)
12
- @logger = logger
13
- @cache = cache
14
- @options = options
15
- @issues = []
16
- @internal_urls = {}
17
- @external_urls = {}
18
- end
19
-
20
- def create_element(node)
21
- @node = node
22
- Element.new(node, self, @logger)
23
- end
24
-
25
- def run
26
- raise NotImplementedError, 'HTMLProofer::Check subclasses must implement #run'
27
- end
28
-
29
- def add_issue(desc, line: nil, path: nil, status: -1, content: nil)
30
- @issues << Issue.new(path || @path, desc, line: line, status: status, content: content)
31
- false
32
- end
33
-
34
- def add_to_internal_urls(url, internal_url)
35
- if @internal_urls[url]
36
- @internal_urls[url] << internal_url
37
- else
38
- @internal_urls[url] = [internal_url]
39
- end
40
- end
41
-
42
- def add_to_external_urls(url)
43
- return if @external_urls[url]
44
-
45
- if @external_urls[url]
46
- @external_urls[url] << @path
47
- else
48
- @external_urls[url] = [@path]
49
- end
50
- end
51
-
52
- def self.subchecks
53
- classes = []
54
-
55
- ObjectSpace.each_object(Class) do |c|
56
- next unless c.superclass == self
57
-
58
- classes << c
59
- end
60
-
61
- classes
62
- end
63
-
64
- def blank?(attr)
65
- attr.nil? || attr.empty?
66
- end
67
-
68
- private
69
-
70
- def remove_ignored(html)
71
- html.css('code, pre, tt').each(&:unlink)
72
- html
73
- end
74
- end
75
- end
@@ -1,265 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'addressable/uri'
4
- require_relative './utils'
5
-
6
- module HTMLProofer
7
- # Represents the element currently being processed
8
- class Element
9
- include HTMLProofer::Utils
10
-
11
- attr_reader :id, :name, :alt, :href, :link, :src, :line, :data_proofer_ignore
12
-
13
- def initialize(obj, check, logger)
14
- @logger = logger
15
- # Construct readable ivars for every element
16
- begin
17
- obj.attributes.each_pair do |attribute, value|
18
- name = attribute.tr('-:.;@', '_').to_s.to_sym
19
- (class << self; self; end).send(:attr_reader, name)
20
- instance_variable_set("@#{name}", value.value)
21
- end
22
- rescue NameError => e
23
- @logger.log :error, "Attribute set `#{obj}` contains an error!"
24
- raise e
25
- end
26
-
27
- @aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true'
28
-
29
- @data_proofer_ignore = defined?(@data_proofer_ignore)
30
-
31
- @text = obj.content
32
- @check = check
33
- @checked_paths = {}
34
- @type = check.class.name
35
- @line = obj.line
36
-
37
- @html = check.html
38
-
39
- parent_attributes = obj.ancestors.map { |a| a.respond_to?(:attributes) && a.attributes }
40
- parent_attributes.pop # remove document at the end
41
- @parent_ignorable = parent_attributes.any? { |a| !a['data-proofer-ignore'].nil? }
42
-
43
- # fix up missing protocols
44
- if defined?(@href)
45
- @href.insert(0, 'http:') if %r{^//}.match?(@href)
46
- else
47
- @href = nil
48
- end
49
-
50
- if defined?(@src)
51
- @src.insert(0, 'http:') if %r{^//}.match?(@src)
52
- else
53
- @src = nil
54
- end
55
-
56
- if defined?(@srcset)
57
- @srcset.insert(0, 'http:') if %r{^//}.match?(@srcset)
58
- else
59
- @srcset = nil
60
- end
61
- end
62
-
63
- def url
64
- return @url if defined?(@url)
65
-
66
- @url = (@src || @srcset || @href || '').delete("\u200b").strip
67
- @url = Addressable::URI.join(base.attr('href') || '', url).to_s if base
68
- return @url if @check.options[:url_swap].empty?
69
-
70
- @url = swap(@url, @check.options[:url_swap])
71
- end
72
-
73
- def valid?
74
- !parts.nil?
75
- end
76
-
77
- def path?
78
- !parts.host.nil? && !parts.path.nil?
79
- end
80
-
81
- def parts
82
- @parts ||= Addressable::URI.parse url
83
- rescue URI::Error, Addressable::URI::InvalidURIError
84
- @parts = nil
85
- end
86
-
87
- def path
88
- Addressable::URI.unencode parts.path unless parts.nil?
89
- end
90
-
91
- def hash
92
- parts&.fragment
93
- end
94
-
95
- def scheme
96
- parts&.scheme
97
- end
98
-
99
- # path is to an external server
100
- def remote?
101
- %w[http https].include? scheme
102
- end
103
-
104
- def non_http_remote?
105
- !scheme.nil? && !remote?
106
- end
107
-
108
- def ignore?
109
- return true if @data_proofer_ignore
110
- return true if @parent_ignorable
111
-
112
- return true if /^javascript:/.match?(url)
113
-
114
- # ignore base64 encoded images
115
- return true if %w[ImageCheck FaviconCheck].include?(@type) && /^data:image/.match?(url)
116
-
117
- # ignore user defined URLs
118
- return true if ignores_pattern_check(@check.options[:url_ignore])
119
- end
120
-
121
- def ignore_alt?
122
- return true if ignores_pattern_check(@check.options[:alt_ignore]) || @aria_hidden
123
- end
124
-
125
- def ignore_empty_alt?
126
- @check.options[:empty_alt_ignore]
127
- end
128
-
129
- def allow_missing_href?
130
- @check.options[:allow_missing_href]
131
- end
132
-
133
- def allow_hash_href?
134
- @check.options[:allow_hash_href]
135
- end
136
-
137
- def check_img_http?
138
- @check.options[:check_img_http]
139
- end
140
-
141
- def check_sri?
142
- @check.options[:check_sri]
143
- end
144
-
145
- def ignore_empty_mailto?
146
- @check.options[:ignore_empty_mailto]
147
- end
148
-
149
- # path is external to the file
150
- def external?
151
- !internal?
152
- end
153
-
154
- def internal?
155
- relative_link? || internal_absolute_link?
156
- end
157
-
158
- def internal_absolute_link?
159
- url.start_with?('/')
160
- end
161
-
162
- def relative_link?
163
- return false if remote?
164
-
165
- hash_link || param_link || url.start_with?('.') || url =~ /^\S/
166
- end
167
-
168
- def link_points_to_same_page?
169
- hash_link || param_link
170
- end
171
-
172
- def hash_link
173
- url.start_with?('#')
174
- end
175
-
176
- def param_link
177
- url.start_with?('?')
178
- end
179
-
180
- def absolute_path?(path)
181
- path.start_with?('/')
182
- end
183
-
184
- def file_path
185
- return if path.nil? || path.empty?
186
-
187
- path_dot_ext = ''
188
-
189
- path_dot_ext = path + @check.options[:extension] if @check.options[:assume_extension]
190
-
191
- base = if absolute_path?(path) # path relative to root
192
- # either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
193
- @check.options[:root_dir] || (File.directory?(@check.src) ? @check.src : File.dirname(@check.src))
194
- elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
195
- File.dirname(@check.path)
196
- elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # rubocop:disable Lint/DuplicateBranch; relative links in nested dir, path is a file
197
- File.dirname(@check.path)
198
- else # relative link, path is a directory
199
- @check.path
200
- end
201
-
202
- file = File.join(base, path)
203
-
204
- if @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
205
- file = "#{file}#{@check.options[:extension]}"
206
- elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
207
- file = File.join file, @check.options[:directory_index_file]
208
- end
209
-
210
- file
211
- end
212
-
213
- # checks if a file exists relative to the current pwd
214
- def exists?
215
- return @checked_paths[absolute_path] if @checked_paths.key?(absolute_path)
216
-
217
- @checked_paths[absolute_path] = File.exist?(absolute_path)
218
- end
219
-
220
- def absolute_path
221
- path = file_path || @check.path
222
-
223
- File.expand_path(path, Dir.pwd)
224
- end
225
-
226
- def ignores_pattern_check(links)
227
- return false unless links.is_a?(Array)
228
-
229
- links.each do |ignore|
230
- case ignore
231
- when String
232
- return true if ignore == url
233
- when Regexp
234
- return true if ignore&.match?(url)
235
- end
236
- end
237
-
238
- false
239
- end
240
-
241
- def unslashed_directory?(file)
242
- File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location?
243
- end
244
-
245
- def follow_location?
246
- @check.options[:typhoeus] && @check.options[:typhoeus][:followlocation]
247
- end
248
-
249
- def base
250
- @base ||= @html.at_css('base')
251
- end
252
-
253
- def html
254
- # If link is on the same page, then URL is on the current page. use the same HTML as for current page
255
- if link_points_to_same_page?
256
- @html
257
- elsif internal?
258
- # link on another page, e.g. /about#Team - need to get HTML from the other page
259
- create_nokogiri(absolute_path)
260
- else
261
- raise NotImplementedError, 'HTMLProofer should not have gotten here. Please report this as a bug.'
262
- end
263
- end
264
- end
265
- end
@@ -1,65 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module HTMLProofer
4
- class Issue
5
- attr_reader :path, :desc, :status, :line, :content
6
-
7
- def initialize(path, desc, line: nil, status: -1, content: nil)
8
- @line = line.nil? ? '' : " (line #{line})"
9
- @path = path
10
- @desc = desc
11
- @status = status
12
- @content = content
13
- end
14
-
15
- def to_s
16
- "#{@path}: #{@desc}#{@line}"
17
- end
18
- end
19
-
20
- class SortedIssues
21
- attr_reader :issues
22
-
23
- def initialize(issues, error_sort, logger)
24
- @issues = issues
25
- @error_sort = error_sort
26
- @logger = logger
27
- end
28
-
29
- def sort_and_report
30
- case @error_sort
31
- when :path
32
- sorted_issues = sort(:path, :desc)
33
- report(sorted_issues, :path, :desc)
34
- when :desc
35
- sorted_issues = sort(:desc, :path)
36
- report(sorted_issues, :desc, :path)
37
- when :status
38
- sorted_issues = sort(:status, :path)
39
- report(sorted_issues, :status, :path)
40
- end
41
- end
42
-
43
- def sort(first_sort, second_sort)
44
- issues.sort_by { |t| [t.send(first_sort), t.send(second_sort)] }
45
- end
46
-
47
- def report(sorted_issues, first_report, second_report)
48
- matcher = nil
49
-
50
- sorted_issues.each do |issue|
51
- if matcher != issue.send(first_report)
52
- @logger.log :error, "- #{issue.send(first_report)}"
53
- matcher = issue.send(first_report)
54
- end
55
- if first_report == :status
56
- @logger.log :error, " * #{issue}"
57
- else
58
- msg = " * #{issue.send(second_report)}#{issue.line}"
59
- msg = "#{msg}\n #{issue.content}" if !issue.content.nil? && !issue.content.empty?
60
- @logger.log(:error, msg)
61
- end
62
- end
63
- end
64
- end
65
- end
@@ -1,82 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module HTMLProofer
4
- class Middleware
5
- include HTMLProofer::Utils
6
-
7
- class InvalidHtmlError < StandardError
8
- def initialize(failures)
9
- super
10
- @failures = failures
11
- end
12
-
13
- def message
14
- "HTML Validation errors (skip by adding `?proofer-ignore` to URL): \n#{@failures.join("\n")}"
15
- end
16
- end
17
-
18
- def self.options
19
- @options ||= {
20
- type: :file,
21
- allow_missing_href: true, # Permitted in html5
22
- allow_hash_href: true,
23
- check_external_hash: true,
24
- check_html: true,
25
- url_ignore: [%r{^/}], # Don't try to check if local files exist
26
- validation: { report_eof_tags: true }
27
- }
28
- end
29
-
30
- def initialize(app)
31
- @app = app
32
- end
33
-
34
- HTML_SIGNATURE = [
35
- '<!DOCTYPE HTML',
36
- '<HTML',
37
- '<HEAD',
38
- '<SCRIPT',
39
- '<IFRAME',
40
- '<H1',
41
- '<DIV',
42
- '<FONT',
43
- '<TABLE',
44
- '<A',
45
- '<STYLE',
46
- '<TITLE',
47
- '<B',
48
- '<BODY',
49
- '<BR',
50
- '<P',
51
- '<!--'
52
- ].freeze
53
-
54
- def call(env)
55
- result = @app.call(env)
56
- return result if env['REQUEST_METHOD'] != 'GET'
57
- return result if /proofer-ignore/.match?(env['QUERY_STRING'])
58
- return result if result.first != 200
59
-
60
- body = []
61
- result.last.each { |e| body << e }
62
-
63
- body = body.join
64
- begin
65
- html = body.lstrip
66
- rescue StandardError
67
- return result # Invalid encoding; it's not gonna be html.
68
- end
69
- if HTML_SIGNATURE.any? { |sig| html.upcase.start_with? sig }
70
- parsed = HTMLProofer::Runner.new(
71
- 'response',
72
- Middleware.options
73
- ).check_parsed(
74
- Nokogiri::HTML5(html, max_errors: -1), 'response'
75
- )
76
-
77
- raise InvalidHtmlError, parsed[:failures] unless parsed[:failures].empty?
78
- end
79
- result
80
- end
81
- end
82
- end