html-proofer 3.19.4 → 4.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/bin/htmlproofer +30 -57
  3. data/lib/html-proofer.rb +1 -54
  4. data/lib/html_proofer/attribute/url.rb +231 -0
  5. data/lib/html_proofer/attribute.rb +15 -0
  6. data/lib/html_proofer/cache.rb +234 -0
  7. data/lib/html_proofer/check/favicon.rb +35 -0
  8. data/lib/html_proofer/check/images.rb +62 -0
  9. data/lib/html_proofer/check/links.rb +118 -0
  10. data/lib/html_proofer/check/open_graph.rb +34 -0
  11. data/lib/html_proofer/check/scripts.rb +38 -0
  12. data/lib/html_proofer/check.rb +91 -0
  13. data/lib/{html-proofer → html_proofer}/configuration.rb +30 -31
  14. data/lib/html_proofer/element.rb +122 -0
  15. data/lib/html_proofer/failure.rb +17 -0
  16. data/lib/{html-proofer → html_proofer}/log.rb +0 -0
  17. data/lib/html_proofer/reporter/cli.rb +29 -0
  18. data/lib/html_proofer/reporter.rb +23 -0
  19. data/lib/html_proofer/runner.rb +245 -0
  20. data/lib/html_proofer/url_validator/external.rb +189 -0
  21. data/lib/html_proofer/url_validator/internal.rb +86 -0
  22. data/lib/html_proofer/url_validator.rb +16 -0
  23. data/lib/{html-proofer → html_proofer}/utils.rb +5 -8
  24. data/lib/{html-proofer → html_proofer}/version.rb +1 -1
  25. data/lib/html_proofer/xpath_functions.rb +10 -0
  26. data/lib/html_proofer.rb +56 -0
  27. metadata +46 -27
  28. data/lib/html-proofer/cache.rb +0 -194
  29. data/lib/html-proofer/check/favicon.rb +0 -29
  30. data/lib/html-proofer/check/html.rb +0 -37
  31. data/lib/html-proofer/check/images.rb +0 -48
  32. data/lib/html-proofer/check/links.rb +0 -182
  33. data/lib/html-proofer/check/opengraph.rb +0 -46
  34. data/lib/html-proofer/check/scripts.rb +0 -42
  35. data/lib/html-proofer/check.rb +0 -75
  36. data/lib/html-proofer/element.rb +0 -265
  37. data/lib/html-proofer/issue.rb +0 -65
  38. data/lib/html-proofer/middleware.rb +0 -82
  39. data/lib/html-proofer/runner.rb +0 -249
  40. data/lib/html-proofer/url_validator.rb +0 -237
@@ -1,42 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class ScriptCheck < ::HTMLProofer::Check
4
- attr_reader :src
5
-
6
- def missing_src?
7
- !@script.src
8
- end
9
-
10
- def run
11
- @html.css('script').each do |node|
12
- @script = create_element(node)
13
- line = node.line
14
- content = node.content
15
-
16
- next if @script.ignore?
17
- next unless node.text.strip.empty?
18
-
19
- # does the script exist?
20
- if missing_src?
21
- add_issue('script is empty and has no src attribute', line: line, content: content)
22
- elsif @script.remote?
23
- add_to_external_urls(@script.src)
24
- check_sri(line, content) if @script.check_sri?
25
- elsif !@script.exists?
26
- add_issue("internal script #{@script.src} does not exist", line: line, content: content)
27
- end
28
- end
29
-
30
- external_urls
31
- end
32
-
33
- def check_sri(line, content)
34
- if !defined?(@script.integrity) && !defined?(@script.crossorigin)
35
- add_issue("SRI and CORS not provided in: #{@script.src}", line: line, content: content)
36
- elsif !defined?(@script.integrity)
37
- add_issue("Integrity is missing in: #{@script.src}", line: line, content: content)
38
- elsif !defined?(@script.crossorigin)
39
- add_issue("CORS not provided for external resource in: #{@script.src}", line: line, content: content)
40
- end
41
- end
42
- end
@@ -1,75 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module HTMLProofer
4
- # Mostly handles issue management and collecting of external URLs.
5
- class Check
6
- attr_reader :node, :html, :element, :src, :path, :options, :issues, :internal_urls, :external_urls
7
-
8
- def initialize(src, path, html, logger, cache, options)
9
- @src = src
10
- @path = path
11
- @html = remove_ignored(html)
12
- @logger = logger
13
- @cache = cache
14
- @options = options
15
- @issues = []
16
- @internal_urls = {}
17
- @external_urls = {}
18
- end
19
-
20
- def create_element(node)
21
- @node = node
22
- Element.new(node, self, @logger)
23
- end
24
-
25
- def run
26
- raise NotImplementedError, 'HTMLProofer::Check subclasses must implement #run'
27
- end
28
-
29
- def add_issue(desc, line: nil, path: nil, status: -1, content: nil)
30
- @issues << Issue.new(path || @path, desc, line: line, status: status, content: content)
31
- false
32
- end
33
-
34
- def add_to_internal_urls(url, internal_url)
35
- if @internal_urls[url]
36
- @internal_urls[url] << internal_url
37
- else
38
- @internal_urls[url] = [internal_url]
39
- end
40
- end
41
-
42
- def add_to_external_urls(url)
43
- return if @external_urls[url]
44
-
45
- if @external_urls[url]
46
- @external_urls[url] << @path
47
- else
48
- @external_urls[url] = [@path]
49
- end
50
- end
51
-
52
- def self.subchecks
53
- classes = []
54
-
55
- ObjectSpace.each_object(Class) do |c|
56
- next unless c.superclass == self
57
-
58
- classes << c
59
- end
60
-
61
- classes
62
- end
63
-
64
- def blank?(attr)
65
- attr.nil? || attr.empty?
66
- end
67
-
68
- private
69
-
70
- def remove_ignored(html)
71
- html.css('code, pre, tt').each(&:unlink)
72
- html
73
- end
74
- end
75
- end
@@ -1,265 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'addressable/uri'
4
- require_relative './utils'
5
-
6
- module HTMLProofer
7
- # Represents the element currently being processed
8
- class Element
9
- include HTMLProofer::Utils
10
-
11
- attr_reader :id, :name, :alt, :href, :link, :src, :line, :data_proofer_ignore
12
-
13
- def initialize(obj, check, logger)
14
- @logger = logger
15
- # Construct readable ivars for every element
16
- begin
17
- obj.attributes.each_pair do |attribute, value|
18
- name = attribute.tr('-:.;@', '_').to_s.to_sym
19
- (class << self; self; end).send(:attr_reader, name)
20
- instance_variable_set("@#{name}", value.value)
21
- end
22
- rescue NameError => e
23
- @logger.log :error, "Attribute set `#{obj}` contains an error!"
24
- raise e
25
- end
26
-
27
- @aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true'
28
-
29
- @data_proofer_ignore = defined?(@data_proofer_ignore)
30
-
31
- @text = obj.content
32
- @check = check
33
- @checked_paths = {}
34
- @type = check.class.name
35
- @line = obj.line
36
-
37
- @html = check.html
38
-
39
- parent_attributes = obj.ancestors.map { |a| a.respond_to?(:attributes) && a.attributes }
40
- parent_attributes.pop # remove document at the end
41
- @parent_ignorable = parent_attributes.any? { |a| !a['data-proofer-ignore'].nil? }
42
-
43
- # fix up missing protocols
44
- if defined?(@href)
45
- @href.insert(0, 'http:') if %r{^//}.match?(@href)
46
- else
47
- @href = nil
48
- end
49
-
50
- if defined?(@src)
51
- @src.insert(0, 'http:') if %r{^//}.match?(@src)
52
- else
53
- @src = nil
54
- end
55
-
56
- if defined?(@srcset)
57
- @srcset.insert(0, 'http:') if %r{^//}.match?(@srcset)
58
- else
59
- @srcset = nil
60
- end
61
- end
62
-
63
- def url
64
- return @url if defined?(@url)
65
-
66
- @url = (@src || @srcset || @href || '').delete("\u200b").strip
67
- @url = Addressable::URI.join(base.attr('href') || '', url).to_s if base
68
- return @url if @check.options[:url_swap].empty?
69
-
70
- @url = swap(@url, @check.options[:url_swap])
71
- end
72
-
73
- def valid?
74
- !parts.nil?
75
- end
76
-
77
- def path?
78
- !parts.host.nil? && !parts.path.nil?
79
- end
80
-
81
- def parts
82
- @parts ||= Addressable::URI.parse url
83
- rescue URI::Error, Addressable::URI::InvalidURIError
84
- @parts = nil
85
- end
86
-
87
- def path
88
- Addressable::URI.unencode parts.path unless parts.nil?
89
- end
90
-
91
- def hash
92
- parts&.fragment
93
- end
94
-
95
- def scheme
96
- parts&.scheme
97
- end
98
-
99
- # path is to an external server
100
- def remote?
101
- %w[http https].include? scheme
102
- end
103
-
104
- def non_http_remote?
105
- !scheme.nil? && !remote?
106
- end
107
-
108
- def ignore?
109
- return true if @data_proofer_ignore
110
- return true if @parent_ignorable
111
-
112
- return true if /^javascript:/.match?(url)
113
-
114
- # ignore base64 encoded images
115
- return true if %w[ImageCheck FaviconCheck].include?(@type) && /^data:image/.match?(url)
116
-
117
- # ignore user defined URLs
118
- return true if ignores_pattern_check(@check.options[:url_ignore])
119
- end
120
-
121
- def ignore_alt?
122
- return true if ignores_pattern_check(@check.options[:alt_ignore]) || @aria_hidden
123
- end
124
-
125
- def ignore_empty_alt?
126
- @check.options[:empty_alt_ignore]
127
- end
128
-
129
- def allow_missing_href?
130
- @check.options[:allow_missing_href]
131
- end
132
-
133
- def allow_hash_href?
134
- @check.options[:allow_hash_href]
135
- end
136
-
137
- def check_img_http?
138
- @check.options[:check_img_http]
139
- end
140
-
141
- def check_sri?
142
- @check.options[:check_sri]
143
- end
144
-
145
- def ignore_empty_mailto?
146
- @check.options[:ignore_empty_mailto]
147
- end
148
-
149
- # path is external to the file
150
- def external?
151
- !internal?
152
- end
153
-
154
- def internal?
155
- relative_link? || internal_absolute_link?
156
- end
157
-
158
- def internal_absolute_link?
159
- url.start_with?('/')
160
- end
161
-
162
- def relative_link?
163
- return false if remote?
164
-
165
- hash_link || param_link || url.start_with?('.') || url =~ /^\S/
166
- end
167
-
168
- def link_points_to_same_page?
169
- hash_link || param_link
170
- end
171
-
172
- def hash_link
173
- url.start_with?('#')
174
- end
175
-
176
- def param_link
177
- url.start_with?('?')
178
- end
179
-
180
- def absolute_path?(path)
181
- path.start_with?('/')
182
- end
183
-
184
- def file_path
185
- return if path.nil? || path.empty?
186
-
187
- path_dot_ext = ''
188
-
189
- path_dot_ext = path + @check.options[:extension] if @check.options[:assume_extension]
190
-
191
- base = if absolute_path?(path) # path relative to root
192
- # either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
193
- @check.options[:root_dir] || (File.directory?(@check.src) ? @check.src : File.dirname(@check.src))
194
- elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
195
- File.dirname(@check.path)
196
- elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # rubocop:disable Lint/DuplicateBranch; relative links in nested dir, path is a file
197
- File.dirname(@check.path)
198
- else # relative link, path is a directory
199
- @check.path
200
- end
201
-
202
- file = File.join(base, path)
203
-
204
- if @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
205
- file = "#{file}#{@check.options[:extension]}"
206
- elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
207
- file = File.join file, @check.options[:directory_index_file]
208
- end
209
-
210
- file
211
- end
212
-
213
- # checks if a file exists relative to the current pwd
214
- def exists?
215
- return @checked_paths[absolute_path] if @checked_paths.key?(absolute_path)
216
-
217
- @checked_paths[absolute_path] = File.exist?(absolute_path)
218
- end
219
-
220
- def absolute_path
221
- path = file_path || @check.path
222
-
223
- File.expand_path(path, Dir.pwd)
224
- end
225
-
226
- def ignores_pattern_check(links)
227
- return false unless links.is_a?(Array)
228
-
229
- links.each do |ignore|
230
- case ignore
231
- when String
232
- return true if ignore == url
233
- when Regexp
234
- return true if ignore&.match?(url)
235
- end
236
- end
237
-
238
- false
239
- end
240
-
241
- def unslashed_directory?(file)
242
- File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location?
243
- end
244
-
245
- def follow_location?
246
- @check.options[:typhoeus] && @check.options[:typhoeus][:followlocation]
247
- end
248
-
249
- def base
250
- @base ||= @html.at_css('base')
251
- end
252
-
253
- def html
254
- # If link is on the same page, then URL is on the current page. use the same HTML as for current page
255
- if link_points_to_same_page?
256
- @html
257
- elsif internal?
258
- # link on another page, e.g. /about#Team - need to get HTML from the other page
259
- create_nokogiri(absolute_path)
260
- else
261
- raise NotImplementedError, 'HTMLProofer should not have gotten here. Please report this as a bug.'
262
- end
263
- end
264
- end
265
- end
@@ -1,65 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module HTMLProofer
4
- class Issue
5
- attr_reader :path, :desc, :status, :line, :content
6
-
7
- def initialize(path, desc, line: nil, status: -1, content: nil)
8
- @line = line.nil? ? '' : " (line #{line})"
9
- @path = path
10
- @desc = desc
11
- @status = status
12
- @content = content
13
- end
14
-
15
- def to_s
16
- "#{@path}: #{@desc}#{@line}"
17
- end
18
- end
19
-
20
- class SortedIssues
21
- attr_reader :issues
22
-
23
- def initialize(issues, error_sort, logger)
24
- @issues = issues
25
- @error_sort = error_sort
26
- @logger = logger
27
- end
28
-
29
- def sort_and_report
30
- case @error_sort
31
- when :path
32
- sorted_issues = sort(:path, :desc)
33
- report(sorted_issues, :path, :desc)
34
- when :desc
35
- sorted_issues = sort(:desc, :path)
36
- report(sorted_issues, :desc, :path)
37
- when :status
38
- sorted_issues = sort(:status, :path)
39
- report(sorted_issues, :status, :path)
40
- end
41
- end
42
-
43
- def sort(first_sort, second_sort)
44
- issues.sort_by { |t| [t.send(first_sort), t.send(second_sort)] }
45
- end
46
-
47
- def report(sorted_issues, first_report, second_report)
48
- matcher = nil
49
-
50
- sorted_issues.each do |issue|
51
- if matcher != issue.send(first_report)
52
- @logger.log :error, "- #{issue.send(first_report)}"
53
- matcher = issue.send(first_report)
54
- end
55
- if first_report == :status
56
- @logger.log :error, " * #{issue}"
57
- else
58
- msg = " * #{issue.send(second_report)}#{issue.line}"
59
- msg = "#{msg}\n #{issue.content}" if !issue.content.nil? && !issue.content.empty?
60
- @logger.log(:error, msg)
61
- end
62
- end
63
- end
64
- end
65
- end
@@ -1,82 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module HTMLProofer
4
- class Middleware
5
- include HTMLProofer::Utils
6
-
7
- class InvalidHtmlError < StandardError
8
- def initialize(failures)
9
- super
10
- @failures = failures
11
- end
12
-
13
- def message
14
- "HTML Validation errors (skip by adding `?proofer-ignore` to URL): \n#{@failures.join("\n")}"
15
- end
16
- end
17
-
18
- def self.options
19
- @options ||= {
20
- type: :file,
21
- allow_missing_href: true, # Permitted in html5
22
- allow_hash_href: true,
23
- check_external_hash: true,
24
- check_html: true,
25
- url_ignore: [%r{^/}], # Don't try to check if local files exist
26
- validation: { report_eof_tags: true }
27
- }
28
- end
29
-
30
- def initialize(app)
31
- @app = app
32
- end
33
-
34
- HTML_SIGNATURE = [
35
- '<!DOCTYPE HTML',
36
- '<HTML',
37
- '<HEAD',
38
- '<SCRIPT',
39
- '<IFRAME',
40
- '<H1',
41
- '<DIV',
42
- '<FONT',
43
- '<TABLE',
44
- '<A',
45
- '<STYLE',
46
- '<TITLE',
47
- '<B',
48
- '<BODY',
49
- '<BR',
50
- '<P',
51
- '<!--'
52
- ].freeze
53
-
54
- def call(env)
55
- result = @app.call(env)
56
- return result if env['REQUEST_METHOD'] != 'GET'
57
- return result if /proofer-ignore/.match?(env['QUERY_STRING'])
58
- return result if result.first != 200
59
-
60
- body = []
61
- result.last.each { |e| body << e }
62
-
63
- body = body.join
64
- begin
65
- html = body.lstrip
66
- rescue StandardError
67
- return result # Invalid encoding; it's not gonna be html.
68
- end
69
- if HTML_SIGNATURE.any? { |sig| html.upcase.start_with? sig }
70
- parsed = HTMLProofer::Runner.new(
71
- 'response',
72
- Middleware.options
73
- ).check_parsed(
74
- Nokogiri::HTML5(html, max_errors: -1), 'response'
75
- )
76
-
77
- raise InvalidHtmlError, parsed[:failures] unless parsed[:failures].empty?
78
- end
79
- result
80
- end
81
- end
82
- end