html-proofer 3.19.4 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/bin/htmlproofer +44 -59
  3. data/lib/html-proofer.rb +1 -54
  4. data/lib/html_proofer/attribute/url.rb +251 -0
  5. data/lib/html_proofer/attribute.rb +15 -0
  6. data/lib/html_proofer/cache.rb +292 -0
  7. data/lib/html_proofer/check/favicon.rb +43 -0
  8. data/lib/html_proofer/check/images.rb +99 -0
  9. data/lib/html_proofer/check/links.rb +135 -0
  10. data/lib/html_proofer/check/open_graph.rb +42 -0
  11. data/lib/html_proofer/check/scripts.rb +49 -0
  12. data/lib/html_proofer/check.rb +94 -0
  13. data/lib/html_proofer/configuration.rb +91 -0
  14. data/lib/html_proofer/element.rb +144 -0
  15. data/lib/html_proofer/failure.rb +17 -0
  16. data/lib/{html-proofer → html_proofer}/log.rb +19 -19
  17. data/lib/html_proofer/reporter/cli.rb +33 -0
  18. data/lib/html_proofer/reporter.rb +23 -0
  19. data/lib/html_proofer/runner.rb +244 -0
  20. data/lib/html_proofer/url_validator/external.rb +193 -0
  21. data/lib/html_proofer/url_validator/internal.rb +97 -0
  22. data/lib/html_proofer/url_validator.rb +16 -0
  23. data/lib/{html-proofer → html_proofer}/utils.rb +9 -12
  24. data/lib/{html-proofer → html_proofer}/version.rb +1 -1
  25. data/lib/html_proofer/xpath_functions.rb +10 -0
  26. data/lib/html_proofer.rb +59 -0
  27. metadata +42 -22
  28. data/lib/html-proofer/cache.rb +0 -194
  29. data/lib/html-proofer/check/favicon.rb +0 -29
  30. data/lib/html-proofer/check/html.rb +0 -37
  31. data/lib/html-proofer/check/images.rb +0 -48
  32. data/lib/html-proofer/check/links.rb +0 -182
  33. data/lib/html-proofer/check/opengraph.rb +0 -46
  34. data/lib/html-proofer/check/scripts.rb +0 -42
  35. data/lib/html-proofer/check.rb +0 -75
  36. data/lib/html-proofer/configuration.rb +0 -88
  37. data/lib/html-proofer/element.rb +0 -265
  38. data/lib/html-proofer/issue.rb +0 -65
  39. data/lib/html-proofer/middleware.rb +0 -82
  40. data/lib/html-proofer/runner.rb +0 -249
  41. data/lib/html-proofer/url_validator.rb +0 -237
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HTMLProofer
4
+ # Mostly handles issue management and collecting of external URLs.
5
class Check
  include HTMLProofer::Utils

  # failures      - Failure objects recorded through #add_failure.
  # internal_urls - raw attribute string => array of metadata hashes.
  # external_urls - URL string => array of { filename:, line: } hashes.
  # NOTE(review): nothing in this class assigns @options, so #options appears
  # to return nil unless a subclass sets it — confirm before relying on it.
  attr_reader :failures, :options, :internal_urls, :external_urls

  # runner - object queried below for #current_filename and #current_source.
  # html   - parsed document; its <code>, <pre> and <tt> nodes are unlinked
  #          before it is stored (see #remove_ignored). May be nil.
  def initialize(runner, html)
    @runner = runner
    @html = remove_ignored(html)

    @failures = []
    @internal_urls = {}
    @external_urls = {}
  end

  # Wraps +node+ in an Element sharing this check's runner and base URL.
  def create_element(node)
    Element.new(@runner, node, base_url: base_url)
  end

  # Entry point of a check; this base implementation always raises.
  #
  # Raises NotImplementedError.
  def run
    raise NotImplementedError, "HTMLProofer::Check subclasses must implement #run"
  end

  # Records a Failure for the runner's current file under this check's
  # short name. Returns the accumulated failures array.
  def add_failure(description, line: nil, status: nil, content: nil)
    failure = Failure.new(
      @runner.current_filename,
      short_name,
      description,
      line: line,
      status: status,
      content: content,
    )
    @failures << failure
  end

  # Last segment of this instance's class name (e.g. "Links" for
  # "HTMLProofer::Check::Links").
  def short_name
    self.class.name.split("::").last
  end

  # Appends one occurrence record for +url+ (keyed by its raw attribute)
  # to the internal URL table. `found` starts out false.
  def add_to_internal_urls(url, line)
    occurrences = (@internal_urls[url.raw_attribute] ||= [])
    occurrences << {
      source: @runner.current_source,
      filename: @runner.current_filename,
      line: line,
      base_url: base_url,
      found: false,
    }
  end

  # Appends one occurrence record for +url+ (keyed by url.to_s) to the
  # external URL table.
  def add_to_external_urls(url, line)
    occurrences = (@external_urls[url.to_s] ||= [])
    occurrences << { filename: @runner.current_filename, line: line }
  end

  class << self
    # Returns every loaded subclass of the receiver whose short name is
    # listed in runner_options[:checks].
    def subchecks(runner_options)
      descendants = ObjectSpace.each_object(Class).select { |klass| klass < self }
      descendants.select { |check| runner_options[:checks].include?(check.short_name) }
    end

    # Last segment of the class name.
    def short_name
      name.split("::").last
    end
  end

  private

  # Memoised href of the document's first <base> element; "" when the
  # document has no <base> element at all.
  def base_url
    return @base_url if defined?(@base_url)

    base_tag = @html.at_css("base")
    @base_url = base_tag.nil? ? "" : base_tag["href"]
  end

  # Unlinks every <code>, <pre> and <tt> node from +html+ (mutating it) and
  # returns the same document; returns nil when +html+ is nil.
  def remove_ignored(html)
    html&.tap { |doc| doc.css("code, pre, tt").each(&:unlink) }
  end
end
94
+ end
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HTMLProofer
4
+ module Configuration
5
# Check names used for :checks when the caller does not override it.
DEFAULT_TESTS = ["Links", "Images", "Scripts"].freeze

# Baseline proofer options; generate_defaults merges caller options over these.
PROOFER_DEFAULTS = {
  allow_hash_href: true,
  allow_missing_href: false,
  assume_extension: ".html",
  check_external_hash: true,
  check_internal_hash: true,
  checks: DEFAULT_TESTS,
  directory_index_file: "index.html",
  disable_external: false,
  ignore_empty_alt: true,
  ignore_empty_mailto: false,
  ignore_files: [],
  ignore_missing_alt: false,
  ignore_status_codes: [],
  ignore_urls: [],
  enforce_https: true,
  extensions: [".html"],
  log_level: :info,
  only_4xx: false,
  swap_attributes: {},
  swap_urls: {},
}.freeze

# Defaults stored under options[:typhoeus] by generate_defaults.
TYPHOEUS_DEFAULTS = {
  followlocation: true,
  headers: {
    "User-Agent" => "Mozilla/5.0 (compatible; HTML Proofer/#{HTMLProofer::VERSION}; +https://github.com/gjtorikian/html-proofer)",
    "Accept" => "application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5",
  },
  connecttimeout: 10,
  timeout: 30,
}.freeze

# Defaults stored under options[:hydra] by generate_defaults.
HYDRA_DEFAULTS = {
  max_concurrency: 50,
}.freeze

# Defaults stored under options[:parallel] by generate_defaults.
PARALLEL_DEFAULTS = {
  enable: true,
}.freeze

# Defaults stored under options[:cache] by generate_defaults (none).
CACHE_DEFAULTS = {}.freeze
49
+
50
class << self
  # Builds the effective option hash for a run.
  #
  # Caller options in +opts+ are merged over PROOFER_DEFAULTS; the nested
  # :typhoeus, :hydra, :parallel and :cache hashes are each merged over
  # their own default constant. The :src key is removed from the result.
  #
  # opts - Hash of caller-supplied options.
  #
  # Returns a new Hash.
  def generate_defaults(opts)
    options = PROOFER_DEFAULTS.merge(opts)

    options[:typhoeus] = HTMLProofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
    options[:hydra] = HTMLProofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})

    options[:parallel] = HTMLProofer::Configuration::PARALLEL_DEFAULTS.merge(opts[:parallel] || {})
    options[:cache] = HTMLProofer::Configuration::CACHE_DEFAULTS.merge(opts[:cache] || {})

    options.delete(:src)

    options
  end

  # Converts a "/pattern/" string into a Regexp; any other string is
  # returned unchanged.
  #
  # item - String, possibly wrapped in forward slashes.
  #
  # Returns a Regexp or the original String.
  def to_regex?(item)
    # A lone "/" both starts and ends with a slash but carries no pattern;
    # compiling it would yield an empty regexp that matches everything.
    slash_delimited = item.length > 1 && item.start_with?("/") && item.end_with?("/")

    slash_delimited ? Regexp.new(item[1...-1]) : item
  end

  # Parses a JSON string supplied for a named option.
  #
  # option_name     - non-blank String naming the option (used in errors).
  # config          - JSON String, or nil.
  # symbolize_names - forwarded to JSON.parse (default: true).
  #
  # Returns the parsed value, or {} when +config+ is nil or blank.
  # Raises ArgumentError when +option_name+ is not a non-blank String, when
  # +config+ is neither nil nor a String, or when parsing fails.
  def parse_json_option(option_name, config, symbolize_names: true)
    unless option_name.is_a?(String) && !option_name.strip.empty?
      raise ArgumentError, "Must provide an option name in string format."
    end

    return {} if config.nil?

    raise ArgumentError, "Must provide a JSON configuration in string format." unless config.is_a?(String)

    return {} if config.strip.empty?

    begin
      JSON.parse(config, symbolize_names: symbolize_names)
    rescue StandardError
      # Any parse failure is reported to the caller as an argument problem.
      raise ArgumentError, "Option '#{option_name}' did not contain valid JSON."
    end
  end
end
90
+ end
91
+ end
@@ -0,0 +1,144 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "addressable/uri"
4
+
5
+ module HTMLProofer
6
+ # Represents the element currently being processed
7
class Element
  include HTMLProofer::Utils

  attr_reader :node, :url, :base_url, :line, :content

  # runner   - object whose #options hash is consulted for :swap_attributes.
  # node     - the node being wrapped; must respond to #name, #[], #line,
  #            #content, #attributes and #ancestors as used below.
  # base_url - optional base URL forwarded to Attribute::Url.
  def initialize(runner, node, base_url: nil)
    @runner = runner
    @node = node

    @base_url = base_url
    @url = Attribute::Url.new(runner, link_attribute, base_url: base_url)

    @line = node.line
    @content = node.content
  end

  # First non-nil value among meta content, src, srcset and href.
  def link_attribute
    meta_content || src || srcset || href
  end

  # "content" attribute (or its swapped replacement) for <meta>; nil otherwise.
  def meta_content
    attribute_value("content") if meta_tag?
  end

  def meta_tag?
    @node.name == "meta"
  end

  # "src" attribute (or its swapped replacement) for <img>, <script> and
  # <source>; nil for any other tag.
  def src
    attribute_value("src") if img_tag? || script_tag? || source_tag?
  end

  def img_tag?
    @node.name == "img"
  end

  def script_tag?
    @node.name == "script"
  end

  # "srcset" attribute (or its swapped replacement) for <img> and <source>;
  # nil for any other tag.
  def srcset
    attribute_value("srcset") if img_tag? || source_tag?
  end

  def source_tag?
    @node.name == "source"
  end

  # "href" attribute (or its swapped replacement) for <a> and <link>;
  # nil for any other tag.
  def href
    attribute_value("href") if a_tag? || link_tag?
  end

  def a_tag?
    @node.name == "a"
  end

  def link_tag?
    @node.name == "link"
  end

  # True only when the node carries aria-hidden="true".
  def aria_hidden?
    @node.attributes["aria-hidden"]&.value == "true"
  end

  # True when srcset holds more than one comma-separated entry.
  def multiple_srcsets?
    raw = srcset
    return false if blank?(raw)

    raw.split(",").size > 1
  end

  # Comma-separated srcset entries with surrounding whitespace stripped,
  # or nil when srcset is blank.
  def srcsets
    raw = srcset
    blank?(raw) ? nil : raw.split(",").map(&:strip)
  end

  # True when any srcset entry has more than one whitespace-separated part.
  def multiple_sizes?
    candidates = srcsets
    return false if blank?(candidates)

    candidates.any? { |candidate| !blank?(candidate) && candidate.split(" ").size > 1 }
  end

  # First whitespace-separated part of every srcset entry, or nil when
  # there are no entries.
  def srcsets_wo_sizes
    candidates = srcsets
    return nil if blank?(candidates)

    candidates.map { |candidate| candidate.split(" ").first }
  end

  # True when the node or an ancestor carries data-proofer-ignore, or when
  # the element's URL reports itself as ignorable. The checks run in that
  # order and stop at the first hit.
  def ignore?
    ignored = @node.attributes["data-proofer-ignore"] || ancestors_ignorable? || url&.ignore?
    ignored ? true : false
  end

  private

  # Reads +name+ from the node, going through the :swap_attributes mapping
  # when one is configured for this tag.
  def attribute_value(name)
    attribute_swapped? ? swap_attributes(name) : @node[name]
  end

  # false when no swaps are configured at all, true when some are configured
  # for this tag, and nil when swaps exist but none apply to this tag.
  def attribute_swapped?
    swaps = @runner.options[:swap_attributes]
    return false if blank?(swaps)

    true unless blank?(swaps[@node.name])
  end

  # Looks up the replacement for +old_attr+ among this tag's swap pairs and
  # returns the node's value for that replacement attribute; nil when no
  # pair matches or the replacement name is blank.
  def swap_attributes(old_attr)
    pairs = @runner.options[:swap_attributes][@node.name]
    match = pairs.find { |original, _| original == old_attr }
    replacement = match&.last

    blank?(replacement) ? nil : @node[replacement]
  end

  # True when any ancestor except the last one returned by #ancestors
  # (the document, per the original comment) has data-proofer-ignore.
  def ancestors_ignorable?
    attribute_sets = @node.ancestors.map do |ancestor|
      ancestor.respond_to?(:attributes) && ancestor.attributes
    end

    attribute_sets[0...-1].any? { |attrs| !attrs["data-proofer-ignore"].nil? }
  end
end
144
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HTMLProofer
4
# Plain value holder describing a single failed check.
class Failure
  # Positional fields: where the failure happened, which check reported it,
  # and the message describing it.
  attr_reader :path, :check_name, :description

  # Optional fields; each is nil unless supplied to the constructor.
  attr_reader :status, :line, :content

  # path        - first positional argument, exposed via #path.
  # check_name  - second positional argument, exposed via #check_name.
  # description - third positional argument, exposed via #description.
  # line:, status:, content: - optional keywords, all defaulting to nil.
  def initialize(path, check_name, description, line: nil, status: nil, content: nil)
    @path, @check_name, @description = path, check_name, description
    @line, @status, @content = line, status, content
  end
end
17
+ end
@@ -1,21 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'yell'
4
- require 'rainbow'
3
+ require "yell"
4
+ require "rainbow"
5
5
 
6
6
  module HTMLProofer
7
7
  class Log
8
8
  include Yell::Loggable
9
9
 
10
- STDOUT_LEVELS = %i[debug info warn].freeze
11
- STDERR_LEVELS = %i[error fatal].freeze
10
+ STDOUT_LEVELS = [:debug, :info, :warn].freeze
11
+ STDERR_LEVELS = [:error, :fatal].freeze
12
12
 
13
13
  def initialize(log_level)
14
14
  @logger = Yell.new(format: false, \
15
- name: 'HTMLProofer', \
16
- level: "gte.#{log_level}") do |l|
17
- l.adapter :stdout, level: 'lte.warn'
18
- l.adapter :stderr, level: 'gte.error'
15
+ name: "HTMLProofer", \
16
+ level: "gte.#{log_level}") do |l|
17
+ l.adapter(:stdout, level: "lte.warn")
18
+ l.adapter(:stderr, level: "gte.error")
19
19
  end
20
20
  end
21
21
 
@@ -24,23 +24,23 @@ module HTMLProofer
24
24
  end
25
25
 
26
26
  def log_with_color(level, message)
27
- @logger.send level, colorize(level, message)
27
+ @logger.send(level, colorize(level, message))
28
28
  end
29
29
 
30
30
  def colorize(level, message)
31
31
  color = case level
32
- when :debug
33
- :cyan
34
- when :info
35
- :blue
36
- when :warn
37
- :yellow
38
- when :error, :fatal
39
- :red
40
- end
32
+ when :debug
33
+ :cyan
34
+ when :info
35
+ :blue
36
+ when :warn
37
+ :yellow
38
+ when :error, :fatal
39
+ :red
40
+ end
41
41
 
42
42
  if (STDOUT_LEVELS.include?(level) && $stdout.isatty) || \
43
- (STDERR_LEVELS.include?(level) && $stderr.isatty)
43
+ (STDERR_LEVELS.include?(level) && $stderr.isatty)
44
44
  Rainbow(message).send(color)
45
45
  else
46
46
  message
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HTMLProofer
4
+ class Reporter
5
class Cli < HTMLProofer::Reporter
  # Logs every collected failure at :error level as one message, with a
  # heading per check followed by one entry per failure.
  def report
    sections = failures.map do |check_name, check_failures|
      heading = "For the #{check_name} check, the following failures were found:\n"
      entries = check_failures.map { |failure| failure_message(failure) }

      [heading, *entries].join("\n")
    end

    @logger.log(:error, sections.join("\n"))
  end

  private

  # Renders one failure as a newline-terminated entry: an optional
  # "* At <path>:<line>:" header, then the description and, when present,
  # the status code.
  def failure_message(failure)
    location = +""
    location << "At #{failure.path}" unless blank?(failure.path)
    location << ":#{failure.line}" unless failure.line.nil?

    # Without a location the bullet goes directly in front of the description.
    prefix = blank?(location) ? "* " : "* #{location}:\n\n "
    status = failure.status.nil? ? "" : " (status code #{failure.status})"

    "#{prefix}#{failure.description}#{status}\n"
  end
end
32
+ end
33
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HTMLProofer
4
# Base class for failure reporters; subclasses supply #report.
class Reporter
  include HTMLProofer::Utils

  # Array of [check_name, failures] pairs, ordered by check name; nil until
  # #failures= has been called.
  attr_reader :failures

  # logger - optional logger object, stored for use by subclasses.
  def initialize(logger: nil)
    @logger = logger
  end

  # Groups +failures+ by check name, orders each group by path and then
  # line, and stores the groups sorted by check name.
  #
  # failures - Enumerable of objects responding to #check_name, #path, #line.
  def failures=(failures)
    @failures = failures.group_by(&:check_name)
      .transform_values { |issues| issues.sort_by { |issue| failure_sort_key(issue) } }
      .sort
  end

  # Emits the collected failures; this base implementation always raises.
  #
  # Raises NotImplementedError.
  def report
    raise NotImplementedError, "HTMLProofer::Reporter subclasses must implement #report"
  end

  private

  # Sort key that is always comparable: a nil path or line would otherwise
  # make Array#<=> return nil and sort_by raise ArgumentError as soon as two
  # failures in one group need their second (or first) element compared.
  # nil sorts as "" for paths and as 0 for lines.
  def failure_sort_key(issue)
    [issue.path.to_s, issue.line || 0]
  end
end
23
+ end