html-proofer 4.0.0.rc3 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,118 +1,129 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class HTMLProofer::Check::Links < HTMLProofer::Check
4
- def run
5
- @html.css('a, link, source').each do |node|
6
- @link = create_element(node)
7
-
8
- next if @link.ignore?
9
-
10
- if !allow_hash_href? && @link.node['href'] == '#'
11
- add_failure('linking to internal hash #, which points to nowhere', line: @link.line, content: @link.content)
12
- next
3
+ module HTMLProofer
4
+ class Check
5
+ class Links < HTMLProofer::Check
6
+ def run
7
+ @html.css("a, link, source").each do |node|
8
+ @link = create_element(node)
9
+
10
+ next if @link.ignore?
11
+
12
+ if !allow_hash_href? && @link.node["href"] == "#"
13
+ add_failure("linking to internal hash #, which points to nowhere", line: @link.line, content: @link.content)
14
+ next
15
+ end
16
+
17
+ # is there even an href?
18
+ if blank?(@link.url.raw_attribute)
19
+ next if allow_missing_href?
20
+
21
+ add_failure("'#{@link.node.name}' tag is missing a reference", line: @link.line, content: @link.content)
22
+ next
23
+ end
24
+
25
+ # is it even a valid URL?
26
+ unless @link.url.valid?
27
+ add_failure("#{@link.href} is an invalid URL", line: @link.line, content: @link.content)
28
+ next
29
+ end
30
+
31
+ check_schemes
32
+
33
+ # intentionally down here because we still want valid? & missing_href? to execute
34
+ next if @link.url.non_http_remote?
35
+
36
+ if !@link.url.internal? && @link.url.remote?
37
+ check_sri if @runner.check_sri? && @link.link_tag?
38
+
39
+ # we need to skip these for now; although the domain main be valid,
40
+ # curl/Typheous inaccurately return 404s for some links. cc https://git.io/vyCFx
41
+ next if @link.node["rel"] == "dns-prefetch"
42
+
43
+ unless @link.url.path?
44
+ add_failure("#{@link.url.raw_attribute} is an invalid URL", line: @link.line, content: @link.content)
45
+ next
46
+ end
47
+
48
+ add_to_external_urls(@link.url, @link.line)
49
+ elsif @link.url.internal?
50
+ # does the local directory have a trailing slash?
51
+ if @link.url.unslashed_directory?(@link.url.absolute_path)
52
+ add_failure("internally linking to a directory #{@link.url.raw_attribute} without trailing slash",
53
+ line: @link.line, content: @link.content)
54
+ next
55
+ end
56
+
57
+ add_to_internal_urls(@link.url, @link.line)
58
+ end
59
+ end
13
60
  end
14
61
 
15
- # is there even an href?
16
- if blank?(@link.url.raw_attribute)
17
- next if allow_missing_href?
18
-
19
- add_failure("'#{@link.node.name}' tag is missing a reference", line: @link.line, content: @link.content)
20
- next
62
+ def allow_missing_href?
63
+ @runner.options[:allow_missing_href]
21
64
  end
22
65
 
23
- # is it even a valid URL?
24
- unless @link.url.valid?
25
- add_failure("#{@link.href} is an invalid URL", line: @link.line, content: @link.content)
26
- next
66
+ def allow_hash_href?
67
+ @runner.options[:allow_hash_href]
27
68
  end
28
69
 
29
- check_schemes
30
-
31
- # intentionally down here because we still want valid? & missing_href? to execute
32
- next if @link.url.non_http_remote?
70
+ def check_schemes
71
+ case @link.url.scheme
72
+ when "mailto"
73
+ handle_mailto
74
+ when "tel"
75
+ handle_tel
76
+ when "http"
77
+ return unless @runner.options[:enforce_https]
33
78
 
34
- if !@link.url.internal? && @link.url.remote?
35
- check_sri if @runner.check_sri? && @link.link_tag?
36
-
37
- # we need to skip these for now; although the domain main be valid,
38
- # curl/Typheous inaccurately return 404s for some links. cc https://git.io/vyCFx
39
- next if @link.node['rel'] == 'dns-prefetch'
40
-
41
- unless @link.url.path?
42
- add_failure("#{@link.url.raw_attribute} is an invalid URL", line: @link.line, content: @link.content)
43
- next
79
+ add_failure("#{@link.url.raw_attribute} is not an HTTPS link", line: @link.line, content: @link.content)
44
80
  end
45
-
46
- add_to_external_urls(@link.url, @link.line)
47
- elsif @link.url.internal?
48
- # does the local directory have a trailing slash?
49
- add_failure("internally linking to a directory #{@link.url.raw_attribute} without trailing slash", line: @link.line, content: @link.content) if @link.url.unslashed_directory?(@link.url.absolute_path)
50
-
51
- add_to_internal_urls(@link.url, @link.line)
52
81
  end
53
- end
54
-
55
- external_urls
56
- end
57
-
58
- def allow_missing_href?
59
- @runner.options[:allow_missing_href]
60
- end
61
-
62
- def allow_hash_href?
63
- @runner.options[:allow_hash_href]
64
- end
65
-
66
- def check_schemes
67
- case @link.url.scheme
68
- when 'mailto'
69
- handle_mailto
70
- when 'tel'
71
- handle_tel
72
- when 'http'
73
- return unless @runner.options[:enforce_https]
74
-
75
- add_failure("#{@link.url.raw_attribute} is not an HTTPS link", line: @link.line, content: @link.content)
76
- end
77
- end
78
82
 
79
- def handle_mailto
80
- if @link.url.path.empty?
81
- add_failure("#{@link.url.raw_attribute} contains no email address", line: @link.line, content: @link.content) unless ignore_empty_mailto?
82
- elsif !/#{URI::MailTo::EMAIL_REGEXP}/o.match?(@link.url.path)
83
- add_failure("#{@link.url.raw_attribute} contains an invalid email address", line: @link.line, content: @link.content)
84
- end
85
- end
83
+ def handle_mailto
84
+ if @link.url.path.empty?
85
+ add_failure("#{@link.url.raw_attribute} contains no email address", line: @link.line,
86
+ content: @link.content) unless ignore_empty_mailto?
87
+ elsif !/#{URI::MailTo::EMAIL_REGEXP}/o.match?(@link.url.path)
88
+ add_failure("#{@link.url.raw_attribute} contains an invalid email address", line: @link.line,
89
+ content: @link.content)
90
+ end
91
+ end
86
92
 
87
- def handle_tel
88
- add_failure("#{@link.url.raw_attribute} contains no phone number", line: @link.line, content: @link.content) if @link.url.path.empty?
89
- end
93
+ def handle_tel
94
+ add_failure("#{@link.url.raw_attribute} contains no phone number", line: @link.line,
95
+ content: @link.content) if @link.url.path.empty?
96
+ end
90
97
 
91
- def ignore_empty_mailto?
92
- @runner.options[:ignore_empty_mailto]
93
- end
98
+ def ignore_empty_mailto?
99
+ @runner.options[:ignore_empty_mailto]
100
+ end
94
101
 
95
- # Whitelist for affected elements from Subresource Integrity specification
96
- # https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets
97
- SRI_REL_TYPES = %(stylesheet)
102
+ # Allowed elements from Subresource Integrity specification
103
+ # https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets
104
+ SRI_REL_TYPES = %(stylesheet)
105
+
106
+ def check_sri
107
+ return unless SRI_REL_TYPES.include?(@link.node["rel"])
108
+
109
+ if blank?(@link.node["integrity"]) && blank?(@link.node["crossorigin"])
110
+ add_failure("SRI and CORS not provided in: #{@link.url.raw_attribute}", line: @link.line,
111
+ content: @link.content)
112
+ elsif blank?(@link.node["integrity"])
113
+ add_failure("Integrity is missing in: #{@link.url.raw_attribute}", line: @link.line, content: @link.content)
114
+ elsif blank?(@link.node["crossorigin"])
115
+ add_failure("CORS not provided for external resource in: #{@link.link.url.raw_attribute}", line: @link.line,
116
+ content: @link.content)
117
+ end
118
+ end
98
119
 
99
- def check_sri
100
- return unless SRI_REL_TYPES.include?(@link.node['rel'])
120
+ private def source_tag?
121
+ @link.node.name == "source"
122
+ end
101
123
 
102
- if blank?(@link.node['integrity']) && blank?(@link.node['crossorigin'])
103
- add_failure("SRI and CORS not provided in: #{@link.url.raw_attribute}", line: @link.line, content: @link.content)
104
- elsif blank?(@link.node['integrity'])
105
- add_failure("Integrity is missing in: #{@link.url.raw_attribute}", line: @link.line, content: @link.content)
106
- elsif blank?(@link.node['crossorigin'])
107
- add_failure("CORS not provided for external resource in: #{@link.link.url.raw_attribute}", line: @link.line, content: @link.content)
124
+ private def anchor_tag?
125
+ @link.node.name == "a"
126
+ end
108
127
  end
109
128
  end
110
-
111
- private def source_tag?
112
- @link.node.name == 'source'
113
- end
114
-
115
- private def anchor_tag?
116
- @link.node.name == 'a'
117
- end
118
129
  end
@@ -1,34 +1,39 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class HTMLProofer::Check::OpenGraph < HTMLProofer::Check
4
- def run
5
- @html.css('meta[property="og:url"], meta[property="og:image"]').each do |node|
6
- @open_graph = create_element(node)
3
+ module HTMLProofer
4
+ class Check
5
+ class OpenGraph < HTMLProofer::Check
6
+ def run
7
+ @html.css('meta[property="og:url"], meta[property="og:image"]').each do |node|
8
+ @open_graph = create_element(node)
7
9
 
8
- next if @open_graph.ignore?
10
+ next if @open_graph.ignore?
9
11
 
10
- # does the open_graph exist?
11
- if missing_content?
12
- add_failure('open graph has no content attribute', line: @open_graph.line, content: @open_graph.content)
13
- elsif empty_content?
14
- add_failure('open graph content attribute is empty', line: @open_graph.line, content: @open_graph.content)
15
- elsif !@open_graph.url.valid?
16
- add_failure("#{@open_graph.src} is an invalid URL", line: @open_graph.line)
17
- elsif @open_graph.url.remote?
18
- add_to_external_urls(@open_graph.url, @open_graph.line)
19
- else
20
- add_failure("internal open graph #{@open_graph.url.raw_attribute} does not exist", line: @open_graph.line, content: @open_graph.content) unless @open_graph.url.exists?
21
- end
22
- end
12
+ # does the open_graph exist?
13
+ if missing_content?
14
+ add_failure("open graph has no content attribute", line: @open_graph.line, content: @open_graph.content)
15
+ elsif empty_content?
16
+ add_failure("open graph content attribute is empty", line: @open_graph.line, content: @open_graph.content)
17
+ elsif !@open_graph.url.valid?
18
+ add_failure("#{@open_graph.src} is an invalid URL", line: @open_graph.line)
19
+ elsif @open_graph.url.remote?
20
+ add_to_external_urls(@open_graph.url, @open_graph.line)
21
+ else
22
+ add_failure("internal open graph #{@open_graph.url.raw_attribute} does not exist", line: @open_graph.line,
23
+ content: @open_graph.content) unless @open_graph.url.exists?
24
+ end
25
+ end
23
26
 
24
- external_urls
25
- end
27
+ external_urls
28
+ end
26
29
 
27
- private def missing_content?
28
- @open_graph.node['content'].nil?
29
- end
30
+ private def missing_content?
31
+ @open_graph.node["content"].nil?
32
+ end
30
33
 
31
- private def empty_content?
32
- @open_graph.node['content'].empty?
34
+ private def empty_content?
35
+ @open_graph.node["content"].empty?
36
+ end
37
+ end
33
38
  end
34
39
  end
@@ -1,38 +1,46 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class HTMLProofer::Check::Scripts < HTMLProofer::Check
4
- def run
5
- @html.css('script').each do |node|
6
- @script = create_element(node)
3
+ module HTMLProofer
4
+ class Check
5
+ class Scripts < HTMLProofer::Check
6
+ def run
7
+ @html.css("script").each do |node|
8
+ @script = create_element(node)
7
9
 
8
- next if @script.ignore?
9
- next unless @script.content.strip.empty?
10
+ next if @script.ignore?
11
+ next unless @script.content.strip.empty?
10
12
 
11
- # does the script exist?
12
- if missing_src?
13
- add_failure('script is empty and has no src attribute', line: @script.line, content: @script.content)
14
- elsif @script.url.remote?
15
- add_to_external_urls(@script.src, @script.line)
16
- check_sri if @runner.check_sri?
17
- elsif !@script.url.exists?
18
- add_failure("internal script reference #{@script.src} does not exist", line: @script.line, content: @script.content)
19
- end
20
- end
13
+ # does the script exist?
14
+ if missing_src?
15
+ add_failure("script is empty and has no src attribute", line: @script.line, content: @script.content)
16
+ elsif @script.url.remote?
17
+ add_to_external_urls(@script.src, @script.line)
18
+ check_sri if @runner.check_sri?
19
+ elsif !@script.url.exists?
20
+ add_failure("internal script reference #{@script.src} does not exist", line: @script.line,
21
+ content: @script.content)
22
+ end
23
+ end
21
24
 
22
- external_urls
23
- end
25
+ external_urls
26
+ end
24
27
 
25
- def missing_src?
26
- @script.node['src'].nil?
27
- end
28
+ def missing_src?
29
+ @script.node["src"].nil?
30
+ end
28
31
 
29
- def check_sri
30
- if blank?(@script.node['integrity']) && blank?(@script.node['crossorigin'])
31
- add_failure("SRI and CORS not provided in: #{@script.url.raw_attribute}", line: @script.line, content: @script.content)
32
- elsif blank?(@script.node['integrity'])
33
- add_failure("Integrity is missing in: #{@script.url.raw_attribute}", line: @script.line, content: @script.content)
34
- elsif blank?(@script.node['crossorigin'])
35
- add_failure("CORS not provided for external resource in: #{@script.url.raw_attribute}", line: @script.line, content: @script.content)
32
+ def check_sri
33
+ if blank?(@script.node["integrity"]) && blank?(@script.node["crossorigin"])
34
+ add_failure("SRI and CORS not provided in: #{@script.url.raw_attribute}", line: @script.line,
35
+ content: @script.content)
36
+ elsif blank?(@script.node["integrity"])
37
+ add_failure("Integrity is missing in: #{@script.url.raw_attribute}", line: @script.line,
38
+ content: @script.content)
39
+ elsif blank?(@script.node["crossorigin"])
40
+ add_failure("CORS not provided for external resource in: #{@script.url.raw_attribute}", line: @script.line,
41
+ content: @script.content)
42
+ end
43
+ end
36
44
  end
37
45
  end
38
46
  end
@@ -21,11 +21,12 @@ module HTMLProofer
21
21
  end
22
22
 
23
23
  def run
24
- raise NotImplementedError, 'HTMLProofer::Check subclasses must implement #run'
24
+ raise NotImplementedError, "HTMLProofer::Check subclasses must implement #run"
25
25
  end
26
26
 
27
27
  def add_failure(description, line: nil, status: nil, content: nil)
28
- @failures << Failure.new(@runner.current_path, short_name, description, line: line, status: status, content: content)
28
+ @failures << Failure.new(@runner.current_filename, short_name, description, line: line, status: status,
29
+ content: content)
29
30
  end
30
31
 
31
32
  def self.subchecks(runner_options)
@@ -43,11 +44,11 @@ module HTMLProofer
43
44
  end
44
45
 
45
46
  def short_name
46
- self.class.name.split('::').last
47
+ self.class.name.split("::").last
47
48
  end
48
49
 
49
50
  def self.short_name
50
- name.split('::').last
51
+ name.split("::").last
51
52
  end
52
53
 
53
54
  def add_to_internal_urls(url, line)
@@ -57,10 +58,10 @@ module HTMLProofer
57
58
 
58
59
  metadata = {
59
60
  source: @runner.current_source,
60
- current_path: @runner.current_path,
61
+ filename: @runner.current_filename,
61
62
  line: line,
62
63
  base_url: base_url,
63
- found: nil
64
+ found: false,
64
65
  }
65
66
  @internal_urls[url_string] << metadata
66
67
  end
@@ -70,21 +71,21 @@ module HTMLProofer
70
71
 
71
72
  @external_urls[url_string] = [] if @external_urls[url_string].nil?
72
73
 
73
- @external_urls[url_string] << { filename: @runner.current_path, line: line }
74
+ @external_urls[url_string] << { filename: @runner.current_filename, line: line }
74
75
  end
75
76
 
76
77
  private def base_url
77
78
  return @base_url if defined?(@base_url)
78
79
 
79
- return (@base_url = '') if (base = @html.at_css('base')).nil?
80
+ return (@base_url = "") if (base = @html.at_css("base")).nil?
80
81
 
81
- @base_url = base['href']
82
+ @base_url = base["href"]
82
83
  end
83
84
 
84
85
  private def remove_ignored(html)
85
86
  return if html.nil?
86
87
 
87
- html.css('code, pre, tt').each(&:unlink)
88
+ html.css("code, pre, tt").each(&:unlink)
88
89
  html
89
90
  end
90
91
  end
@@ -2,45 +2,46 @@
2
2
 
3
3
  module HTMLProofer
4
4
  module Configuration
5
- DEFAULT_TESTS = %w[Links Images Scripts].freeze
5
+ DEFAULT_TESTS = ["Links", "Images", "Scripts"].freeze
6
6
 
7
7
  PROOFER_DEFAULTS = {
8
8
  allow_hash_href: true,
9
9
  allow_missing_href: false,
10
- assume_extension: '.html',
10
+ assume_extension: ".html",
11
11
  check_external_hash: true,
12
12
  checks: DEFAULT_TESTS,
13
- directory_index_file: 'index.html',
13
+ directory_index_file: "index.html",
14
14
  disable_external: false,
15
+ ignore_empty_alt: true,
15
16
  ignore_empty_mailto: false,
16
17
  ignore_files: [],
17
18
  ignore_missing_alt: false,
18
19
  ignore_status_codes: [],
19
20
  ignore_urls: [],
20
21
  enforce_https: true,
21
- extensions: ['.html'],
22
+ extensions: [".html"],
22
23
  log_level: :info,
23
24
  only_4xx: false,
24
25
  swap_attributes: {},
25
- swap_urls: {}
26
+ swap_urls: {},
26
27
  }.freeze
27
28
 
28
29
  TYPHOEUS_DEFAULTS = {
29
30
  followlocation: true,
30
31
  headers: {
31
- 'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{HTMLProofer::VERSION}; +https://github.com/gjtorikian/html-proofer)",
32
- 'Accept' => 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5'
32
+ "User-Agent" => "Mozilla/5.0 (compatible; HTML Proofer/#{HTMLProofer::VERSION}; +https://github.com/gjtorikian/html-proofer)",
33
+ "Accept" => "application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5",
33
34
  },
34
35
  connecttimeout: 10,
35
- timeout: 30
36
+ timeout: 30,
36
37
  }.freeze
37
38
 
38
39
  HYDRA_DEFAULTS = {
39
- max_concurrency: 50
40
+ max_concurrency: 50,
40
41
  }.freeze
41
42
 
42
43
  PARALLEL_DEFAULTS = {
43
- enable: true
44
+ enable: true,
44
45
  }.freeze
45
46
 
46
47
  CACHE_DEFAULTS = {}.freeze
@@ -60,20 +61,20 @@ module HTMLProofer
60
61
  end
61
62
 
62
63
  def self.to_regex?(item)
63
- if item.start_with?('/') && item.end_with?('/')
64
- Regexp.new item[1...-1]
64
+ if item.start_with?("/") && item.end_with?("/")
65
+ Regexp.new(item[1...-1])
65
66
  else
66
67
  item
67
68
  end
68
69
  end
69
70
 
70
71
  def self.parse_json_option(option_name, config, symbolize_names: true)
71
- raise ArgumentError, 'Must provide an option name in string format.' unless option_name.is_a?(String)
72
- raise ArgumentError, 'Must provide an option name in string format.' if option_name.strip.empty?
72
+ raise ArgumentError, "Must provide an option name in string format." unless option_name.is_a?(String)
73
+ raise ArgumentError, "Must provide an option name in string format." if option_name.strip.empty?
73
74
 
74
75
  return {} if config.nil?
75
76
 
76
- raise ArgumentError, 'Must provide a JSON configuration in string format.' unless config.is_a?(String)
77
+ raise ArgumentError, "Must provide a JSON configuration in string format." unless config.is_a?(String)
77
78
 
78
79
  return {} if config.strip.empty?
79
80
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'addressable/uri'
3
+ require "addressable/uri"
4
4
 
5
5
  module HTMLProofer
6
6
  # Represents the element currently being processed
@@ -26,66 +26,66 @@ module HTMLProofer
26
26
 
27
27
  def meta_content
28
28
  return nil unless meta_tag?
29
- return swap_attributes('content') if attribute_swapped?
29
+ return swap_attributes("content") if attribute_swapped?
30
30
 
31
- @node['content']
31
+ @node["content"]
32
32
  end
33
33
 
34
34
  def meta_tag?
35
- @node.name == 'meta'
35
+ @node.name == "meta"
36
36
  end
37
37
 
38
38
  def src
39
39
  return nil if !img_tag? && !script_tag? && !source_tag?
40
- return swap_attributes('src') if attribute_swapped?
40
+ return swap_attributes("src") if attribute_swapped?
41
41
 
42
- @node['src']
42
+ @node["src"]
43
43
  end
44
44
 
45
45
  def img_tag?
46
- @node.name == 'img'
46
+ @node.name == "img"
47
47
  end
48
48
 
49
49
  def script_tag?
50
- @node.name == 'script'
50
+ @node.name == "script"
51
51
  end
52
52
 
53
53
  def srcset
54
54
  return nil if !img_tag? && !source_tag?
55
- return swap_attributes('srcset') if attribute_swapped?
55
+ return swap_attributes("srcset") if attribute_swapped?
56
56
 
57
- @node['srcset']
57
+ @node["srcset"]
58
58
  end
59
59
 
60
60
  def source_tag?
61
- @node.name == 'source'
61
+ @node.name == "source"
62
62
  end
63
63
 
64
64
  def href
65
65
  return nil if !a_tag? && !link_tag?
66
- return swap_attributes('href') if attribute_swapped?
66
+ return swap_attributes("href") if attribute_swapped?
67
67
 
68
- @node['href']
68
+ @node["href"]
69
69
  end
70
70
 
71
71
  def a_tag?
72
- @node.name == 'a'
72
+ @node.name == "a"
73
73
  end
74
74
 
75
75
  def link_tag?
76
- @node.name == 'link'
76
+ @node.name == "link"
77
77
  end
78
78
 
79
79
  def aria_hidden?
80
- @node.attributes['aria-hidden']&.value == 'true'
80
+ @node.attributes["aria-hidden"]&.value == "true"
81
81
  end
82
82
 
83
83
  def multiple_srcsets?
84
- !blank?(srcset) && srcset.split(',').size > 1
84
+ !blank?(srcset) && srcset.split(",").size > 1
85
85
  end
86
86
 
87
87
  def ignore?
88
- return true if @node.attributes['data-proofer-ignore']
88
+ return true if @node.attributes["data-proofer-ignore"]
89
89
  return true if ancestors_ignorable?
90
90
 
91
91
  return true if url&.ignore?
@@ -116,7 +116,7 @@ module HTMLProofer
116
116
  private def ancestors_ignorable?
117
117
  ancestors_attributes = @node.ancestors.map { |a| a.respond_to?(:attributes) && a.attributes }
118
118
  ancestors_attributes.pop # remove document at the end
119
- ancestors_attributes.any? { |a| !a['data-proofer-ignore'].nil? }
119
+ ancestors_attributes.any? { |a| !a["data-proofer-ignore"].nil? }
120
120
  end
121
121
  end
122
122
  end