html-proofer 4.0.0.rc2 → 4.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,118 +1,129 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class HTMLProofer::Check::Links < HTMLProofer::Check
4
- def run
5
- @html.css('a, link, source').each do |node|
6
- @link = create_element(node)
7
-
8
- next if @link.ignore?
9
-
10
- if !allow_hash_href? && @link.node['href'] == '#'
11
- add_failure('linking to internal hash #, which points to nowhere', line: @link.line, content: @link.content)
12
- next
3
+ module HTMLProofer
4
+ class Check
5
+ class Links < HTMLProofer::Check
6
+ def run
7
+ @html.css("a, link, source").each do |node|
8
+ @link = create_element(node)
9
+
10
+ next if @link.ignore?
11
+
12
+ if !allow_hash_href? && @link.node["href"] == "#"
13
+ add_failure("linking to internal hash #, which points to nowhere", line: @link.line, content: @link.content)
14
+ next
15
+ end
16
+
17
+ # is there even an href?
18
+ if blank?(@link.url.raw_attribute)
19
+ next if allow_missing_href?
20
+
21
+ add_failure("'#{@link.node.name}' tag is missing a reference", line: @link.line, content: @link.content)
22
+ next
23
+ end
24
+
25
+ # is it even a valid URL?
26
+ unless @link.url.valid?
27
+ add_failure("#{@link.href} is an invalid URL", line: @link.line, content: @link.content)
28
+ next
29
+ end
30
+
31
+ check_schemes
32
+
33
+ # intentionally down here because we still want valid? & missing_href? to execute
34
+ next if @link.url.non_http_remote?
35
+
36
+ if !@link.url.internal? && @link.url.remote?
37
+ check_sri if @runner.check_sri? && @link.link_tag?
38
+
39
+ # we need to skip these for now; although the domain may be valid,
40
+ # curl/Typhoeus inaccurately return 404s for some links. cc https://git.io/vyCFx
41
+ next if @link.node["rel"] == "dns-prefetch"
42
+
43
+ unless @link.url.path?
44
+ add_failure("#{@link.url.raw_attribute} is an invalid URL", line: @link.line, content: @link.content)
45
+ next
46
+ end
47
+
48
+ add_to_external_urls(@link.url, @link.line)
49
+ elsif @link.url.internal?
50
+ # does the local directory have a trailing slash?
51
+ if @link.url.unslashed_directory?(@link.url.absolute_path)
52
+ add_failure("internally linking to a directory #{@link.url.raw_attribute} without trailing slash",
53
+ line: @link.line, content: @link.content)
54
+ next
55
+ end
56
+
57
+ add_to_internal_urls(@link.url, @link.line)
58
+ end
59
+ end
13
60
  end
14
61
 
15
- # is there even an href?
16
- if blank?(@link.url.raw_attribute)
17
- next if allow_missing_href?
18
-
19
- add_failure("'#{@link.node.name}' tag is missing a reference", line: @link.line, content: @link.content)
20
- next
62
+ def allow_missing_href?
63
+ @runner.options[:allow_missing_href]
21
64
  end
22
65
 
23
- # is it even a valid URL?
24
- unless @link.url.valid?
25
- add_failure("#{@link.href} is an invalid URL", line: @link.line, content: @link.content)
26
- next
66
+ def allow_hash_href?
67
+ @runner.options[:allow_hash_href]
27
68
  end
28
69
 
29
- check_schemes
30
-
31
- # intentionally down here because we still want valid? & missing_href? to execute
32
- next if @link.url.non_http_remote?
70
+ def check_schemes
71
+ case @link.url.scheme
72
+ when "mailto"
73
+ handle_mailto
74
+ when "tel"
75
+ handle_tel
76
+ when "http"
77
+ return unless @runner.options[:enforce_https]
33
78
 
34
- if !@link.url.internal? && @link.url.remote?
35
- check_sri if @runner.check_sri? && @link.link_tag?
36
-
37
- # we need to skip these for now; although the domain may be valid,
38
- # curl/Typhoeus inaccurately return 404s for some links. cc https://git.io/vyCFx
39
- next if @link.node['rel'] == 'dns-prefetch'
40
-
41
- unless @link.url.path?
42
- add_failure("#{@link.url.raw_attribute} is an invalid URL", line: @link.line, content: @link.content)
43
- next
79
+ add_failure("#{@link.url.raw_attribute} is not an HTTPS link", line: @link.line, content: @link.content)
44
80
  end
45
-
46
- add_to_external_urls(@link.url, @link.line)
47
- elsif @link.url.internal?
48
- # does the local directory have a trailing slash?
49
- add_failure("internally linking to a directory #{@link.url.raw_attribute} without trailing slash", line: @link.line, content: @link.content) if @link.url.unslashed_directory?(@link.url.absolute_path)
50
-
51
- add_to_internal_urls(@link.url, @link.line)
52
81
  end
53
- end
54
-
55
- external_urls
56
- end
57
-
58
- def allow_missing_href?
59
- @runner.options[:allow_missing_href]
60
- end
61
-
62
- def allow_hash_href?
63
- @runner.options[:allow_hash_href]
64
- end
65
-
66
- def check_schemes
67
- case @link.url.scheme
68
- when 'mailto'
69
- handle_mailto
70
- when 'tel'
71
- handle_tel
72
- when 'http'
73
- return unless @runner.options[:enforce_https]
74
-
75
- add_failure("#{@link.url.raw_attribute} is not an HTTPS link", line: @link.line, content: @link.content)
76
- end
77
- end
78
82
 
79
- def handle_mailto
80
- if @link.url.path.empty?
81
- add_failure("#{@link.url.raw_attribute} contains no email address", line: @link.line, content: @link.content) unless ignore_empty_mailto?
82
- elsif !/#{URI::MailTo::EMAIL_REGEXP}/o.match?(@link.url.path)
83
- add_failure("#{@link.url.raw_attribute} contains an invalid email address", line: @link.line, content: @link.content)
84
- end
85
- end
83
+ def handle_mailto
84
+ if @link.url.path.empty?
85
+ add_failure("#{@link.url.raw_attribute} contains no email address", line: @link.line,
86
+ content: @link.content) unless ignore_empty_mailto?
87
+ elsif !/#{URI::MailTo::EMAIL_REGEXP}/o.match?(@link.url.path)
88
+ add_failure("#{@link.url.raw_attribute} contains an invalid email address", line: @link.line,
89
+ content: @link.content)
90
+ end
91
+ end
86
92
 
87
- def handle_tel
88
- add_failure("#{@link.url.raw_attribute} contains no phone number", line: @link.line, content: @link.content) if @link.url.path.empty?
89
- end
93
+ def handle_tel
94
+ add_failure("#{@link.url.raw_attribute} contains no phone number", line: @link.line,
95
+ content: @link.content) if @link.url.path.empty?
96
+ end
90
97
 
91
- def ignore_empty_mailto?
92
- @runner.options[:ignore_empty_mailto]
93
- end
98
+ def ignore_empty_mailto?
99
+ @runner.options[:ignore_empty_mailto]
100
+ end
94
101
 
95
- # Whitelist for affected elements from Subresource Integrity specification
96
- # https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets
97
- SRI_REL_TYPES = %(stylesheet)
102
+ # Allowed elements from Subresource Integrity specification
103
+ # https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets
104
+ SRI_REL_TYPES = %(stylesheet)
105
+
106
+ def check_sri
107
+ return unless SRI_REL_TYPES.include?(@link.node["rel"])
108
+
109
+ if blank?(@link.node["integrity"]) && blank?(@link.node["crossorigin"])
110
+ add_failure("SRI and CORS not provided in: #{@link.url.raw_attribute}", line: @link.line,
111
+ content: @link.content)
112
+ elsif blank?(@link.node["integrity"])
113
+ add_failure("Integrity is missing in: #{@link.url.raw_attribute}", line: @link.line, content: @link.content)
114
+ elsif blank?(@link.node["crossorigin"])
115
+ add_failure("CORS not provided for external resource in: #{@link.link.url.raw_attribute}", line: @link.line,
116
+ content: @link.content)
117
+ end
118
+ end
98
119
 
99
- def check_sri
100
- return unless SRI_REL_TYPES.include?(@link.node['rel'])
120
+ private def source_tag?
121
+ @link.node.name == "source"
122
+ end
101
123
 
102
- if blank?(@link.node['integrity']) && blank?(@link.node['crossorigin'])
103
- add_failure("SRI and CORS not provided in: #{@link.url.raw_attribute}", line: @link.line, content: @link.content)
104
- elsif blank?(@link.node['integrity'])
105
- add_failure("Integrity is missing in: #{@link.url.raw_attribute}", line: @link.line, content: @link.content)
106
- elsif blank?(@link.node['crossorigin'])
107
- add_failure("CORS not provided for external resource in: #{@link.link.url.raw_attribute}", line: @link.line, content: @link.content)
124
+ private def anchor_tag?
125
+ @link.node.name == "a"
126
+ end
108
127
  end
109
128
  end
110
-
111
- private def source_tag?
112
- @link.node.name == 'source'
113
- end
114
-
115
- private def anchor_tag?
116
- @link.node.name == 'a'
117
- end
118
129
  end
@@ -1,34 +1,39 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class HTMLProofer::Check::OpenGraph < HTMLProofer::Check
4
- def run
5
- @html.css('meta[property="og:url"], meta[property="og:image"]').each do |node|
6
- @open_graph = create_element(node)
3
+ module HTMLProofer
4
+ class Check
5
+ class OpenGraph < HTMLProofer::Check
6
+ def run
7
+ @html.css('meta[property="og:url"], meta[property="og:image"]').each do |node|
8
+ @open_graph = create_element(node)
7
9
 
8
- next if @open_graph.ignore?
10
+ next if @open_graph.ignore?
9
11
 
10
- # does the open_graph exist?
11
- if missing_content?
12
- add_failure('open graph has no content attribute', line: @open_graph.line, content: @open_graph.content)
13
- elsif empty_content?
14
- add_failure('open graph content attribute is empty', line: @open_graph.line, content: @open_graph.content)
15
- elsif !@open_graph.url.valid?
16
- add_failure("#{@open_graph.src} is an invalid URL", line: @open_graph.line)
17
- elsif @open_graph.url.remote?
18
- add_to_external_urls(@open_graph.url, @open_graph.line)
19
- else
20
- add_failure("internal open graph #{@open_graph.url.raw_attribute} does not exist", line: @open_graph.line, content: @open_graph.content) unless @open_graph.url.exists?
21
- end
22
- end
12
+ # does the open_graph exist?
13
+ if missing_content?
14
+ add_failure("open graph has no content attribute", line: @open_graph.line, content: @open_graph.content)
15
+ elsif empty_content?
16
+ add_failure("open graph content attribute is empty", line: @open_graph.line, content: @open_graph.content)
17
+ elsif !@open_graph.url.valid?
18
+ add_failure("#{@open_graph.src} is an invalid URL", line: @open_graph.line)
19
+ elsif @open_graph.url.remote?
20
+ add_to_external_urls(@open_graph.url, @open_graph.line)
21
+ else
22
+ add_failure("internal open graph #{@open_graph.url.raw_attribute} does not exist", line: @open_graph.line,
23
+ content: @open_graph.content) unless @open_graph.url.exists?
24
+ end
25
+ end
23
26
 
24
- external_urls
25
- end
27
+ external_urls
28
+ end
26
29
 
27
- private def missing_content?
28
- @open_graph.node['content'].nil?
29
- end
30
+ private def missing_content?
31
+ @open_graph.node["content"].nil?
32
+ end
30
33
 
31
- private def empty_content?
32
- @open_graph.node['content'].empty?
34
+ private def empty_content?
35
+ @open_graph.node["content"].empty?
36
+ end
37
+ end
33
38
  end
34
39
  end
@@ -1,38 +1,46 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class HTMLProofer::Check::Scripts < HTMLProofer::Check
4
- def run
5
- @html.css('script').each do |node|
6
- @script = create_element(node)
3
+ module HTMLProofer
4
+ class Check
5
+ class Scripts < HTMLProofer::Check
6
+ def run
7
+ @html.css("script").each do |node|
8
+ @script = create_element(node)
7
9
 
8
- next if @script.ignore?
9
- next unless @script.content.strip.empty?
10
+ next if @script.ignore?
11
+ next unless @script.content.strip.empty?
10
12
 
11
- # does the script exist?
12
- if missing_src?
13
- add_failure('script is empty and has no src attribute', line: @script.line, content: @script.content)
14
- elsif @script.url.remote?
15
- add_to_external_urls(@script.src, @script.line)
16
- check_sri if @runner.check_sri?
17
- elsif !@script.url.exists?
18
- add_failure("internal script reference #{@script.src} does not exist", line: @script.line, content: @script.content)
19
- end
20
- end
13
+ # does the script exist?
14
+ if missing_src?
15
+ add_failure("script is empty and has no src attribute", line: @script.line, content: @script.content)
16
+ elsif @script.url.remote?
17
+ add_to_external_urls(@script.src, @script.line)
18
+ check_sri if @runner.check_sri?
19
+ elsif !@script.url.exists?
20
+ add_failure("internal script reference #{@script.src} does not exist", line: @script.line,
21
+ content: @script.content)
22
+ end
23
+ end
21
24
 
22
- external_urls
23
- end
25
+ external_urls
26
+ end
24
27
 
25
- def missing_src?
26
- @script.node['src'].nil?
27
- end
28
+ def missing_src?
29
+ @script.node["src"].nil?
30
+ end
28
31
 
29
- def check_sri
30
- if blank?(@script.node['integrity']) && blank?(@script.node['crossorigin'])
31
- add_failure("SRI and CORS not provided in: #{@script.url.raw_attribute}", line: @script.line, content: @script.content)
32
- elsif blank?(@script.node['integrity'])
33
- add_failure("Integrity is missing in: #{@script.url.raw_attribute}", line: @script.line, content: @script.content)
34
- elsif blank?(@script.node['crossorigin'])
35
- add_failure("CORS not provided for external resource in: #{@script.url.raw_attribute}", line: @script.line, content: @script.content)
32
+ def check_sri
33
+ if blank?(@script.node["integrity"]) && blank?(@script.node["crossorigin"])
34
+ add_failure("SRI and CORS not provided in: #{@script.url.raw_attribute}", line: @script.line,
35
+ content: @script.content)
36
+ elsif blank?(@script.node["integrity"])
37
+ add_failure("Integrity is missing in: #{@script.url.raw_attribute}", line: @script.line,
38
+ content: @script.content)
39
+ elsif blank?(@script.node["crossorigin"])
40
+ add_failure("CORS not provided for external resource in: #{@script.url.raw_attribute}", line: @script.line,
41
+ content: @script.content)
42
+ end
43
+ end
36
44
  end
37
45
  end
38
46
  end
@@ -21,11 +21,12 @@ module HTMLProofer
21
21
  end
22
22
 
23
23
  def run
24
- raise NotImplementedError, 'HTMLProofer::Check subclasses must implement #run'
24
+ raise NotImplementedError, "HTMLProofer::Check subclasses must implement #run"
25
25
  end
26
26
 
27
27
  def add_failure(description, line: nil, status: nil, content: nil)
28
- @failures << Failure.new(@runner.current_path, short_name, description, line: line, status: status, content: content)
28
+ @failures << Failure.new(@runner.current_filename, short_name, description, line: line, status: status,
29
+ content: content)
29
30
  end
30
31
 
31
32
  def self.subchecks(runner_options)
@@ -43,11 +44,11 @@ module HTMLProofer
43
44
  end
44
45
 
45
46
  def short_name
46
- self.class.name.split('::').last
47
+ self.class.name.split("::").last
47
48
  end
48
49
 
49
50
  def self.short_name
50
- name.split('::').last
51
+ name.split("::").last
51
52
  end
52
53
 
53
54
  def add_to_internal_urls(url, line)
@@ -57,10 +58,10 @@ module HTMLProofer
57
58
 
58
59
  metadata = {
59
60
  source: @runner.current_source,
60
- current_path: @runner.current_path,
61
+ filename: @runner.current_filename,
61
62
  line: line,
62
63
  base_url: base_url,
63
- found: nil
64
+ found: false,
64
65
  }
65
66
  @internal_urls[url_string] << metadata
66
67
  end
@@ -70,21 +71,21 @@ module HTMLProofer
70
71
 
71
72
  @external_urls[url_string] = [] if @external_urls[url_string].nil?
72
73
 
73
- @external_urls[url_string] << { filename: @runner.current_path, line: line }
74
+ @external_urls[url_string] << { filename: @runner.current_filename, line: line }
74
75
  end
75
76
 
76
77
  private def base_url
77
78
  return @base_url if defined?(@base_url)
78
79
 
79
- return (@base_url = '') if (base = @html.at_css('base')).nil?
80
+ return (@base_url = "") if (base = @html.at_css("base")).nil?
80
81
 
81
- @base_url = base['href']
82
+ @base_url = base["href"]
82
83
  end
83
84
 
84
85
  private def remove_ignored(html)
85
86
  return if html.nil?
86
87
 
87
- html.css('code, pre, tt').each(&:unlink)
88
+ html.css("code, pre, tt").each(&:unlink)
88
89
  html
89
90
  end
90
91
  end
@@ -2,45 +2,46 @@
2
2
 
3
3
  module HTMLProofer
4
4
  module Configuration
5
- DEFAULT_TESTS = %w[Links Images Scripts].freeze
5
+ DEFAULT_TESTS = ["Links", "Images", "Scripts"].freeze
6
6
 
7
7
  PROOFER_DEFAULTS = {
8
8
  allow_hash_href: true,
9
9
  allow_missing_href: false,
10
- assume_extension: '.html',
10
+ assume_extension: ".html",
11
11
  check_external_hash: true,
12
12
  checks: DEFAULT_TESTS,
13
- directory_index_file: 'index.html',
13
+ directory_index_file: "index.html",
14
14
  disable_external: false,
15
+ ignore_empty_alt: true,
15
16
  ignore_empty_mailto: false,
16
17
  ignore_files: [],
17
18
  ignore_missing_alt: false,
18
19
  ignore_status_codes: [],
19
20
  ignore_urls: [],
20
21
  enforce_https: true,
21
- extensions: ['.html'],
22
+ extensions: [".html"],
22
23
  log_level: :info,
23
24
  only_4xx: false,
24
25
  swap_attributes: {},
25
- swap_urls: {}
26
+ swap_urls: {},
26
27
  }.freeze
27
28
 
28
29
  TYPHOEUS_DEFAULTS = {
29
30
  followlocation: true,
30
31
  headers: {
31
- 'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{HTMLProofer::VERSION}; +https://github.com/gjtorikian/html-proofer)",
32
- 'Accept' => 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5'
32
+ "User-Agent" => "Mozilla/5.0 (compatible; HTML Proofer/#{HTMLProofer::VERSION}; +https://github.com/gjtorikian/html-proofer)",
33
+ "Accept" => "application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5",
33
34
  },
34
35
  connecttimeout: 10,
35
- timeout: 30
36
+ timeout: 30,
36
37
  }.freeze
37
38
 
38
39
  HYDRA_DEFAULTS = {
39
- max_concurrency: 50
40
+ max_concurrency: 50,
40
41
  }.freeze
41
42
 
42
43
  PARALLEL_DEFAULTS = {
43
- enable: true
44
+ enable: true,
44
45
  }.freeze
45
46
 
46
47
  CACHE_DEFAULTS = {}.freeze
@@ -60,20 +61,20 @@ module HTMLProofer
60
61
  end
61
62
 
62
63
  def self.to_regex?(item)
63
- if item.start_with?('/') && item.end_with?('/')
64
- Regexp.new item[1...-1]
64
+ if item.start_with?("/") && item.end_with?("/")
65
+ Regexp.new(item[1...-1])
65
66
  else
66
67
  item
67
68
  end
68
69
  end
69
70
 
70
71
  def self.parse_json_option(option_name, config, symbolize_names: true)
71
- raise ArgumentError, 'Must provide an option name in string format.' unless option_name.is_a?(String)
72
- raise ArgumentError, 'Must provide an option name in string format.' if option_name.strip.empty?
72
+ raise ArgumentError, "Must provide an option name in string format." unless option_name.is_a?(String)
73
+ raise ArgumentError, "Must provide an option name in string format." if option_name.strip.empty?
73
74
 
74
75
  return {} if config.nil?
75
76
 
76
- raise ArgumentError, 'Must provide a JSON configuration in string format.' unless config.is_a?(String)
77
+ raise ArgumentError, "Must provide a JSON configuration in string format." unless config.is_a?(String)
77
78
 
78
79
  return {} if config.strip.empty?
79
80
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'addressable/uri'
3
+ require "addressable/uri"
4
4
 
5
5
  module HTMLProofer
6
6
  # Represents the element currently being processed
@@ -26,66 +26,66 @@ module HTMLProofer
26
26
 
27
27
  def meta_content
28
28
  return nil unless meta_tag?
29
- return swap_attributes('content') if attribute_swapped?
29
+ return swap_attributes("content") if attribute_swapped?
30
30
 
31
- @node['content']
31
+ @node["content"]
32
32
  end
33
33
 
34
34
  def meta_tag?
35
- @node.name == 'meta'
35
+ @node.name == "meta"
36
36
  end
37
37
 
38
38
  def src
39
39
  return nil if !img_tag? && !script_tag? && !source_tag?
40
- return swap_attributes('src') if attribute_swapped?
40
+ return swap_attributes("src") if attribute_swapped?
41
41
 
42
- @node['src']
42
+ @node["src"]
43
43
  end
44
44
 
45
45
  def img_tag?
46
- @node.name == 'img'
46
+ @node.name == "img"
47
47
  end
48
48
 
49
49
  def script_tag?
50
- @node.name == 'script'
50
+ @node.name == "script"
51
51
  end
52
52
 
53
53
  def srcset
54
54
  return nil if !img_tag? && !source_tag?
55
- return swap_attributes('srcset') if attribute_swapped?
55
+ return swap_attributes("srcset") if attribute_swapped?
56
56
 
57
- @node['srcset']
57
+ @node["srcset"]
58
58
  end
59
59
 
60
60
  def source_tag?
61
- @node.name == 'source'
61
+ @node.name == "source"
62
62
  end
63
63
 
64
64
  def href
65
65
  return nil if !a_tag? && !link_tag?
66
- return swap_attributes('href') if attribute_swapped?
66
+ return swap_attributes("href") if attribute_swapped?
67
67
 
68
- @node['href']
68
+ @node["href"]
69
69
  end
70
70
 
71
71
  def a_tag?
72
- @node.name == 'a'
72
+ @node.name == "a"
73
73
  end
74
74
 
75
75
  def link_tag?
76
- @node.name == 'link'
76
+ @node.name == "link"
77
77
  end
78
78
 
79
79
  def aria_hidden?
80
- @node.attributes['aria-hidden']&.value == 'true'
80
+ @node.attributes["aria-hidden"]&.value == "true"
81
81
  end
82
82
 
83
83
  def multiple_srcsets?
84
- !blank?(srcset) && srcset.split(',').size > 1
84
+ !blank?(srcset) && srcset.split(",").size > 1
85
85
  end
86
86
 
87
87
  def ignore?
88
- return true if @node.attributes['data-proofer-ignore']
88
+ return true if @node.attributes["data-proofer-ignore"]
89
89
  return true if ancestors_ignorable?
90
90
 
91
91
  return true if url&.ignore?
@@ -116,7 +116,7 @@ module HTMLProofer
116
116
  private def ancestors_ignorable?
117
117
  ancestors_attributes = @node.ancestors.map { |a| a.respond_to?(:attributes) && a.attributes }
118
118
  ancestors_attributes.pop # remove document at the end
119
- ancestors_attributes.any? { |a| !a['data-proofer-ignore'].nil? }
119
+ ancestors_attributes.any? { |a| !a["data-proofer-ignore"].nil? }
120
120
  end
121
121
  end
122
122
  end