html-proofer 2.6.4 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,72 +1,58 @@
1
- class LinkCheckable < ::HTML::Proofer::Checkable
2
-
3
- def href
4
- real_attr @href
5
- end
6
-
7
- def id
8
- real_attr @id
9
- end
10
-
11
- def name
12
- real_attr @name
13
- end
1
+ class LinkCheck < ::HTMLProofer::Check
2
+ include HTMLProofer::Utils
14
3
 
15
4
  def missing_href?
16
- href.nil? && name.nil? && id.nil?
5
+ blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
17
6
  end
18
7
 
19
8
  def placeholder?
20
- (id || name) && href.nil?
9
+ (!blank?(@link.id) || !blank?(@link.name)) && @link.href.nil?
21
10
  end
22
- end
23
-
24
- class LinkCheck < ::HTML::Proofer::CheckRunner
25
- include HTML::Proofer::Utils
26
11
 
27
12
  def run
28
13
  @html.css('a, link').each do |node|
29
- link = LinkCheckable.new(node, self)
30
- line = node.line
14
+ @link = create_element(node)
15
+ line = @node.line
16
+
17
+ next if @link.ignore?
31
18
 
32
- next if link.ignore?
33
- next if link.href =~ /^javascript:/ # can't put this in ignore? because the URI does not parse
34
- next if link.placeholder?
35
- next if link.allow_hash_href? && link.href == '#'
19
+ next if placeholder?
20
+ next if @link.allow_hash_href? && @link.href == '#'
36
21
 
37
22
  # is it even a valid URL?
38
- unless link.valid?
39
- add_issue("#{link.href} is an invalid URL", line)
23
+ unless @link.valid?
24
+ add_issue("#{@link.href} is an invalid URL", line: line)
40
25
  next
41
26
  end
42
27
 
43
- check_schemes(link, line)
28
+ check_schemes(@link, line)
44
29
 
45
- # is there even a href?
46
- if link.missing_href?
47
- add_issue('anchor has no href attribute', line)
30
+ # is there even an href?
31
+ if missing_href?
32
+ # HTML5 allows dropping the href: http://git.io/vBX0z
33
+ next if @html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?
34
+ add_issue('anchor has no href attribute', line: line)
48
35
  next
49
36
  end
50
37
 
51
38
  # intentionally here because we still want valid? & missing_href? to execute
52
- next if link.non_http_remote?
53
-
39
+ next if @link.non_http_remote?
54
40
  # does the file even exist?
55
- if link.remote?
56
- add_to_external_urls(link.href, line)
41
+ if @link.remote?
42
+ add_to_external_urls(@link.href, line)
57
43
  next
58
- elsif !link.internal?
59
- add_issue("internally linking to #{link.href}, which does not exist", line) unless link.exists?
44
+ elsif !@link.internal? && !@link.exists?
45
+ add_issue("internally linking to #{@link.href}, which does not exist", line: line)
60
46
  end
61
47
 
62
48
  # does the local directory have a trailing slash?
63
- if link.unslashed_directory? link.absolute_path
64
- add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", line)
49
+ if @link.unslashed_directory? @link.absolute_path
50
+ add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line)
65
51
  next
66
52
  end
67
53
 
68
54
  # verify the target hash
69
- handle_hash(link, line) if link.hash
55
+ handle_hash(@link, line) if @link.hash
70
56
  end
71
57
 
72
58
  external_urls
@@ -79,26 +65,27 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
79
65
  when 'tel'
80
66
  handle_tel(link, line)
81
67
  when 'http'
82
- add_issue("#{link.href} is not an HTTPS link", line) if @options[:enforce_https]
68
+ return unless @options[:enforce_https]
69
+ add_issue("#{link.href} is not an HTTPS link", line: line)
83
70
  end
84
71
  end
85
72
 
86
73
  def handle_mailto(link, line)
87
74
  if link.path.empty?
88
- add_issue("#{link.href} contains no email address", line)
75
+ add_issue("#{link.href} contains no email address", line: line)
89
76
  elsif !link.path.include?('@')
90
- add_issue("#{link.href} contains an invalid email address", line)
77
+ add_issue("#{link.href} contains an invalid email address", line: line)
91
78
  end
92
79
  end
93
80
 
94
81
  def handle_tel(link, line)
95
- add_issue("#{link.href} contains no phone number", line) if link.path.empty?
82
+ add_issue("#{link.href} contains no phone number", line: line) if link.path.empty?
96
83
  end
97
84
 
98
85
  def handle_hash(link, line)
99
86
  if link.internal?
100
87
  unless hash_check @html, link.hash
101
- add_issue("linking to internal hash ##{link.hash} that does not exist", line)
88
+ add_issue("linking to internal hash ##{link.hash} that does not exist", line: line)
102
89
  end
103
90
  elsif link.external?
104
91
  external_link_check(link, line)
@@ -107,11 +94,11 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
107
94
 
108
95
  def external_link_check(link, line)
109
96
  if !link.exists?
110
- add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line)
97
+ add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line)
111
98
  else
112
99
  target_html = create_nokogiri link.absolute_path
113
100
  unless hash_check target_html, link.hash
114
- add_issue("linking to #{link.href}, but #{link.hash} does not exist", line)
101
+ add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line)
115
102
  end
116
103
  end
117
104
  end
@@ -122,7 +109,12 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
122
109
  "//*[case_insensitive_equals(@name, '#{href_hash}')]", \
123
110
  "//*[case_insensitive_equals(@id, '#{decoded_href_hash}')]", \
124
111
  "//*[case_insensitive_equals(@name, '#{decoded_href_hash}')]", \
125
- HTML::Proofer::XpathFunctions.new).length > 0
112
+ XpathFunctions.new).length > 0
126
113
  end
127
114
 
115
+ class XpathFunctions
116
+ def case_insensitive_equals(node_set, str_to_match)
117
+ node_set.find_all {|node| node.to_s.downcase == str_to_match.to_s.downcase }
118
+ end
119
+ end
128
120
  end
@@ -0,0 +1,28 @@
1
+ class ScriptCheck < ::HTMLProofer::Check
2
+ attr_reader :src
3
+
4
+ def missing_src?
5
+ !@script.src
6
+ end
7
+
8
+ def run
9
+ @html.css('script').each do |node|
10
+ @script = create_element(node)
11
+ line = node.line
12
+
13
+ next if @script.ignore?
14
+ next unless node.text.strip.empty?
15
+
16
+ # does the script exist?
17
+ if missing_src?
18
+ add_issue('script is empty and has no src attribute', line: line)
19
+ elsif @script.remote?
20
+ add_to_external_urls(@script.src, line)
21
+ elsif !@script.exists?
22
+ add_issue("internal script #{@script.src} does not exist", line: line)
23
+ end
24
+ end
25
+
26
+ external_urls
27
+ end
28
+ end
@@ -0,0 +1,56 @@
1
+ module HTMLProofer
2
+ module Configuration
3
+ require_relative 'version'
4
+
5
+ PROOFER_DEFAULTS = {
6
+ :allow_hash_href => false,
7
+ :alt_ignore => [],
8
+ :assume_extension => false,
9
+ :check_external_hash => false,
10
+ :check_favicon => false,
11
+ :check_html => false,
12
+ :checks_to_ignore => [],
13
+ :directory_index_file => 'index.html',
14
+ :disable_external => false,
15
+ :empty_alt_ignore => false,
16
+ :enforce_https => false,
17
+ :error_sort => :path,
18
+ :extension => '.html',
19
+ :external_only => false,
20
+ :file_ignore => [],
21
+ :http_status_ignore => [],
22
+ :log_level => :info,
23
+ :only_4xx => false,
24
+ :url_ignore => [],
25
+ :url_swap => []
26
+ }
27
+
28
+ TYPHOEUS_DEFAULTS = {
29
+ :followlocation => true,
30
+ :headers => {
31
+ 'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{HTMLProofer::VERSION}; +https://github.com/gjtorikian/html-proofer)"
32
+ }
33
+ }
34
+
35
+ HYDRA_DEFAULTS = {
36
+ :max_concurrency => 50
37
+ }
38
+
39
+ PARALLEL_DEFAULTS = {}
40
+
41
+ VALIDATION_DEFAULTS = {
42
+ :report_script_embeds => false,
43
+ :report_invalid_tags => false
44
+ }
45
+
46
+ CACHE_DEFAULTS = {}
47
+
48
+ def self.to_regex?(item)
49
+ if item.start_with?('/') && item.end_with?('/')
50
+ Regexp.new item[1...-1]
51
+ else
52
+ item
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,165 @@
1
+ require 'addressable/uri'
2
+ require_relative './utils'
3
+
4
+ module HTMLProofer
5
+ # Represents the element currently being processed
6
+ class Element
7
+ include HTMLProofer::Utils
8
+
9
+ attr_reader :id, :name, :alt, :href, :link, :src, :line
10
+
11
+ def initialize(obj, check)
12
+ # Construct readable ivars for every element
13
+ obj.attributes.each_pair do |attribute, value|
14
+ name = "#{attribute.tr('-:.', '_')}".to_sym
15
+ (class << self; self; end).send(:attr_reader, name)
16
+ instance_variable_set("@#{name}", value.value)
17
+ end
18
+
19
+ @text = obj.content
20
+ @check = check
21
+ @checked_paths = {}
22
+ @type = check.class.name
23
+ @line = obj.line
24
+
25
+ # fix up missing protocols
26
+ @href.insert 0, 'http:' if @href =~ %r{^//}
27
+ @src.insert 0, 'http:' if @src =~ %r{^//}
28
+ end
29
+
30
+ def url
31
+ url = @src || @srcset || @href || ''
32
+ return url if @check.options[:url_swap].empty?
33
+ swap(url, @check.options[:url_swap])
34
+ end
35
+
36
+ def valid?
37
+ !parts.nil?
38
+ end
39
+
40
+ def parts
41
+ @parts ||= Addressable::URI.parse url
42
+ rescue URI::Error, Addressable::URI::InvalidURIError
43
+ @parts = nil
44
+ end
45
+
46
+ def path
47
+ Addressable::URI.unencode parts.path unless parts.nil?
48
+ end
49
+
50
+ def hash
51
+ parts.fragment unless parts.nil?
52
+ end
53
+
54
+ def scheme
55
+ parts.scheme unless parts.nil?
56
+ end
57
+
58
+ # path is to an external server
59
+ def remote?
60
+ %w( http https ).include? scheme
61
+ end
62
+
63
+ def non_http_remote?
64
+ !scheme.nil? && !remote?
65
+ end
66
+
67
+ def ignore?
68
+ return true if @data_proofer_ignore
69
+
70
+ return true if url.match(/^javascript:/)
71
+
72
+ # ignore base64 encoded images
73
+ if %w(ImageCheck FaviconCheck).include? @type
74
+ return true if url.match(/^data:image/)
75
+ end
76
+
77
+ # ignore user defined URLs
78
+ return true if ignores_pattern_check(@check.options[:url_ignore])
79
+
80
+ # ignore user defined alts
81
+ return false unless 'ImageCheck' == @type
82
+ return true if ignores_pattern_check(@check.options[:alt_ignore])
83
+ end
84
+
85
+ def ignore_empty_alt?
86
+ @check.options[:empty_alt_ignore]
87
+ end
88
+
89
+ def allow_hash_href?
90
+ @check.options[:allow_hash_href]
91
+ end
92
+
93
+ # path is external to the file
94
+ def external?
95
+ !internal?
96
+ end
97
+
98
+ # path is an anchor or a query
99
+ def internal?
100
+ url.start_with? '#', '?'
101
+ end
102
+
103
+ def file_path
104
+ return if path.nil?
105
+
106
+ path_dot_ext = ''
107
+
108
+ if @check.options[:assume_extension]
109
+ path_dot_ext = path + @check.options[:extension]
110
+ end
111
+
112
+ if path =~ %r{^/} # path relative to root
113
+ base = File.directory?(@check.src) ? @check.src : File.dirname(@check.src)
114
+ elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
115
+ base = File.dirname @check.path
116
+ elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # relative links in nested dir, path is a file
117
+ base = File.dirname @check.path
118
+ else # relative link, path is a directory
119
+ base = @check.path
120
+ end
121
+
122
+ file = File.join base, path
123
+
124
+ # implicit index support
125
+ if File.directory?(file) && !unslashed_directory?(file)
126
+ file = File.join file, @check.options[:directory_index_file]
127
+ elsif @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
128
+ file = "#{file}#{@check.options[:extension]}"
129
+ end
130
+
131
+ file
132
+ end
133
+
134
+ # checks if a file exists relative to the current pwd
135
+ def exists?
136
+ return @checked_paths[absolute_path] if @checked_paths.key? absolute_path
137
+ @checked_paths[absolute_path] = File.exist? absolute_path
138
+ end
139
+
140
+ def absolute_path
141
+ path = file_path || @check.path
142
+ File.expand_path path, Dir.pwd
143
+ end
144
+
145
+ def ignores_pattern_check(links)
146
+ links.each do |ignore|
147
+ if ignore.is_a? String
148
+ return true if ignore == url
149
+ elsif ignore.is_a? Regexp
150
+ return true if ignore =~ url
151
+ end
152
+ end
153
+
154
+ false
155
+ end
156
+
157
+ def unslashed_directory?(file)
158
+ File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location?
159
+ end
160
+
161
+ def follow_location?
162
+ @check.options[:typhoeus] && @check.options[:typhoeus][:followlocation]
163
+ end
164
+ end
165
+ end
@@ -1,18 +1,16 @@
1
- # encoding: utf-8
2
- class HTML::Proofer::CheckRunner
3
-
1
+ module HTMLProofer
4
2
  class Issue
5
- attr_reader :path, :desc, :status, :line_number
3
+ attr_reader :path, :desc, :status, :line
6
4
 
7
- def initialize(path, desc, line_number = nil, status = -1)
8
- @line_number = line_number.nil? ? '' : " (line #{line_number})"
5
+ def initialize(path, desc, line: nil, status: -1)
6
+ @line = line.nil? ? '' : " (line #{line})"
9
7
  @path = path
10
8
  @desc = desc
11
9
  @status = status
12
10
  end
13
11
 
14
12
  def to_s
15
- "#{@path}: #{@desc}#{@line_number}"
13
+ "#{@path}: #{@desc}#{@line}"
16
14
  end
17
15
  end
18
16
 
@@ -48,13 +46,13 @@ class HTML::Proofer::CheckRunner
48
46
 
49
47
  sorted_issues.each do |issue|
50
48
  if matcher != issue.send(first_report)
51
- @logger.log :error, :red, "- #{issue.send(first_report)}"
49
+ @logger.log :error, "- #{issue.send(first_report)}"
52
50
  matcher = issue.send(first_report)
53
51
  end
54
52
  if first_report == :status
55
- @logger.log :error, :red, " * #{issue}"
53
+ @logger.log :error, " * #{issue}"
56
54
  else
57
- @logger.log :error, :red, " * #{issue.send(second_report)}#{issue.line_number}"
55
+ @logger.log :error, " * #{issue.send(second_report)}#{issue.line}"
58
56
  end
59
57
  end
60
58
  end