html-proofer 2.6.4 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,72 +1,58 @@
1
- class LinkCheckable < ::HTML::Proofer::Checkable
2
-
3
- def href
4
- real_attr @href
5
- end
6
-
7
- def id
8
- real_attr @id
9
- end
10
-
11
- def name
12
- real_attr @name
13
- end
1
+ class LinkCheck < ::HTMLProofer::Check
2
+ include HTMLProofer::Utils
14
3
 
15
4
  def missing_href?
16
- href.nil? && name.nil? && id.nil?
5
+ blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
17
6
  end
18
7
 
19
8
  def placeholder?
20
- (id || name) && href.nil?
9
+ (!blank?(@link.id) || !blank?(@link.name)) && @link.href.nil?
21
10
  end
22
- end
23
-
24
- class LinkCheck < ::HTML::Proofer::CheckRunner
25
- include HTML::Proofer::Utils
26
11
 
27
12
  def run
28
13
  @html.css('a, link').each do |node|
29
- link = LinkCheckable.new(node, self)
30
- line = node.line
14
+ @link = create_element(node)
15
+ line = @node.line
16
+
17
+ next if @link.ignore?
31
18
 
32
- next if link.ignore?
33
- next if link.href =~ /^javascript:/ # can't put this in ignore? because the URI does not parse
34
- next if link.placeholder?
35
- next if link.allow_hash_href? && link.href == '#'
19
+ next if placeholder?
20
+ next if @link.allow_hash_href? && @link.href == '#'
36
21
 
37
22
  # is it even a valid URL?
38
- unless link.valid?
39
- add_issue("#{link.href} is an invalid URL", line)
23
+ unless @link.valid?
24
+ add_issue("#{@link.href} is an invalid URL", line: line)
40
25
  next
41
26
  end
42
27
 
43
- check_schemes(link, line)
28
+ check_schemes(@link, line)
44
29
 
45
- # is there even a href?
46
- if link.missing_href?
47
- add_issue('anchor has no href attribute', line)
30
+ # is there even an href?
31
+ if missing_href?
32
+ # HTML5 allows dropping the href: http://git.io/vBX0z
33
+ next if @html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?
34
+ add_issue('anchor has no href attribute', line: line)
48
35
  next
49
36
  end
50
37
 
51
38
  # intentionally here because we still want valid? & missing_href? to execute
52
- next if link.non_http_remote?
53
-
39
+ next if @link.non_http_remote?
54
40
  # does the file even exist?
55
- if link.remote?
56
- add_to_external_urls(link.href, line)
41
+ if @link.remote?
42
+ add_to_external_urls(@link.href, line)
57
43
  next
58
- elsif !link.internal?
59
- add_issue("internally linking to #{link.href}, which does not exist", line) unless link.exists?
44
+ elsif !@link.internal? && !@link.exists?
45
+ add_issue("internally linking to #{@link.href}, which does not exist", line: line)
60
46
  end
61
47
 
62
48
  # does the local directory have a trailing slash?
63
- if link.unslashed_directory? link.absolute_path
64
- add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", line)
49
+ if @link.unslashed_directory? @link.absolute_path
50
+ add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line)
65
51
  next
66
52
  end
67
53
 
68
54
  # verify the target hash
69
- handle_hash(link, line) if link.hash
55
+ handle_hash(@link, line) if @link.hash
70
56
  end
71
57
 
72
58
  external_urls
@@ -79,26 +65,27 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
79
65
  when 'tel'
80
66
  handle_tel(link, line)
81
67
  when 'http'
82
- add_issue("#{link.href} is not an HTTPS link", line) if @options[:enforce_https]
68
+ return unless @options[:enforce_https]
69
+ add_issue("#{link.href} is not an HTTPS link", line: line)
83
70
  end
84
71
  end
85
72
 
86
73
  def handle_mailto(link, line)
87
74
  if link.path.empty?
88
- add_issue("#{link.href} contains no email address", line)
75
+ add_issue("#{link.href} contains no email address", line: line)
89
76
  elsif !link.path.include?('@')
90
- add_issue("#{link.href} contains an invalid email address", line)
77
+ add_issue("#{link.href} contains an invalid email address", line: line)
91
78
  end
92
79
  end
93
80
 
94
81
  def handle_tel(link, line)
95
- add_issue("#{link.href} contains no phone number", line) if link.path.empty?
82
+ add_issue("#{link.href} contains no phone number", line: line) if link.path.empty?
96
83
  end
97
84
 
98
85
  def handle_hash(link, line)
99
86
  if link.internal?
100
87
  unless hash_check @html, link.hash
101
- add_issue("linking to internal hash ##{link.hash} that does not exist", line)
88
+ add_issue("linking to internal hash ##{link.hash} that does not exist", line: line)
102
89
  end
103
90
  elsif link.external?
104
91
  external_link_check(link, line)
@@ -107,11 +94,11 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
107
94
 
108
95
  def external_link_check(link, line)
109
96
  if !link.exists?
110
- add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line)
97
+ add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line)
111
98
  else
112
99
  target_html = create_nokogiri link.absolute_path
113
100
  unless hash_check target_html, link.hash
114
- add_issue("linking to #{link.href}, but #{link.hash} does not exist", line)
101
+ add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line)
115
102
  end
116
103
  end
117
104
  end
@@ -122,7 +109,12 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
122
109
  "//*[case_insensitive_equals(@name, '#{href_hash}')]", \
123
110
  "//*[case_insensitive_equals(@id, '#{decoded_href_hash}')]", \
124
111
  "//*[case_insensitive_equals(@name, '#{decoded_href_hash}')]", \
125
- HTML::Proofer::XpathFunctions.new).length > 0
112
+ XpathFunctions.new).length > 0
126
113
  end
127
114
 
115
+ class XpathFunctions
116
+ def case_insensitive_equals(node_set, str_to_match)
117
+ node_set.find_all {|node| node.to_s.downcase == str_to_match.to_s.downcase }
118
+ end
119
+ end
128
120
  end
@@ -0,0 +1,28 @@
1
+ class ScriptCheck < ::HTMLProofer::Check
2
+ attr_reader :src
3
+
4
+ def missing_src?
5
+ !@script.src
6
+ end
7
+
8
+ def run
9
+ @html.css('script').each do |node|
10
+ @script = create_element(node)
11
+ line = node.line
12
+
13
+ next if @script.ignore?
14
+ next unless node.text.strip.empty?
15
+
16
+ # does the script exist?
17
+ if missing_src?
18
+ add_issue('script is empty and has no src attribute', line: line)
19
+ elsif @script.remote?
20
+ add_to_external_urls(@script.src, line)
21
+ elsif !@script.exists?
22
+ add_issue("internal script #{@script.src} does not exist", line: line)
23
+ end
24
+ end
25
+
26
+ external_urls
27
+ end
28
+ end
@@ -0,0 +1,56 @@
1
+ module HTMLProofer
2
+ module Configuration
3
+ require_relative 'version'
4
+
5
+ PROOFER_DEFAULTS = {
6
+ :allow_hash_href => false,
7
+ :alt_ignore => [],
8
+ :assume_extension => false,
9
+ :check_external_hash => false,
10
+ :check_favicon => false,
11
+ :check_html => false,
12
+ :checks_to_ignore => [],
13
+ :directory_index_file => 'index.html',
14
+ :disable_external => false,
15
+ :empty_alt_ignore => false,
16
+ :enforce_https => false,
17
+ :error_sort => :path,
18
+ :extension => '.html',
19
+ :external_only => false,
20
+ :file_ignore => [],
21
+ :http_status_ignore => [],
22
+ :log_level => :info,
23
+ :only_4xx => false,
24
+ :url_ignore => [],
25
+ :url_swap => []
26
+ }
27
+
28
+ TYPHOEUS_DEFAULTS = {
29
+ :followlocation => true,
30
+ :headers => {
31
+ 'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{HTMLProofer::VERSION}; +https://github.com/gjtorikian/html-proofer)"
32
+ }
33
+ }
34
+
35
+ HYDRA_DEFAULTS = {
36
+ :max_concurrency => 50
37
+ }
38
+
39
+ PARALLEL_DEFAULTS = {}
40
+
41
+ VALIDATION_DEFAULTS = {
42
+ :report_script_embeds => false,
43
+ :report_invalid_tags => false
44
+ }
45
+
46
+ CACHE_DEFAULTS = {}
47
+
48
+ def self.to_regex?(item)
49
+ if item.start_with?('/') && item.end_with?('/')
50
+ Regexp.new item[1...-1]
51
+ else
52
+ item
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,165 @@
1
+ require 'addressable/uri'
2
+ require_relative './utils'
3
+
4
+ module HTMLProofer
5
+ # Represents the element currently being processed
6
+ class Element
7
+ include HTMLProofer::Utils
8
+
9
+ attr_reader :id, :name, :alt, :href, :link, :src, :line
10
+
11
+ def initialize(obj, check)
12
+ # Construct readable ivars for every element
13
+ obj.attributes.each_pair do |attribute, value|
14
+ name = "#{attribute.tr('-:.', '_')}".to_sym
15
+ (class << self; self; end).send(:attr_reader, name)
16
+ instance_variable_set("@#{name}", value.value)
17
+ end
18
+
19
+ @text = obj.content
20
+ @check = check
21
+ @checked_paths = {}
22
+ @type = check.class.name
23
+ @line = obj.line
24
+
25
+ # fix up missing protocols
26
+ @href.insert 0, 'http:' if @href =~ %r{^//}
27
+ @src.insert 0, 'http:' if @src =~ %r{^//}
28
+ end
29
+
30
+ def url
31
+ url = @src || @srcset || @href || ''
32
+ return url if @check.options[:url_swap].empty?
33
+ swap(url, @check.options[:url_swap])
34
+ end
35
+
36
+ def valid?
37
+ !parts.nil?
38
+ end
39
+
40
+ def parts
41
+ @parts ||= Addressable::URI.parse url
42
+ rescue URI::Error, Addressable::URI::InvalidURIError
43
+ @parts = nil
44
+ end
45
+
46
+ def path
47
+ Addressable::URI.unencode parts.path unless parts.nil?
48
+ end
49
+
50
+ def hash
51
+ parts.fragment unless parts.nil?
52
+ end
53
+
54
+ def scheme
55
+ parts.scheme unless parts.nil?
56
+ end
57
+
58
+ # path is to an external server
59
+ def remote?
60
+ %w( http https ).include? scheme
61
+ end
62
+
63
+ def non_http_remote?
64
+ !scheme.nil? && !remote?
65
+ end
66
+
67
+ def ignore?
68
+ return true if @data_proofer_ignore
69
+
70
+ return true if url.match(/^javascript:/)
71
+
72
+ # ignore base64 encoded images
73
+ if %w(ImageCheck FaviconCheck).include? @type
74
+ return true if url.match(/^data:image/)
75
+ end
76
+
77
+ # ignore user defined URLs
78
+ return true if ignores_pattern_check(@check.options[:url_ignore])
79
+
80
+ # ignore user defined alts
81
+ return false unless 'ImageCheck' == @type
82
+ return true if ignores_pattern_check(@check.options[:alt_ignore])
83
+ end
84
+
85
+ def ignore_empty_alt?
86
+ @check.options[:empty_alt_ignore]
87
+ end
88
+
89
+ def allow_hash_href?
90
+ @check.options[:allow_hash_href]
91
+ end
92
+
93
+ # path is external to the file
94
+ def external?
95
+ !internal?
96
+ end
97
+
98
+ # path is an anchor or a query
99
+ def internal?
100
+ url.start_with? '#', '?'
101
+ end
102
+
103
+ def file_path
104
+ return if path.nil?
105
+
106
+ path_dot_ext = ''
107
+
108
+ if @check.options[:assume_extension]
109
+ path_dot_ext = path + @check.options[:extension]
110
+ end
111
+
112
+ if path =~ %r{^/} # path relative to root
113
+ base = File.directory?(@check.src) ? @check.src : File.dirname(@check.src)
114
+ elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
115
+ base = File.dirname @check.path
116
+ elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # relative links in nested dir, path is a file
117
+ base = File.dirname @check.path
118
+ else # relative link, path is a directory
119
+ base = @check.path
120
+ end
121
+
122
+ file = File.join base, path
123
+
124
+ # implicit index support
125
+ if File.directory?(file) && !unslashed_directory?(file)
126
+ file = File.join file, @check.options[:directory_index_file]
127
+ elsif @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
128
+ file = "#{file}#{@check.options[:extension]}"
129
+ end
130
+
131
+ file
132
+ end
133
+
134
+ # checks if a file exists relative to the current pwd
135
+ def exists?
136
+ return @checked_paths[absolute_path] if @checked_paths.key? absolute_path
137
+ @checked_paths[absolute_path] = File.exist? absolute_path
138
+ end
139
+
140
+ def absolute_path
141
+ path = file_path || @check.path
142
+ File.expand_path path, Dir.pwd
143
+ end
144
+
145
+ def ignores_pattern_check(links)
146
+ links.each do |ignore|
147
+ if ignore.is_a? String
148
+ return true if ignore == url
149
+ elsif ignore.is_a? Regexp
150
+ return true if ignore =~ url
151
+ end
152
+ end
153
+
154
+ false
155
+ end
156
+
157
+ def unslashed_directory?(file)
158
+ File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location?
159
+ end
160
+
161
+ def follow_location?
162
+ @check.options[:typhoeus] && @check.options[:typhoeus][:followlocation]
163
+ end
164
+ end
165
+ end
@@ -1,18 +1,16 @@
1
- # encoding: utf-8
2
- class HTML::Proofer::CheckRunner
3
-
1
+ module HTMLProofer
4
2
  class Issue
5
- attr_reader :path, :desc, :status, :line_number
3
+ attr_reader :path, :desc, :status, :line
6
4
 
7
- def initialize(path, desc, line_number = nil, status = -1)
8
- @line_number = line_number.nil? ? '' : " (line #{line_number})"
5
+ def initialize(path, desc, line: nil, status: -1)
6
+ @line = line.nil? ? '' : " (line #{line})"
9
7
  @path = path
10
8
  @desc = desc
11
9
  @status = status
12
10
  end
13
11
 
14
12
  def to_s
15
- "#{@path}: #{@desc}#{@line_number}"
13
+ "#{@path}: #{@desc}#{@line}"
16
14
  end
17
15
  end
18
16
 
@@ -48,13 +46,13 @@ class HTML::Proofer::CheckRunner
48
46
 
49
47
  sorted_issues.each do |issue|
50
48
  if matcher != issue.send(first_report)
51
- @logger.log :error, :red, "- #{issue.send(first_report)}"
49
+ @logger.log :error, "- #{issue.send(first_report)}"
52
50
  matcher = issue.send(first_report)
53
51
  end
54
52
  if first_report == :status
55
- @logger.log :error, :red, " * #{issue}"
53
+ @logger.log :error, " * #{issue}"
56
54
  else
57
- @logger.log :error, :red, " * #{issue.send(second_report)}#{issue.line_number}"
55
+ @logger.log :error, " * #{issue.send(second_report)}#{issue.line}"
58
56
  end
59
57
  end
60
58
  end