html-proofer 1.6.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -1
  3. data/README.md +74 -56
  4. data/Rakefile +4 -6
  5. data/bin/htmlproof +46 -36
  6. data/html-proofer.gemspec +22 -22
  7. data/lib/html/proofer/check_runner/issue.rb +62 -0
  8. data/lib/html/proofer/{check.rb → check_runner.rb} +11 -19
  9. data/lib/html/proofer/checkable.rb +42 -28
  10. data/lib/html/proofer/checks/favicon.rb +6 -6
  11. data/lib/html/proofer/checks/html.rb +11 -12
  12. data/lib/html/proofer/checks/images.rb +11 -11
  13. data/lib/html/proofer/checks/links.rb +30 -28
  14. data/lib/html/proofer/checks/scripts.rb +7 -8
  15. data/lib/html/proofer/log.rb +38 -0
  16. data/lib/html/proofer/url_validator.rb +135 -0
  17. data/lib/html/proofer/utils.rb +24 -0
  18. data/lib/html/proofer/version.rb +1 -1
  19. data/lib/html/proofer.rb +95 -199
  20. data/spec/html/proofer/command_spec.rb +82 -0
  21. data/spec/html/proofer/favicon_spec.rb +20 -20
  22. data/spec/html/proofer/fixtures/images/srcSetCheck.html +7 -0
  23. data/spec/html/proofer/fixtures/images/srcSetIgnorable.html +13 -0
  24. data/spec/html/proofer/fixtures/images/srcSetMissingAlt.html +7 -0
  25. data/spec/html/proofer/fixtures/images/srcSetMissingImage.html +7 -0
  26. data/spec/html/proofer/fixtures/links/erstiebegrüßung.html +1 -0
  27. data/spec/html/proofer/fixtures/links/erstiebegrüßung.html +1 -0
  28. data/spec/html/proofer/fixtures/links/file.foo +11 -0
  29. data/spec/html/proofer/fixtures/links/folder/multiples/catalog/file.html +8 -0
  30. data/spec/html/proofer/fixtures/links/folder/multiples/javadoc/file.html +8 -0
  31. data/spec/html/proofer/fixtures/links/nodupe.html +1 -1
  32. data/spec/html/proofer/fixtures/links/redirected_error.html +1 -0
  33. data/spec/html/proofer/fixtures/links/rootLink/rootLink.html +0 -1
  34. data/spec/html/proofer/fixtures/links/urlencoded-href.html +2 -0
  35. data/spec/html/proofer/fixtures/links/utf8Link.html +2 -0
  36. data/spec/html/proofer/fixtures/utils/lang-jp.html +1 -0
  37. data/spec/html/proofer/html_spec.rb +25 -25
  38. data/spec/html/proofer/images_spec.rb +59 -35
  39. data/spec/html/proofer/links_spec.rb +152 -109
  40. data/spec/html/proofer/scripts_spec.rb +17 -17
  41. data/spec/html/proofer/utils_spec.rb +14 -0
  42. data/spec/html/proofer_spec.rb +58 -38
  43. data/spec/spec_helper.rb +13 -6
  44. metadata +39 -7
  45. data/lib/html/proofer/checks.rb +0 -15
  46. data/lib/html/proofer/issue.rb +0 -21
@@ -1,34 +1,37 @@
1
+ require 'addressable/uri'
2
+ require_relative './utils'
3
+
1
4
  module HTML
2
5
  class Proofer
6
+ # Represents the superclass from which all checks derive.
3
7
  class Checkable
4
- def initialize(obj, type, check)
5
- @src = obj['src']
6
- @href = obj['href']
7
- @alt = obj['alt']
8
- @name = obj['name']
9
- @id = obj['id']
10
- @rel = obj['rel']
8
+ include HTML::Utils
9
+ attr_reader :line
10
+
11
+ def initialize(obj, check)
12
+ obj.attributes.each_pair do |attribute, value|
13
+ next if attribute == 'data-proofer-ignore' # TODO: not quite sure why this doesn't work
14
+ instance_variable_set("@#{attribute}".to_sym, value.value)
15
+ end
11
16
 
12
17
  @data_ignore_proofer = obj['data-proofer-ignore']
13
18
  @content = obj.content
14
19
  @check = check
15
20
  @checked_paths = {}
16
- @type = type
21
+ @type = self.class.name
22
+ @line = obj.line
17
23
 
18
24
  if @href && @check.options[:href_swap]
19
- @check.options[:href_swap].each do |link, replace|
20
- @href = @href.gsub(link, replace)
21
- end
25
+ @href = swap(@href, @check.options[:href_swap])
22
26
  end
23
27
 
24
28
  # fix up missing protocols
25
- @href.insert 0, "http:" if @href =~ /^\/\//
26
- @src.insert 0, "http:" if @src =~ /^\/\//
27
-
29
+ @href.insert 0, 'http:' if @href =~ %r{^//}
30
+ @src.insert 0, 'http:' if @src =~ %r{^//}
28
31
  end
29
32
 
30
33
  def url
31
- @src || @href || ""
34
+ @src || @srcset || @href || ''
32
35
  end
33
36
 
34
37
  def valid?
@@ -42,15 +45,15 @@ module HTML
42
45
  end
43
46
 
44
47
  def path
45
- parts.path if !parts.nil?
48
+ CGI.unescape parts.path unless parts.nil?
46
49
  end
47
50
 
48
51
  def hash
49
- parts.fragment if !parts.nil?
52
+ parts.fragment unless parts.nil?
50
53
  end
51
54
 
52
55
  def scheme
53
- parts.scheme if !parts.nil?
56
+ parts.scheme unless parts.nil?
54
57
  end
55
58
 
56
59
  # path is to an external server
@@ -66,13 +69,13 @@ module HTML
66
69
  return true if @data_ignore_proofer
67
70
 
68
71
  case @type
69
- when "favicon"
72
+ when 'FaviconCheckable'
70
73
  return true if url.match(/^data:image/)
71
- when "link"
72
- return true if ignores_pattern_check(@check.additional_href_ignores)
73
- when "image"
74
+ when 'LinkCheckable'
75
+ return true if ignores_pattern_check(@check.href_ignores)
76
+ when 'ImageCheckable'
74
77
  return true if url.match(/^data:image/)
75
- return true if ignores_pattern_check(@check.additional_alt_ignores)
78
+ return true if ignores_pattern_check(@check.alt_ignores)
76
79
  end
77
80
  end
78
81
 
@@ -83,7 +86,7 @@ module HTML
83
86
 
84
87
  # path is an anchor or a query
85
88
  def internal?
86
- url.start_with? "#", "?"
89
+ url.start_with? '#', '?'
87
90
  end
88
91
 
89
92
  def file_path
@@ -102,7 +105,7 @@ module HTML
102
105
  file = File.join base, path
103
106
 
104
107
  # implicit index support
105
- if File.directory? file and !unslashed_directory? file
108
+ if File.directory?(file) && !unslashed_directory?(file)
106
109
  file = File.join file, @check.options[:directory_index_file]
107
110
  end
108
111
 
@@ -111,7 +114,7 @@ module HTML
111
114
 
112
115
  # checks if a file exists relative to the current pwd
113
116
  def exists?
114
- return @checked_paths[absolute_path] if @checked_paths.has_key? absolute_path
117
+ return @checked_paths[absolute_path] if @checked_paths.key? absolute_path
115
118
  @checked_paths[absolute_path] = File.exist? absolute_path
116
119
  end
117
120
 
@@ -132,9 +135,20 @@ module HTML
132
135
  false
133
136
  end
134
137
 
135
- def unslashed_directory? file
136
- File.directory? file and !file.end_with? File::SEPARATOR and !@check.options[:followlocation]
138
+ def unslashed_directory?(file)
139
+ File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location?
137
140
  end
141
+
142
+ def follow_location?
143
+ @check.options[:typhoeus] && @check.options[:typhoeus][:followlocation]
144
+ end
145
+
146
+ private
147
+
148
+ def real_attr(attr)
149
+ attr.to_s unless attr.nil? || attr.empty?
150
+ end
151
+
138
152
  end
139
153
  end
140
154
  end
@@ -1,21 +1,21 @@
1
1
  # encoding: utf-8
2
2
 
3
- class Favicon < ::HTML::Proofer::Checkable
3
+ class FaviconCheckable < ::HTML::Proofer::Checkable
4
4
  def rel
5
5
  @rel
6
6
  end
7
7
  end
8
8
 
9
- class Favicons < ::HTML::Proofer::Checks::Check
9
+ class FaviconCheck < ::HTML::Proofer::CheckRunner
10
10
 
11
11
  def run
12
- @html.xpath("//link[not(ancestor::pre or ancestor::code)]").each do |favicon|
13
- favicon = Favicon.new favicon, "favicon", self
12
+ @html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |favicon|
13
+ favicon = FaviconCheckable.new favicon, self
14
14
  next if favicon.ignore?
15
- return if favicon.rel.split(" ").last.eql? "icon"
15
+ return if favicon.rel.split(' ').last.eql? 'icon'
16
16
  end
17
17
 
18
- self.add_issue "no favicon specified"
18
+ add_issue 'no favicon specified'
19
19
  end
20
20
 
21
21
  end
@@ -1,24 +1,23 @@
1
1
  # encoding: utf-8
2
2
 
3
- class Html < ::HTML::Proofer::Checks::Check
3
+ class HtmlCheck < ::HTML::Proofer::CheckRunner
4
4
 
5
5
  # new html5 tags (source: http://www.w3schools.com/html/html5_new_elements.asp)
6
- HTML5_TAGS = %w(article aside bdi details dialog figcaption
7
- figure footer header main mark menuitem meter
8
- nav progress rp rt ruby section summary
9
- time wbr datalist keygen output color date
10
- datetime datetime-local email month number
11
- range search tel time url week canvas
6
+ HTML5_TAGS = %w(article aside bdi details dialog figcaption
7
+ figure footer header main mark menuitem meter
8
+ nav progress rp rt ruby section summary
9
+ time wbr datalist keygen output color date
10
+ datetime datetime-local email month number
11
+ range search tel time url week canvas
12
12
  svg audio embed source track video)
13
13
 
14
14
  def run
15
15
  @html.errors.each do |e|
16
+ # Nokogiri (or rather libxml2 underhood) only recognizes html4 tags,
17
+ # so we need to skip errors caused by the new tags in html5
18
+ next if HTML5_TAGS.include? e.to_s[/Tag ([\w-]+) invalid/o, 1]
16
19
 
17
- # Nokogiri (or rather libxml2 underhood) only recognizes html4 tags,
18
- # so we need to skip errors caused by the new tags in html5
19
- next if HTML5_TAGS.include? e.to_s[/Tag ([\w-]+) invalid/o, 1]
20
-
21
- self.add_issue(e.to_s)
20
+ add_issue(e.to_s)
22
21
  end
23
22
  end
24
23
  end
@@ -1,19 +1,19 @@
1
1
  # encoding: utf-8
2
2
 
3
- class Image < ::HTML::Proofer::Checkable
3
+ class ImageCheckable < ::HTML::Proofer::Checkable
4
4
 
5
5
  SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/
6
6
 
7
7
  def valid_alt_tag?
8
- @alt and !@alt.empty?
8
+ @alt && !@alt.empty?
9
9
  end
10
10
 
11
11
  def terrible_filename?
12
- @src =~ SCREEN_SHOT_REGEX
12
+ src =~ SCREEN_SHOT_REGEX
13
13
  end
14
14
 
15
15
  def src
16
- @src unless @src.nil? || @src.empty?
16
+ real_attr(@src) || real_attr(@srcset)
17
17
  end
18
18
 
19
19
  def missing_src?
@@ -22,29 +22,29 @@ class Image < ::HTML::Proofer::Checkable
22
22
 
23
23
  end
24
24
 
25
- class Images < ::HTML::Proofer::Checks::Check
25
+ class ImageCheck < ::HTML::Proofer::CheckRunner
26
26
  def run
27
- @html.css("img").each do |i|
28
- img = Image.new i, "image", self
27
+ @html.css('img').each do |i|
28
+ img = ImageCheckable.new i, self
29
29
 
30
30
  next if img.ignore?
31
31
 
32
32
  # screenshot filenames should return because of terrible names
33
- next self.add_issue "image has a terrible filename (#{img.src})" if img.terrible_filename?
33
+ next add_issue("image has a terrible filename (#{img.src})", i.line) if img.terrible_filename?
34
34
 
35
35
  # does the image exist?
36
36
  if img.missing_src?
37
- self.add_issue "image has no src attribute"
37
+ add_issue('image has no src or srcset attribute', i.line)
38
38
  else
39
39
  if img.remote?
40
40
  add_to_external_urls img.src
41
41
  else
42
- self.add_issue("internal image #{img.src} does not exist") unless img.exists?
42
+ add_issue("internal image #{img.src} does not exist", i.line) unless img.exists?
43
43
  end
44
44
  end
45
45
 
46
46
  # check alt tag
47
- self.add_issue "image #{img.src} does not have an alt attribute" unless img.valid_alt_tag?
47
+ add_issue("image #{img.src} does not have an alt attribute", i.line) unless img.valid_alt_tag?
48
48
  end
49
49
 
50
50
  external_urls
@@ -1,6 +1,5 @@
1
1
  # encoding: utf-8
2
-
3
- class Link < ::HTML::Proofer::Checkable
2
+ class LinkCheckable < ::HTML::Proofer::Checkable
4
3
 
5
4
  def href
6
5
  real_attr @href
@@ -15,26 +14,21 @@ class Link < ::HTML::Proofer::Checkable
15
14
  end
16
15
 
17
16
  def missing_href?
18
- href.nil? and name.nil? and id.nil?
17
+ href.nil? && name.nil? && id.nil?
19
18
  end
20
19
 
21
20
  def placeholder?
22
21
  (id || name) && href.nil?
23
22
  end
24
23
 
25
- private
26
-
27
- def real_attr(attr)
28
- attr unless attr.nil? || attr.empty?
29
- end
30
-
31
24
  end
32
25
 
33
- class Links < ::HTML::Proofer::Checks::Check
26
+ class LinkCheck < ::HTML::Proofer::CheckRunner
27
+ include HTML::Utils
34
28
 
35
29
  def run
36
- @html.css("a, link").each do |l|
37
- link = Link.new l, "link", self
30
+ @html.css('a, link').each do |l|
31
+ link = LinkCheckable.new l, self
38
32
 
39
33
  next if link.ignore?
40
34
  next if link.href =~ /^javascript:/ # can't put this in ignore? because the URI does not parse
@@ -42,22 +36,22 @@ class Links < ::HTML::Proofer::Checks::Check
42
36
 
43
37
  # is it even a valid URL?
44
38
  unless link.valid?
45
- self.add_issue "#{link.href} is an invalid URL"
39
+ add_issue("#{link.href} is an invalid URL", l.line)
46
40
  next
47
41
  end
48
42
 
49
- if link.scheme == "mailto"
50
- self.add_issue "#{link.href} contains no email address" if link.path.empty?
51
- self.add_issue "#{link.href} contain an invalid email address" unless link.path.include?("@")
43
+ if link.scheme == 'mailto'
44
+ add_issue("#{link.href} contains no email address", l.line) if link.path.empty?
45
+ add_issue("#{link.href} contain an invalid email address", l.line) unless link.path.include?('@')
52
46
  end
53
47
 
54
- if link.scheme == "tel"
55
- self.add_issue "#{link.href} contains no phone number" if link.path.empty?
48
+ if link.scheme == 'tel'
49
+ add_issue("#{link.href} contains no phone number", l.line) if link.path.empty?
56
50
  end
57
51
 
58
52
  # is there even a href?
59
53
  if link.missing_href?
60
- self.add_issue("anchor has no href attribute")
54
+ add_issue('anchor has no href attribute', l.line)
61
55
  next
62
56
  end
63
57
 
@@ -69,26 +63,23 @@ class Links < ::HTML::Proofer::Checks::Check
69
63
  add_to_external_urls link.href
70
64
  next
71
65
  elsif !link.internal?
72
- self.add_issue "internally linking to #{link.href}, which does not exist" unless link.exists?
66
+ add_issue("internally linking to #{link.href}, which does not exist", l.line) unless link.exists?
73
67
  end
74
68
 
75
69
  # does the local directory have a trailing slash?
76
70
  if link.unslashed_directory? link.absolute_path
77
- self.add_issue("internally linking to a directory #{link.absolute_path} without trailing slash")
71
+ add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", l.line)
78
72
  next
79
73
  end
80
74
 
81
75
  # verify the target hash
82
76
  if link.hash
83
77
  if link.internal?
84
- self.add_issue "linking to internal hash ##{link.hash} that does not exist" unless hash_check @html, link.hash
85
- elsif link.external?
86
- unless link.exists?
87
- self.add_issue "trying to find hash of #{link.href}, but #{link.absolute_path} does not exist"
88
- else
89
- target_html = HTML::Proofer.create_nokogiri link.absolute_path
90
- self.add_issue "linking to #{link.href}, but #{link.hash} does not exist" unless hash_check target_html, link.hash
78
+ unless hash_check @html, link.hash
79
+ add_issue("linking to internal hash ##{link.hash} that does not exist", l.line)
91
80
  end
81
+ elsif link.external?
82
+ external_link_check(link)
92
83
  end
93
84
  end
94
85
  end
@@ -96,6 +87,17 @@ class Links < ::HTML::Proofer::Checks::Check
96
87
  external_urls
97
88
  end
98
89
 
90
+ def external_link_check(link)
91
+ if !link.exists?
92
+ add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", l.line)
93
+ else
94
+ target_html = create_nokogiri link.absolute_path
95
+ unless hash_check target_html, link.hash
96
+ add_issue("linking to #{link.href}, but #{link.hash} does not exist", link.line)
97
+ end
98
+ end
99
+ end
100
+
99
101
  def hash_check(html, href_hash)
100
102
  html.xpath("//*[@id='#{href_hash}']", "//*[@name='#{href_hash}']").length > 0
101
103
  end
@@ -1,9 +1,9 @@
1
1
  # encoding: utf-8
2
2
 
3
- class Script < ::HTML::Proofer::Checkable
3
+ class ScriptCheckable < ::HTML::Proofer::Checkable
4
4
 
5
5
  def src
6
- @src unless @src.nil? || @src.empty?
6
+ real_attr @src
7
7
  end
8
8
 
9
9
  def missing_src?
@@ -16,23 +16,22 @@ class Script < ::HTML::Proofer::Checkable
16
16
 
17
17
  end
18
18
 
19
- class Scripts < ::HTML::Proofer::Checks::Check
19
+ class ScriptCheck < ::HTML::Proofer::CheckRunner
20
20
  def run
21
- @html.css("script").each do |s|
22
- script = Script.new s, "script", self
21
+ @html.css('script').each do |s|
22
+ script = ScriptCheckable.new s, self
23
23
 
24
24
  next if script.ignore?
25
25
  next unless script.blank?
26
26
 
27
27
  # does the script exist?
28
28
  if script.missing_src?
29
- self.add_issue "script is empty and has no src attribute"
29
+ add_issue('script is empty and has no src attribute', s.line)
30
30
  elsif script.remote?
31
31
  add_to_external_urls script.src
32
32
  else
33
- self.add_issue("internal script #{script.src} does not exist") unless script.exists?
33
+ add_issue("internal script #{script.src} does not exist", s.line) unless script.exists?
34
34
  end
35
-
36
35
  end
37
36
 
38
37
  external_urls
@@ -0,0 +1,38 @@
1
+ require 'yell'
2
+ require 'colored'
3
+
4
+ module HTML
5
+ class Proofer
6
+ class Log
7
+ include Yell::Loggable
8
+
9
+ def initialize(verbose)
10
+ log_level = verbose ? :debug : :info
11
+
12
+ @logger = Yell.new(:format => false, \
13
+ :name => 'HTML::Proofer', \
14
+ :level => "gte.#{log_level}") do |l|
15
+ l.adapter :stdout, :level => [:debug, :info, :warn]
16
+ l.adapter :stderr, :level => [:error, :fatal]
17
+ end
18
+ end
19
+
20
+ def log(level, color, message)
21
+ @logger.send level, colorize(color, message)
22
+ end
23
+
24
+ def colorize(color, message)
25
+ if $stdout.isatty && $stderr.isatty
26
+ Colored.colorize(message, foreground: color)
27
+ else
28
+ message
29
+ end
30
+ end
31
+
32
+ # dumb override to play nice with Typhoeus/Ethon
33
+ def debug(message = nil)
34
+ log(:debug, :yellow, message) unless message.nil?
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,135 @@
1
+ require 'typhoeus'
2
+ require 'uri'
3
+ require_relative './utils'
4
+
5
+ module HTML
6
+ class Proofer
7
+ class UrlValidator
8
+ include Utils
9
+
10
+ attr_accessor :logger, :external_urls, :hydra
11
+
12
+ def initialize(logger, external_urls, options, typhoeus_opts, hydra_opts)
13
+ @logger = logger
14
+ @external_urls = external_urls
15
+ @failed_tests = []
16
+ @options = options
17
+ @hydra = Typhoeus::Hydra.new(hydra_opts)
18
+ @typhoeus_opts = typhoeus_opts
19
+ end
20
+
21
+ def run
22
+ external_link_checker(external_urls)
23
+ @failed_tests
24
+ end
25
+
26
+ # Proofer runs faster if we pull out all the external URLs and run the checks
27
+ # at the end. Otherwise, we're halting the consuming process for every file during
28
+ # the check_directory_of_files process.
29
+ #
30
+ # In addition, sorting the list lets libcurl keep connections to the same hosts alive.
31
+ #
32
+ # Finally, we'll first make a HEAD request, rather than GETing all the contents.
33
+ # If the HEAD fails, we'll fall back to GET, as some servers are not configured
34
+ # for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
35
+ # not an option.
36
+ def external_link_checker(external_urls)
37
+ external_urls = Hash[external_urls.sort]
38
+
39
+ count = external_urls.length
40
+ check_text = "#{count} " << (count == 1 ? 'external link' : 'external links')
41
+ logger.log :info, :blue, "Checking #{check_text}..."
42
+
43
+ Ethon.logger = logger # log from Typhoeus/Ethon
44
+
45
+ url_processor(external_urls)
46
+
47
+ logger.log :debug, :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..."
48
+ hydra.run
49
+ end
50
+
51
+ def url_processor(external_urls)
52
+ external_urls.each_pair do |href, filenames|
53
+ href = clean_url(href)
54
+ if hash?(href) && @options[:check_external_hash]
55
+ queue_request(:get, href, filenames)
56
+ else
57
+ queue_request(:head, href, filenames)
58
+ end
59
+ end
60
+ end
61
+
62
+ def clean_url(href)
63
+ Addressable::URI.parse(href).normalize
64
+ end
65
+
66
+ def queue_request(method, href, filenames)
67
+ request = Typhoeus::Request.new(href, @typhoeus_opts.merge({ :method => method }))
68
+ request.on_complete { |response| response_handler(response, filenames) }
69
+ hydra.queue request
70
+ end
71
+
72
+ def response_handler(response, filenames)
73
+ effective_url = response.options[:effective_url]
74
+ href = response.request.base_url.to_s
75
+ method = response.request.options[:method]
76
+ response_code = response.code
77
+ debug_msg = "Received a #{response_code} for #{href}"
78
+ debug_msg << " in #{filenames.join(' ')}" unless filenames.nil?
79
+ logger.log :debug, :yellow, debug_msg
80
+
81
+ if response_code.between?(200, 299)
82
+ check_hash_in_2xx_response(href, effective_url, response, filenames)
83
+ elsif response.timed_out?
84
+ handle_timeout(filenames, response_code)
85
+ elsif method == :head
86
+ queue_request(:get, href, filenames)
87
+ else
88
+ return if @options[:only_4xx] && !response_code.between?(400, 499)
89
+ # Received a non-successful http response.
90
+ add_failed_tests filenames, "External link #{href} failed: #{response_code} #{response.return_message}", response_code
91
+ end
92
+ end
93
+
94
+ # Even though the response was a success, we may have been asked to check
95
+ # if the hash on the URL exists on the page
96
+ def check_hash_in_2xx_response(href, effective_url, response, filenames)
97
+ return if @options[:only_4xx]
98
+ return unless @options[:check_external_hash]
99
+ return unless (hash = hash?(href))
100
+
101
+ body_doc = create_nokogiri(response.body)
102
+
103
+ # user-content is a special addition by GitHub.
104
+ xpath = %(//*[@name="#{hash}"]|//*[@id="#{hash}"])
105
+ if URI.parse(href).host.match(/github\.com/i)
106
+ xpath << %(|//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"])
107
+ end
108
+
109
+ return unless body_doc.xpath(xpath).empty?
110
+
111
+ add_failed_tests filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response.code
112
+ end
113
+
114
+ def handle_timeout
115
+ return if @options[:only_4xx]
116
+ add_failed_tests filenames, "External link #{href} failed: got a time out", response_code
117
+ end
118
+
119
+ def add_failed_tests(filenames, desc, status = nil)
120
+ if filenames.nil?
121
+ @failed_tests << CheckRunner::Issue.new('', desc, nil, status)
122
+ else
123
+ filenames.each { |f| @failed_tests << CheckRunner::Issue.new(f, desc, nil, status) }
124
+ end
125
+ end
126
+
127
+ def hash?(url)
128
+ URI.parse(url).fragment
129
+ rescue URI::InvalidURIError
130
+ nil
131
+ end
132
+
133
+ end
134
+ end
135
+ end
@@ -0,0 +1,24 @@
1
+ require 'nokogiri'
2
+
3
+ module HTML
4
+ module Utils
5
+ def create_nokogiri(path)
6
+ if File.exist? path
7
+ content = File.open(path).read
8
+ else
9
+ content = path
10
+ end
11
+
12
+ Nokogiri::HTML(content)
13
+ end
14
+ module_function :create_nokogiri
15
+
16
+ def swap(href, replacement)
17
+ replacement.each do |link, replace|
18
+ href = href.gsub(link, replace)
19
+ end
20
+ href
21
+ end
22
+ module_function :swap
23
+ end
24
+ end
@@ -1,5 +1,5 @@
1
1
  module HTML
2
2
  class Proofer
3
- VERSION = "1.6.0"
3
+ VERSION = '2.0.0'
4
4
  end
5
5
  end