html-proofer 1.6.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -1
  3. data/README.md +74 -56
  4. data/Rakefile +4 -6
  5. data/bin/htmlproof +46 -36
  6. data/html-proofer.gemspec +22 -22
  7. data/lib/html/proofer/check_runner/issue.rb +62 -0
  8. data/lib/html/proofer/{check.rb → check_runner.rb} +11 -19
  9. data/lib/html/proofer/checkable.rb +42 -28
  10. data/lib/html/proofer/checks/favicon.rb +6 -6
  11. data/lib/html/proofer/checks/html.rb +11 -12
  12. data/lib/html/proofer/checks/images.rb +11 -11
  13. data/lib/html/proofer/checks/links.rb +30 -28
  14. data/lib/html/proofer/checks/scripts.rb +7 -8
  15. data/lib/html/proofer/log.rb +38 -0
  16. data/lib/html/proofer/url_validator.rb +135 -0
  17. data/lib/html/proofer/utils.rb +24 -0
  18. data/lib/html/proofer/version.rb +1 -1
  19. data/lib/html/proofer.rb +95 -199
  20. data/spec/html/proofer/command_spec.rb +82 -0
  21. data/spec/html/proofer/favicon_spec.rb +20 -20
  22. data/spec/html/proofer/fixtures/images/srcSetCheck.html +7 -0
  23. data/spec/html/proofer/fixtures/images/srcSetIgnorable.html +13 -0
  24. data/spec/html/proofer/fixtures/images/srcSetMissingAlt.html +7 -0
  25. data/spec/html/proofer/fixtures/images/srcSetMissingImage.html +7 -0
  26. data/spec/html/proofer/fixtures/links/erstiebegrüßung.html +1 -0
  27. data/spec/html/proofer/fixtures/links/erstiebegrüßung.html +1 -0
  28. data/spec/html/proofer/fixtures/links/file.foo +11 -0
  29. data/spec/html/proofer/fixtures/links/folder/multiples/catalog/file.html +8 -0
  30. data/spec/html/proofer/fixtures/links/folder/multiples/javadoc/file.html +8 -0
  31. data/spec/html/proofer/fixtures/links/nodupe.html +1 -1
  32. data/spec/html/proofer/fixtures/links/redirected_error.html +1 -0
  33. data/spec/html/proofer/fixtures/links/rootLink/rootLink.html +0 -1
  34. data/spec/html/proofer/fixtures/links/urlencoded-href.html +2 -0
  35. data/spec/html/proofer/fixtures/links/utf8Link.html +2 -0
  36. data/spec/html/proofer/fixtures/utils/lang-jp.html +1 -0
  37. data/spec/html/proofer/html_spec.rb +25 -25
  38. data/spec/html/proofer/images_spec.rb +59 -35
  39. data/spec/html/proofer/links_spec.rb +152 -109
  40. data/spec/html/proofer/scripts_spec.rb +17 -17
  41. data/spec/html/proofer/utils_spec.rb +14 -0
  42. data/spec/html/proofer_spec.rb +58 -38
  43. data/spec/spec_helper.rb +13 -6
  44. metadata +39 -7
  45. data/lib/html/proofer/checks.rb +0 -15
  46. data/lib/html/proofer/issue.rb +0 -21
@@ -1,34 +1,37 @@
1
+ require 'addressable/uri'
2
+ require_relative './utils'
3
+
1
4
  module HTML
2
5
  class Proofer
6
+ # Represents the superclass from which all checks derive.
3
7
  class Checkable
4
- def initialize(obj, type, check)
5
- @src = obj['src']
6
- @href = obj['href']
7
- @alt = obj['alt']
8
- @name = obj['name']
9
- @id = obj['id']
10
- @rel = obj['rel']
8
+ include HTML::Utils
9
+ attr_reader :line
10
+
11
+ def initialize(obj, check)
12
+ obj.attributes.each_pair do |attribute, value|
13
+ next if attribute == 'data-proofer-ignore' # TODO: not quite sure why this doesn't work
14
+ instance_variable_set("@#{attribute}".to_sym, value.value)
15
+ end
11
16
 
12
17
  @data_ignore_proofer = obj['data-proofer-ignore']
13
18
  @content = obj.content
14
19
  @check = check
15
20
  @checked_paths = {}
16
- @type = type
21
+ @type = self.class.name
22
+ @line = obj.line
17
23
 
18
24
  if @href && @check.options[:href_swap]
19
- @check.options[:href_swap].each do |link, replace|
20
- @href = @href.gsub(link, replace)
21
- end
25
+ @href = swap(@href, @check.options[:href_swap])
22
26
  end
23
27
 
24
28
  # fix up missing protocols
25
- @href.insert 0, "http:" if @href =~ /^\/\//
26
- @src.insert 0, "http:" if @src =~ /^\/\//
27
-
29
+ @href.insert 0, 'http:' if @href =~ %r{^//}
30
+ @src.insert 0, 'http:' if @src =~ %r{^//}
28
31
  end
29
32
 
30
33
  def url
31
- @src || @href || ""
34
+ @src || @srcset || @href || ''
32
35
  end
33
36
 
34
37
  def valid?
@@ -42,15 +45,15 @@ module HTML
42
45
  end
43
46
 
44
47
  def path
45
- parts.path if !parts.nil?
48
+ CGI.unescape parts.path unless parts.nil?
46
49
  end
47
50
 
48
51
  def hash
49
- parts.fragment if !parts.nil?
52
+ parts.fragment unless parts.nil?
50
53
  end
51
54
 
52
55
  def scheme
53
- parts.scheme if !parts.nil?
56
+ parts.scheme unless parts.nil?
54
57
  end
55
58
 
56
59
  # path is to an external server
@@ -66,13 +69,13 @@ module HTML
66
69
  return true if @data_ignore_proofer
67
70
 
68
71
  case @type
69
- when "favicon"
72
+ when 'FaviconCheckable'
70
73
  return true if url.match(/^data:image/)
71
- when "link"
72
- return true if ignores_pattern_check(@check.additional_href_ignores)
73
- when "image"
74
+ when 'LinkCheckable'
75
+ return true if ignores_pattern_check(@check.href_ignores)
76
+ when 'ImageCheckable'
74
77
  return true if url.match(/^data:image/)
75
- return true if ignores_pattern_check(@check.additional_alt_ignores)
78
+ return true if ignores_pattern_check(@check.alt_ignores)
76
79
  end
77
80
  end
78
81
 
@@ -83,7 +86,7 @@ module HTML
83
86
 
84
87
  # path is an anchor or a query
85
88
  def internal?
86
- url.start_with? "#", "?"
89
+ url.start_with? '#', '?'
87
90
  end
88
91
 
89
92
  def file_path
@@ -102,7 +105,7 @@ module HTML
102
105
  file = File.join base, path
103
106
 
104
107
  # implicit index support
105
- if File.directory? file and !unslashed_directory? file
108
+ if File.directory?(file) && !unslashed_directory?(file)
106
109
  file = File.join file, @check.options[:directory_index_file]
107
110
  end
108
111
 
@@ -111,7 +114,7 @@ module HTML
111
114
 
112
115
  # checks if a file exists relative to the current pwd
113
116
  def exists?
114
- return @checked_paths[absolute_path] if @checked_paths.has_key? absolute_path
117
+ return @checked_paths[absolute_path] if @checked_paths.key? absolute_path
115
118
  @checked_paths[absolute_path] = File.exist? absolute_path
116
119
  end
117
120
 
@@ -132,9 +135,20 @@ module HTML
132
135
  false
133
136
  end
134
137
 
135
- def unslashed_directory? file
136
- File.directory? file and !file.end_with? File::SEPARATOR and !@check.options[:followlocation]
138
+ def unslashed_directory?(file)
139
+ File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location?
137
140
  end
141
+
142
+ def follow_location?
143
+ @check.options[:typhoeus] && @check.options[:typhoeus][:followlocation]
144
+ end
145
+
146
+ private
147
+
148
+ def real_attr(attr)
149
+ attr.to_s unless attr.nil? || attr.empty?
150
+ end
151
+
138
152
  end
139
153
  end
140
154
  end
@@ -1,21 +1,21 @@
1
1
  # encoding: utf-8
2
2
 
3
- class Favicon < ::HTML::Proofer::Checkable
3
+ class FaviconCheckable < ::HTML::Proofer::Checkable
4
4
  def rel
5
5
  @rel
6
6
  end
7
7
  end
8
8
 
9
- class Favicons < ::HTML::Proofer::Checks::Check
9
+ class FaviconCheck < ::HTML::Proofer::CheckRunner
10
10
 
11
11
  def run
12
- @html.xpath("//link[not(ancestor::pre or ancestor::code)]").each do |favicon|
13
- favicon = Favicon.new favicon, "favicon", self
12
+ @html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |favicon|
13
+ favicon = FaviconCheckable.new favicon, self
14
14
  next if favicon.ignore?
15
- return if favicon.rel.split(" ").last.eql? "icon"
15
+ return if favicon.rel.split(' ').last.eql? 'icon'
16
16
  end
17
17
 
18
- self.add_issue "no favicon specified"
18
+ add_issue 'no favicon specified'
19
19
  end
20
20
 
21
21
  end
@@ -1,24 +1,23 @@
1
1
  # encoding: utf-8
2
2
 
3
- class Html < ::HTML::Proofer::Checks::Check
3
+ class HtmlCheck < ::HTML::Proofer::CheckRunner
4
4
 
5
5
  # new html5 tags (source: http://www.w3schools.com/html/html5_new_elements.asp)
6
- HTML5_TAGS = %w(article aside bdi details dialog figcaption
7
- figure footer header main mark menuitem meter
8
- nav progress rp rt ruby section summary
9
- time wbr datalist keygen output color date
10
- datetime datetime-local email month number
11
- range search tel time url week canvas
6
+ HTML5_TAGS = %w(article aside bdi details dialog figcaption
7
+ figure footer header main mark menuitem meter
8
+ nav progress rp rt ruby section summary
9
+ time wbr datalist keygen output color date
10
+ datetime datetime-local email month number
11
+ range search tel time url week canvas
12
12
  svg audio embed source track video)
13
13
 
14
14
  def run
15
15
  @html.errors.each do |e|
16
+ # Nokogiri (or rather libxml2 underhood) only recognizes html4 tags,
17
+ # so we need to skip errors caused by the new tags in html5
18
+ next if HTML5_TAGS.include? e.to_s[/Tag ([\w-]+) invalid/o, 1]
16
19
 
17
- # Nokogiri (or rather libxml2 underhood) only recognizes html4 tags,
18
- # so we need to skip errors caused by the new tags in html5
19
- next if HTML5_TAGS.include? e.to_s[/Tag ([\w-]+) invalid/o, 1]
20
-
21
- self.add_issue(e.to_s)
20
+ add_issue(e.to_s)
22
21
  end
23
22
  end
24
23
  end
@@ -1,19 +1,19 @@
1
1
  # encoding: utf-8
2
2
 
3
- class Image < ::HTML::Proofer::Checkable
3
+ class ImageCheckable < ::HTML::Proofer::Checkable
4
4
 
5
5
  SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/
6
6
 
7
7
  def valid_alt_tag?
8
- @alt and !@alt.empty?
8
+ @alt && !@alt.empty?
9
9
  end
10
10
 
11
11
  def terrible_filename?
12
- @src =~ SCREEN_SHOT_REGEX
12
+ src =~ SCREEN_SHOT_REGEX
13
13
  end
14
14
 
15
15
  def src
16
- @src unless @src.nil? || @src.empty?
16
+ real_attr(@src) || real_attr(@srcset)
17
17
  end
18
18
 
19
19
  def missing_src?
@@ -22,29 +22,29 @@ class Image < ::HTML::Proofer::Checkable
22
22
 
23
23
  end
24
24
 
25
- class Images < ::HTML::Proofer::Checks::Check
25
+ class ImageCheck < ::HTML::Proofer::CheckRunner
26
26
  def run
27
- @html.css("img").each do |i|
28
- img = Image.new i, "image", self
27
+ @html.css('img').each do |i|
28
+ img = ImageCheckable.new i, self
29
29
 
30
30
  next if img.ignore?
31
31
 
32
32
  # screenshot filenames should return because of terrible names
33
- next self.add_issue "image has a terrible filename (#{img.src})" if img.terrible_filename?
33
+ next add_issue("image has a terrible filename (#{img.src})", i.line) if img.terrible_filename?
34
34
 
35
35
  # does the image exist?
36
36
  if img.missing_src?
37
- self.add_issue "image has no src attribute"
37
+ add_issue('image has no src or srcset attribute', i.line)
38
38
  else
39
39
  if img.remote?
40
40
  add_to_external_urls img.src
41
41
  else
42
- self.add_issue("internal image #{img.src} does not exist") unless img.exists?
42
+ add_issue("internal image #{img.src} does not exist", i.line) unless img.exists?
43
43
  end
44
44
  end
45
45
 
46
46
  # check alt tag
47
- self.add_issue "image #{img.src} does not have an alt attribute" unless img.valid_alt_tag?
47
+ add_issue("image #{img.src} does not have an alt attribute", i.line) unless img.valid_alt_tag?
48
48
  end
49
49
 
50
50
  external_urls
@@ -1,6 +1,5 @@
1
1
  # encoding: utf-8
2
-
3
- class Link < ::HTML::Proofer::Checkable
2
+ class LinkCheckable < ::HTML::Proofer::Checkable
4
3
 
5
4
  def href
6
5
  real_attr @href
@@ -15,26 +14,21 @@ class Link < ::HTML::Proofer::Checkable
15
14
  end
16
15
 
17
16
  def missing_href?
18
- href.nil? and name.nil? and id.nil?
17
+ href.nil? && name.nil? && id.nil?
19
18
  end
20
19
 
21
20
  def placeholder?
22
21
  (id || name) && href.nil?
23
22
  end
24
23
 
25
- private
26
-
27
- def real_attr(attr)
28
- attr unless attr.nil? || attr.empty?
29
- end
30
-
31
24
  end
32
25
 
33
- class Links < ::HTML::Proofer::Checks::Check
26
+ class LinkCheck < ::HTML::Proofer::CheckRunner
27
+ include HTML::Utils
34
28
 
35
29
  def run
36
- @html.css("a, link").each do |l|
37
- link = Link.new l, "link", self
30
+ @html.css('a, link').each do |l|
31
+ link = LinkCheckable.new l, self
38
32
 
39
33
  next if link.ignore?
40
34
  next if link.href =~ /^javascript:/ # can't put this in ignore? because the URI does not parse
@@ -42,22 +36,22 @@ class Links < ::HTML::Proofer::Checks::Check
42
36
 
43
37
  # is it even a valid URL?
44
38
  unless link.valid?
45
- self.add_issue "#{link.href} is an invalid URL"
39
+ add_issue("#{link.href} is an invalid URL", l.line)
46
40
  next
47
41
  end
48
42
 
49
- if link.scheme == "mailto"
50
- self.add_issue "#{link.href} contains no email address" if link.path.empty?
51
- self.add_issue "#{link.href} contain an invalid email address" unless link.path.include?("@")
43
+ if link.scheme == 'mailto'
44
+ add_issue("#{link.href} contains no email address", l.line) if link.path.empty?
45
+ add_issue("#{link.href} contain an invalid email address", l.line) unless link.path.include?('@')
52
46
  end
53
47
 
54
- if link.scheme == "tel"
55
- self.add_issue "#{link.href} contains no phone number" if link.path.empty?
48
+ if link.scheme == 'tel'
49
+ add_issue("#{link.href} contains no phone number", l.line) if link.path.empty?
56
50
  end
57
51
 
58
52
  # is there even a href?
59
53
  if link.missing_href?
60
- self.add_issue("anchor has no href attribute")
54
+ add_issue('anchor has no href attribute', l.line)
61
55
  next
62
56
  end
63
57
 
@@ -69,26 +63,23 @@ class Links < ::HTML::Proofer::Checks::Check
69
63
  add_to_external_urls link.href
70
64
  next
71
65
  elsif !link.internal?
72
- self.add_issue "internally linking to #{link.href}, which does not exist" unless link.exists?
66
+ add_issue("internally linking to #{link.href}, which does not exist", l.line) unless link.exists?
73
67
  end
74
68
 
75
69
  # does the local directory have a trailing slash?
76
70
  if link.unslashed_directory? link.absolute_path
77
- self.add_issue("internally linking to a directory #{link.absolute_path} without trailing slash")
71
+ add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", l.line)
78
72
  next
79
73
  end
80
74
 
81
75
  # verify the target hash
82
76
  if link.hash
83
77
  if link.internal?
84
- self.add_issue "linking to internal hash ##{link.hash} that does not exist" unless hash_check @html, link.hash
85
- elsif link.external?
86
- unless link.exists?
87
- self.add_issue "trying to find hash of #{link.href}, but #{link.absolute_path} does not exist"
88
- else
89
- target_html = HTML::Proofer.create_nokogiri link.absolute_path
90
- self.add_issue "linking to #{link.href}, but #{link.hash} does not exist" unless hash_check target_html, link.hash
78
+ unless hash_check @html, link.hash
79
+ add_issue("linking to internal hash ##{link.hash} that does not exist", l.line)
91
80
  end
81
+ elsif link.external?
82
+ external_link_check(link)
92
83
  end
93
84
  end
94
85
  end
@@ -96,6 +87,17 @@ class Links < ::HTML::Proofer::Checks::Check
96
87
  external_urls
97
88
  end
98
89
 
90
+ def external_link_check(link)
91
+ if !link.exists?
92
+ add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", l.line)
93
+ else
94
+ target_html = create_nokogiri link.absolute_path
95
+ unless hash_check target_html, link.hash
96
+ add_issue("linking to #{link.href}, but #{link.hash} does not exist", link.line)
97
+ end
98
+ end
99
+ end
100
+
99
101
  def hash_check(html, href_hash)
100
102
  html.xpath("//*[@id='#{href_hash}']", "//*[@name='#{href_hash}']").length > 0
101
103
  end
@@ -1,9 +1,9 @@
1
1
  # encoding: utf-8
2
2
 
3
- class Script < ::HTML::Proofer::Checkable
3
+ class ScriptCheckable < ::HTML::Proofer::Checkable
4
4
 
5
5
  def src
6
- @src unless @src.nil? || @src.empty?
6
+ real_attr @src
7
7
  end
8
8
 
9
9
  def missing_src?
@@ -16,23 +16,22 @@ class Script < ::HTML::Proofer::Checkable
16
16
 
17
17
  end
18
18
 
19
- class Scripts < ::HTML::Proofer::Checks::Check
19
+ class ScriptCheck < ::HTML::Proofer::CheckRunner
20
20
  def run
21
- @html.css("script").each do |s|
22
- script = Script.new s, "script", self
21
+ @html.css('script').each do |s|
22
+ script = ScriptCheckable.new s, self
23
23
 
24
24
  next if script.ignore?
25
25
  next unless script.blank?
26
26
 
27
27
  # does the script exist?
28
28
  if script.missing_src?
29
- self.add_issue "script is empty and has no src attribute"
29
+ add_issue('script is empty and has no src attribute', s.line)
30
30
  elsif script.remote?
31
31
  add_to_external_urls script.src
32
32
  else
33
- self.add_issue("internal script #{script.src} does not exist") unless script.exists?
33
+ add_issue("internal script #{script.src} does not exist", s.line) unless script.exists?
34
34
  end
35
-
36
35
  end
37
36
 
38
37
  external_urls
@@ -0,0 +1,38 @@
1
+ require 'yell'
2
+ require 'colored'
3
+
4
+ module HTML
5
+ class Proofer
6
+ class Log
7
+ include Yell::Loggable
8
+
9
+ def initialize(verbose)
10
+ log_level = verbose ? :debug : :info
11
+
12
+ @logger = Yell.new(:format => false, \
13
+ :name => 'HTML::Proofer', \
14
+ :level => "gte.#{log_level}") do |l|
15
+ l.adapter :stdout, :level => [:debug, :info, :warn]
16
+ l.adapter :stderr, :level => [:error, :fatal]
17
+ end
18
+ end
19
+
20
+ def log(level, color, message)
21
+ @logger.send level, colorize(color, message)
22
+ end
23
+
24
+ def colorize(color, message)
25
+ if $stdout.isatty && $stderr.isatty
26
+ Colored.colorize(message, foreground: color)
27
+ else
28
+ message
29
+ end
30
+ end
31
+
32
+ # dumb override to play nice with Typhoeus/Ethon
33
+ def debug(message = nil)
34
+ log(:debug, :yellow, message) unless message.nil?
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,135 @@
1
+ require 'typhoeus'
2
+ require 'uri'
3
+ require_relative './utils'
4
+
5
+ module HTML
6
+ class Proofer
7
+ class UrlValidator
8
+ include Utils
9
+
10
+ attr_accessor :logger, :external_urls, :hydra
11
+
12
+ def initialize(logger, external_urls, options, typhoeus_opts, hydra_opts)
13
+ @logger = logger
14
+ @external_urls = external_urls
15
+ @failed_tests = []
16
+ @options = options
17
+ @hydra = Typhoeus::Hydra.new(hydra_opts)
18
+ @typhoeus_opts = typhoeus_opts
19
+ end
20
+
21
+ def run
22
+ external_link_checker(external_urls)
23
+ @failed_tests
24
+ end
25
+
26
+ # Proofer runs faster if we pull out all the external URLs and run the checks
27
+ # at the end. Otherwise, we're halting the consuming process for every file during
28
+ # the check_directory_of_files process.
29
+ #
30
+ # In addition, sorting the list lets libcurl keep connections to the same hosts alive.
31
+ #
32
+ # Finally, we'll first make a HEAD request, rather than GETing all the contents.
33
+ # If the HEAD fails, we'll fall back to GET, as some servers are not configured
34
+ # for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
35
+ # not an option.
36
+ def external_link_checker(external_urls)
37
+ external_urls = Hash[external_urls.sort]
38
+
39
+ count = external_urls.length
40
+ check_text = "#{count} " << (count == 1 ? 'external link' : 'external links')
41
+ logger.log :info, :blue, "Checking #{check_text}..."
42
+
43
+ Ethon.logger = logger # log from Typhoeus/Ethon
44
+
45
+ url_processor(external_urls)
46
+
47
+ logger.log :debug, :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..."
48
+ hydra.run
49
+ end
50
+
51
+ def url_processor(external_urls)
52
+ external_urls.each_pair do |href, filenames|
53
+ href = clean_url(href)
54
+ if hash?(href) && @options[:check_external_hash]
55
+ queue_request(:get, href, filenames)
56
+ else
57
+ queue_request(:head, href, filenames)
58
+ end
59
+ end
60
+ end
61
+
62
+ def clean_url(href)
63
+ Addressable::URI.parse(href).normalize
64
+ end
65
+
66
+ def queue_request(method, href, filenames)
67
+ request = Typhoeus::Request.new(href, @typhoeus_opts.merge({ :method => method }))
68
+ request.on_complete { |response| response_handler(response, filenames) }
69
+ hydra.queue request
70
+ end
71
+
72
+ def response_handler(response, filenames)
73
+ effective_url = response.options[:effective_url]
74
+ href = response.request.base_url.to_s
75
+ method = response.request.options[:method]
76
+ response_code = response.code
77
+ debug_msg = "Received a #{response_code} for #{href}"
78
+ debug_msg << " in #{filenames.join(' ')}" unless filenames.nil?
79
+ logger.log :debug, :yellow, debug_msg
80
+
81
+ if response_code.between?(200, 299)
82
+ check_hash_in_2xx_response(href, effective_url, response, filenames)
83
+ elsif response.timed_out?
84
+ handle_timeout(filenames, response_code)
85
+ elsif method == :head
86
+ queue_request(:get, href, filenames)
87
+ else
88
+ return if @options[:only_4xx] && !response_code.between?(400, 499)
89
+ # Received a non-successful http response.
90
+ add_failed_tests filenames, "External link #{href} failed: #{response_code} #{response.return_message}", response_code
91
+ end
92
+ end
93
+
94
+ # Even though the response was a success, we may have been asked to check
95
+ # if the hash on the URL exists on the page
96
+ def check_hash_in_2xx_response(href, effective_url, response, filenames)
97
+ return if @options[:only_4xx]
98
+ return unless @options[:check_external_hash]
99
+ return unless (hash = hash?(href))
100
+
101
+ body_doc = create_nokogiri(response.body)
102
+
103
+ # user-content is a special addition by GitHub.
104
+ xpath = %(//*[@name="#{hash}"]|//*[@id="#{hash}"])
105
+ if URI.parse(href).host.match(/github\.com/i)
106
+ xpath << %(|//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"])
107
+ end
108
+
109
+ return unless body_doc.xpath(xpath).empty?
110
+
111
+ add_failed_tests filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response.code
112
+ end
113
+
114
+ def handle_timeout
115
+ return if @options[:only_4xx]
116
+ add_failed_tests filenames, "External link #{href} failed: got a time out", response_code
117
+ end
118
+
119
+ def add_failed_tests(filenames, desc, status = nil)
120
+ if filenames.nil?
121
+ @failed_tests << CheckRunner::Issue.new('', desc, nil, status)
122
+ else
123
+ filenames.each { |f| @failed_tests << CheckRunner::Issue.new(f, desc, nil, status) }
124
+ end
125
+ end
126
+
127
+ def hash?(url)
128
+ URI.parse(url).fragment
129
+ rescue URI::InvalidURIError
130
+ nil
131
+ end
132
+
133
+ end
134
+ end
135
+ end
@@ -0,0 +1,24 @@
1
+ require 'nokogiri'
2
+
3
+ module HTML
4
+ module Utils
5
+ def create_nokogiri(path)
6
+ if File.exist? path
7
+ content = File.open(path).read
8
+ else
9
+ content = path
10
+ end
11
+
12
+ Nokogiri::HTML(content)
13
+ end
14
+ module_function :create_nokogiri
15
+
16
+ def swap(href, replacement)
17
+ replacement.each do |link, replace|
18
+ href = href.gsub(link, replace)
19
+ end
20
+ href
21
+ end
22
+ module_function :swap
23
+ end
24
+ end
@@ -1,5 +1,5 @@
1
1
  module HTML
2
2
  class Proofer
3
- VERSION = "1.6.0"
3
+ VERSION = '2.0.0'
4
4
  end
5
5
  end