html-proofer 2.5.2 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2882cf21c649ba0f4e231508045f46e474c74cbe
4
- data.tar.gz: 495aaf04be88b3710503c7c5f3099e37bbacfcbc
3
+ metadata.gz: 904c91c8694ab71a3722677bb5e8be2c78074503
4
+ data.tar.gz: 8e8c720d05ac809b4b5628711a40516a88be8dfa
5
5
  SHA512:
6
- metadata.gz: b8d342482dcdc6c0922ee2c703fffb5b0251e2f2a5dbb3bfbddc584bc5ff6867599dc1a5394c2a2f96ebc32b638a7e68a0de4db2b678af6be9b0aadd0881d88a
7
- data.tar.gz: c518d95329621dbd1eee59be267dbc6001988c4fc25e11e03e2fb83cf21bf4959f159724ef8ab7d2ab54d4785aa618629291795100cc88684f9354ee60e3f7dd
6
+ metadata.gz: e72778e7edd2f302a91b6727d4825ed64cc506cf473bc9575d0d79051ed39f56fca0bff8b79cba268c28a68b796f1c0495460e4c420d1f7f16439f84e7a94325
7
+ data.tar.gz: dd974ec72bf547882f85e59b51223b5ed2c54688288cf5265c34c01b872236daa4c1127c61ef4e25882f4dd340b74dc7c88370c31fcd603853b74f352b0e5213
@@ -5,15 +5,6 @@ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w( .. lib ))
5
5
 
6
6
  require 'html/proofer'
7
7
  require 'mercenary'
8
- require 'rubygems'
9
-
10
- def to_regex?(item)
11
- if item.start_with?('/') && item.end_with?('/')
12
- Regexp.new item[1...-1]
13
- else
14
- item
15
- end
16
- end
17
8
 
18
9
  Mercenary.program(:htmlproof) do |p|
19
10
  p.version HTML::Proofer::VERSION
@@ -22,25 +13,27 @@ Mercenary.program(:htmlproof) do |p|
22
13
 
23
14
  p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.'
24
15
 
16
+ p.option 'allow_hash_href', '--allow-hash-href', 'If `true`, ignores the `href` `#`'
25
17
  p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
26
- p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'Comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore'
27
- p.option 'empty_alt_ignore', '--empty-alt-ignore', 'Ignores images with empty alt tags.'
18
+ p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'A comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore'
28
19
  p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, ' An array of Strings indicating which checks you\'d like to not perform.'
29
20
  p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the website exists). This slows the checker down (default: `false`).'
30
21
  p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).'
31
22
  p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogiri (default: `false`).'
32
23
  p.option 'directory_index_file', '--directory-index-file', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
33
- p.option 'disable_external', '--disable-external', 'Disables the external link checker (default: `false`)'
34
- p.option 'error_sort', '--error-sort SORT', 'Defines the sort order for error output. Can be `path`, `desc`, or `status` (default: `path`).'
24
+ p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker, which can take a lot of time (default: `false`)'
25
+ p.option 'empty_alt_ignore', '--empty-alt-ignore', 'If `true`, ignores images with empty alt tags'
26
+ p.option 'error_sort', '--error-sort SORT', 'Defines the sort order for error output. Can be `:path`, `:desc`, or `:status` (default: `path`).'
35
27
  p.option 'enforce_https', '--enforce-https', 'Fails a link if it\'s not marked as `https` (default: `false`).'
36
- p.option 'ext', '--ext EXT', String, 'The extension of your HTML files (default: `.html`)'
37
- p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'Comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
38
- p.option 'href_ignore', '--href-ignore link1,[link2,...]', Array, 'Comma-separated list of Strings or RegExps containing `href`s that are safe to ignore.'
39
- p.option 'href_swap', '--href-swap re:string,[re:string,...]', Array, 'Comma-separated list of key-value pairs of `RegExp:String`. Transforms links matching `RegExp` into `String`'
28
+ p.option 'ext', '--ext EXT', String, 'The extension of your HTML files including the dot. (default: `.html`)'
29
+ p.option 'external_only', '--external_only', 'Only checks problems with external references'
30
+ p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
31
+ p.option 'href_ignore', '--href-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored. **Will be renamed in a future release.**'
32
+ p.option 'href_swap', '--href-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms links that match `RegExp` into `String` via `gsub`. **Will be renamed in a future release.**'
40
33
  p.option 'ignore_script_embeds', '--ignore-script-embeds', 'Ignore `check_html` errors associated with `script`s (default: `false`)'
41
- p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4x status code range.'
42
- p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'Comma-separated list of Strings or RegExps containing URLs that are safe to ignore.'
43
- p.option 'verbose', '--verbose', 'Enables more verbose logging.'
34
+ p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
35
+ p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
36
+ p.option 'verbose', '--verbose', 'If `true`, outputs extra information as the checking happens. Useful for debugging. **Will be deprecated in a future release.**'
44
37
  p.option 'verbosity', '--verbosity', String, 'Sets the logging level, as determined by Yell'
45
38
 
46
39
  p.action do |args, opts|
@@ -52,7 +45,7 @@ Mercenary.program(:htmlproof) do |p|
52
45
  # prepare everything to go to proofer
53
46
  p.options.select { |o| !opts[o.config_key].nil? }.each do |option|
54
47
  if option.return_type.to_s == 'Array' # TODO: is_a? doesn't work here?
55
- opts[option.config_key] = opts[option.config_key].map { |i| to_regex?(i) }
48
+ opts[option.config_key] = opts[option.config_key].map { |i| HTML::Proofer::Configuration.to_regex?(i) }
56
49
  end
57
50
  options[option.config_key.to_sym] = opts[option.config_key]
58
51
  end
@@ -68,7 +61,7 @@ Mercenary.program(:htmlproof) do |p|
68
61
 
69
62
  # check for ignore_script_embeds as it should be set in :validation
70
63
  unless opts['ignore_script_embeds'].nil?
71
- options[:validation] = { :ignore_script_embeds => true }
64
+ options[:validation] = { :ignore_script_embeds => true }
72
65
  end
73
66
 
74
67
  options[:error_sort] = opts['error-sort'].to_sym unless opts['error-sort'].nil?
@@ -8,30 +8,23 @@ end
8
8
  require_all 'proofer'
9
9
  require_all 'proofer/check_runner'
10
10
  require_all 'proofer/checks'
11
- require_relative './proofer/utils'
12
- require_relative './proofer/xpathfunctions'
13
11
 
14
12
  require 'parallel'
13
+ require 'fileutils'
15
14
 
16
15
  begin
17
16
  require 'awesome_print'
18
17
  rescue LoadError; end
19
18
 
20
19
  module HTML
21
-
22
20
  class Proofer
23
21
  include HTML::Proofer::Utils
24
22
 
25
23
  attr_reader :options, :typhoeus_opts, :hydra_opts, :parallel_opts, :validation_opts, :external_urls, :iterable_external_urls
26
24
 
27
- TYPHOEUS_DEFAULTS = {
28
- :followlocation => true,
29
- :headers => {
30
- 'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{VERSION}; +https://github.com/gjtorikian/html-proofer)"
31
- }
32
- }
33
-
34
25
  def initialize(src, opts = {})
26
+ FileUtils.mkdir_p(STORAGE_DIR) unless File.exist?(STORAGE_DIR)
27
+
35
28
  @src = src
36
29
 
37
30
  if opts[:verbose]
@@ -41,30 +34,12 @@ module HTML
41
34
  warn '`@options[:href_ignore]` will be renamed in a future 3.x.x release: http://git.io/vGHHy'
42
35
  end
43
36
 
44
- @proofer_opts = {
45
- :ext => '.html',
46
- :check_favicon => false,
47
- :href_swap => [],
48
- :href_ignore => [],
49
- :file_ignore => [],
50
- :url_ignore => [],
51
- :check_external_hash => false,
52
- :alt_ignore => [],
53
- :empty_alt_ignore => false,
54
- :enforce_https => false,
55
- :disable_external => false,
56
- :verbose => false,
57
- :only_4xx => false,
58
- :directory_index_file => 'index.html',
59
- :check_html => false,
60
- :error_sort => :path,
61
- :checks_to_ignore => []
62
- }
63
-
64
- @typhoeus_opts = TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
37
+ @proofer_opts = HTML::Proofer::Configuration::PROOFER_DEFAULTS
38
+
39
+ @typhoeus_opts = HTML::Proofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
65
40
  opts.delete(:typhoeus)
66
41
 
67
- @hydra_opts = opts[:hydra] || {}
42
+ @hydra_opts = HTML::Proofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})
68
43
  opts.delete(:hydra)
69
44
 
70
45
  # fall back to parallel defaults
@@ -84,9 +59,7 @@ module HTML
84
59
  end
85
60
 
86
61
  def run
87
- count = checks.length
88
- check_text = "#{checks} " << (count == 1 ? 'check' : 'checks')
89
- logger.log :info, :blue, "Running #{check_text} on #{@src} on *#{@options[:ext]}... \n\n"
62
+ logger.log :info, :blue, "Running #{checks} on #{@src} on *#{@options[:ext]}... \n\n"
90
63
 
91
64
  if @src.is_a?(Array) && !@options[:disable_external]
92
65
  check_list_of_links
@@ -123,9 +96,19 @@ module HTML
123
96
  @failed_tests.concat(item[:failed_tests])
124
97
  end
125
98
 
126
- validate_urls unless @options[:disable_external]
99
+ # TODO: lazy. if we're checking only external links,
100
+ # we'll just trash all the failed tests. really, we should
101
+ # just not run those other checks at all.
102
+ if @options[:external_only]
103
+ @failed_tests = []
104
+ validate_urls
105
+ elsif !@options[:disable_external]
106
+ validate_urls
107
+ end
127
108
 
128
- logger.log :info, :blue, "Ran on #{files.length} files!\n\n"
109
+ count = files.length
110
+ file_text = pluralize(count, 'file', 'files')
111
+ logger.log :info, :blue, "Ran on #{file_text}!\n\n"
129
112
  end
130
113
 
131
114
  # Walks over each implemented check and runs them on the files, in parallel.
@@ -195,7 +178,7 @@ module HTML
195
178
 
196
179
  sorted_failures.sort_and_report
197
180
  count = @failed_tests.length
198
- failure_text = "#{count} " << (count == 1 ? 'failure' : 'failures')
181
+ failure_text = pluralize(count, 'failure', 'failures')
199
182
  fail logger.colorize :red, "HTML-Proofer found #{failure_text}!"
200
183
  end
201
184
  end
@@ -1,16 +1,141 @@
1
+ require_relative 'utils'
2
+
3
+ require 'json'
4
+ require 'active_support/core_ext/string'
5
+ require 'active_support/core_ext/date'
6
+ require 'active_support/core_ext/numeric/time'
7
+
1
8
  module HTML
2
9
  class Proofer
3
- module Cache
4
- def create_nokogiri(path)
5
- if File.exist? path
6
- content = File.open(path).read
10
+ class Cache
11
+ include HTML::Proofer::Utils
12
+
13
+ FILENAME = File.join(STORAGE_DIR, 'cache.log')
14
+
15
+ attr_accessor :exists, :load, :cache_log, :cache_time
16
+
17
+ def initialize(logger, options)
18
+ @logger = logger
19
+ @cache_log = {}
20
+
21
+ if options.nil? || options.empty?
22
+ @load = false
23
+ else
24
+ @load = true
25
+ @parsed_timeframe = parsed_timeframe(options[:timeframe] || '30d')
26
+ end
27
+ @cache_time = Time.now
28
+
29
+ if File.exist?(FILENAME)
30
+ @exists = true
31
+ contents = File.read(FILENAME)
32
+ @cache_log = contents.empty? ? {} : JSON.parse(contents)
33
+ else
34
+ @exists = false
35
+ end
36
+ end
37
+
38
+ def within_timeframe?(time)
39
+ (@parsed_timeframe..@cache_time).cover?(time)
40
+ end
41
+
42
+ def urls
43
+ @cache_log['urls'] || []
44
+ end
45
+
46
+ def parsed_timeframe(timeframe)
47
+ time, date = timeframe.match(/(\d+)(\D)/).captures
48
+ time = time.to_f
49
+ case date
50
+ when 'M'
51
+ time.months.ago
52
+ when 'w'
53
+ time.weeks.ago
54
+ when 'd'
55
+ time.days.ago
56
+ when 'h'
57
+ time.hours.ago
7
58
  else
8
- content = path
59
+ fail ArgumentError, "#{date} is not a valid timeframe!"
9
60
  end
61
+ end
62
+
63
+ def add(url, filenames, status, msg = '')
64
+ data = {
65
+ :time => @cache_time,
66
+ :filenames => filenames,
67
+ :status => status,
68
+ :message => msg
69
+ }
70
+
71
+ @cache_log[clean_url(url)] = data
72
+ end
73
+
74
+ def detect_url_changes(found)
75
+ existing_urls = @cache_log.keys.map { |url| clean_url(url) }
76
+ found_urls = found.keys.map { |url| clean_url(url) }
77
+
78
+ # prepare to add new URLs detected
79
+ additions = found.reject do |url, _|
80
+ url = clean_url(url)
81
+ if existing_urls.include?(url)
82
+ true
83
+ else
84
+ @logger.log :debug, :yellow, "Adding #{url} to cache check"
85
+ false
86
+ end
87
+ end
88
+
89
+ new_link_count = additions.length
90
+ new_link_text = pluralize(new_link_count, 'link', 'links')
91
+ @logger.log :info, :blue, "Adding #{new_link_text} to the cache..."
92
+
93
+ # remove from cache URLs that no longer exist
94
+ del = 0
95
+ @cache_log.delete_if do |url, _|
96
+ url = clean_url(url)
97
+ if !found_urls.include?(url)
98
+ @logger.log :debug, :yellow, "Removing #{url} from cache check"
99
+ del += 1
100
+ true
101
+ else
102
+ false
103
+ end
104
+ end
105
+
106
+ del_link_text = pluralize(del, 'link', 'links')
107
+ @logger.log :info, :blue, "Removing #{del_link_text} from the cache..."
108
+
109
+ additions
110
+ end
111
+
112
+ def write
113
+ File.write(FILENAME, @cache_log.to_json)
114
+ end
115
+
116
+ def load?
117
+ @load.nil?
118
+ end
119
+
120
+
121
+ # FIXME: there seems to be some discrepancy where Typhoeus occasionally adds
122
+ # a trailing slash to URL strings, which causes issues with the cache
123
+ def slashless_url(url)
124
+ url.chomp('/')
125
+ end
126
+
127
+ # FIXME: it seems that Typhoeus actually acts on escaped URLs,
128
+ # but there's no way to get at that information, and the cache
129
+ # stores unescaped URLs. Because of this, some links, such as
130
+ # github.com/search/issues?q=is:open+is:issue+fig are not matched
131
+ # as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig
132
+ def unescape_url(url)
133
+ Addressable::URI.unescape(url)
134
+ end
10
135
 
11
- Nokogiri::HTML(content)
136
+ def clean_url(url)
137
+ slashless_url(unescape_url(url))
12
138
  end
13
- module_function :create_nokogiri
14
139
  end
15
140
  end
16
141
  end
@@ -7,7 +7,7 @@ module HTML
7
7
 
8
8
  attr_reader :issues, :src, :path, :options, :typhoeus_opts, :hydra_opts, :parallel_opts, \
9
9
  :validation_opts, :external_urls, :href_ignores, :url_ignores, :alt_ignores, \
10
- :empty_alt_ignore
10
+ :empty_alt_ignore, :allow_hash_href
11
11
 
12
12
  def initialize(src, path, html, options, typhoeus_opts, hydra_opts, parallel_opts, validation_opts)
13
13
  @src = src
@@ -23,6 +23,7 @@ module HTML
23
23
  @url_ignores = @options[:url_ignore]
24
24
  @alt_ignores = @options[:alt_ignore]
25
25
  @empty_alt_ignore = @options[:empty_alt_ignore]
26
+ @allow_hash_href = @options[:allow_hash_href]
26
27
  @external_urls = {}
27
28
  end
28
29
 
@@ -90,6 +90,10 @@ module HTML
90
90
  @check.empty_alt_ignore
91
91
  end
92
92
 
93
+ def allow_hash_href?
94
+ @check.allow_hash_href
95
+ end
96
+
93
97
  # path is external to the file
94
98
  def external?
95
99
  !internal?
@@ -32,6 +32,7 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
32
32
  next if link.ignore?
33
33
  next if link.href =~ /^javascript:/ # can't put this in ignore? because the URI does not parse
34
34
  next if link.placeholder?
35
+ next if link.allow_hash_href? && link.href == '#'
35
36
 
36
37
  # is it even a valid URL?
37
38
  unless link.valid?
@@ -0,0 +1,48 @@
1
+ module HTML
2
+ class Proofer
3
+ module Configuration
4
+ require_relative 'version'
5
+
6
+ PROOFER_DEFAULTS = {
7
+ :allow_hash_href => false,
8
+ :alt_ignore => [],
9
+ :check_external_hash => false,
10
+ :check_favicon => false,
11
+ :check_html => false,
12
+ :checks_to_ignore => [],
13
+ :directory_index_file => 'index.html',
14
+ :disable_external => false,
15
+ :empty_alt_ignore => false,
16
+ :enforce_https => false,
17
+ :error_sort => :path,
18
+ :ext => '.html',
19
+ :external_only => false,
20
+ :file_ignore => [],
21
+ :href_ignore => [],
22
+ :href_swap => [],
23
+ :only_4xx => false,
24
+ :url_ignore => [],
25
+ :verbose => false
26
+ }
27
+
28
+ TYPHOEUS_DEFAULTS = {
29
+ :followlocation => true,
30
+ :headers => {
31
+ 'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{HTML::Proofer::VERSION}; +https://github.com/gjtorikian/html-proofer)"
32
+ }
33
+ }
34
+
35
+ HYDRA_DEFAULTS = {
36
+ :max_concurrency => 50
37
+ }
38
+
39
+ def self.to_regex?(item)
40
+ if item.start_with?('/') && item.end_with?('/')
41
+ Regexp.new item[1...-1]
42
+ else
43
+ item
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -1,6 +1,7 @@
1
1
  require 'typhoeus'
2
2
  require 'uri'
3
3
  require_relative './utils'
4
+ require_relative './cache'
4
5
 
5
6
  module HTML
6
7
  class Proofer
@@ -18,16 +19,40 @@ module HTML
18
19
  @hydra = Typhoeus::Hydra.new(hydra_opts)
19
20
  @typhoeus_opts = typhoeus_opts
20
21
  @external_domain_paths_with_queries = {}
22
+ @cache = Cache.new(@logger, @options[:cache])
21
23
  end
22
24
 
23
25
  def run
24
26
  @iterable_external_urls = remove_query_values
25
- external_link_checker(@iterable_external_urls)
27
+
28
+ if @cache.exists && @cache.load
29
+ cache_count = @cache.cache_log.length
30
+ cache_text = pluralize(cache_count, 'link', 'links')
31
+
32
+ logger.log :info, :blue, "Found #{cache_text} in the cache..."
33
+
34
+ urls_to_check = @cache.detect_url_changes(@iterable_external_urls)
35
+
36
+ @cache.cache_log.each_pair do |url, cache|
37
+ if @cache.within_timeframe?(cache['time'])
38
+ next if cache['message'].empty? # these were successes to skip
39
+ urls_to_check[url] = cache['filenames'] # these are failures to retry
40
+ else
41
+ urls_to_check[url] = cache['filenames'] # pass or fail, recheck expired links
42
+ end
43
+ end
44
+
45
+ external_link_checker(urls_to_check)
46
+ else
47
+ external_link_checker(@iterable_external_urls)
48
+ end
49
+
50
+ @cache.write
26
51
  @failed_tests
27
52
  end
28
53
 
29
54
  def remove_query_values
30
- return if @external_urls.nil?
55
+ return nil if @external_urls.nil?
31
56
  iterable_external_urls = @external_urls.dup
32
57
  @external_urls.keys.each do |url|
33
58
  uri = begin
@@ -75,14 +100,16 @@ module HTML
75
100
  external_urls = Hash[external_urls.sort]
76
101
 
77
102
  count = external_urls.length
78
- check_text = "#{count} " << (count == 1 ? 'external link' : 'external links')
103
+ check_text = pluralize(count, 'external link', 'external links')
79
104
  logger.log :info, :blue, "Checking #{check_text}..."
80
105
 
81
106
  Ethon.logger = logger # log from Typhoeus/Ethon
82
107
 
83
108
  url_processor(external_urls)
84
109
 
85
- logger.log :debug, :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..."
110
+ logger.log :debug, :yellow, "Running requests for:"
111
+ logger.log :debug, :yellow, "###\n" + external_urls.keys.join("\n") + "\n###"
112
+
86
113
  hydra.run
87
114
  end
88
115
 
@@ -125,14 +152,19 @@ module HTML
125
152
 
126
153
  if response_code.between?(200, 299)
127
154
  check_hash_in_2xx_response(href, effective_url, response, filenames)
155
+ @cache.add(href, filenames, response_code)
128
156
  elsif response.timed_out?
129
157
  handle_timeout(href, filenames, response_code)
158
+ elsif response_code == 0
159
+ handle_failure(href, filenames, response_code)
130
160
  elsif method == :head
131
161
  queue_request(:get, href, filenames)
132
162
  else
133
163
  return if @options[:only_4xx] && !response_code.between?(400, 499)
134
164
  # Received a non-successful http response.
135
- add_external_issue(filenames, "External link #{href} failed: #{response_code} #{response.return_message}", response_code)
165
+ msg = "External link #{href} failed: #{response_code} #{response.return_message}"
166
+ add_external_issue(filenames, msg, response_code)
167
+ @cache.add(href, filenames, response_code, msg)
136
168
  end
137
169
  end
138
170
 
@@ -153,12 +185,23 @@ module HTML
153
185
 
154
186
  return unless body_doc.xpath(xpath).empty?
155
187
 
156
- add_external_issue filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response.code
188
+ msg = "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not"
189
+ add_external_issue(filenames, msg, response.code)
190
+ @cache.add(href, filenames, response.code, msg)
157
191
  end
158
192
 
159
193
  def handle_timeout(href, filenames, response_code)
194
+ msg = "External link #{href} failed: got a time out (response code #{response_code})"
195
+ @cache.add(href, filenames, 0, msg)
196
+ return if @options[:only_4xx]
197
+ add_external_issue(filenames, msg, response_code)
198
+ end
199
+
200
+ def handle_failure(href, filenames, response_code)
201
+ msg = "External link #{href} failed: response code #{response_code} means something's wrong"
202
+ @cache.add(href, filenames, 0, msg)
160
203
  return if @options[:only_4xx]
161
- add_external_issue filenames, "External link #{href} failed: got a time out", response_code
204
+ add_external_issue(filenames, msg, response_code)
162
205
  end
163
206
 
164
207
  def add_external_issue(filenames, desc, status = nil)
@@ -3,6 +3,12 @@ require 'nokogiri'
3
3
  module HTML
4
4
  class Proofer
5
5
  module Utils
6
+ STORAGE_DIR = File.join('tmp', '.htmlproofer')
7
+
8
+ def pluralize(count, single, plural)
9
+ "#{count} " << (count == 1 ? single : plural)
10
+ end
11
+
6
12
  def create_nokogiri(path)
7
13
  if File.exist? path
8
14
  content = File.open(path).read
@@ -10,7 +16,7 @@ module HTML
10
16
  content = path
11
17
  end
12
18
 
13
- Nokogiri::HTML(content)
19
+ Nokogiri::HTML(clean_content(content))
14
20
  end
15
21
  module_function :create_nokogiri
16
22
 
@@ -21,6 +27,21 @@ module HTML
21
27
  href
22
28
  end
23
29
  module_function :swap
30
+
31
+ # address a problem with Nokogiri's parsing of URL entities
32
+ # problem from http://git.io/vBYU1
33
+ # solution from http://git.io/vBYUi
34
+ def clean_content(string)
35
+ matches = string.scan(%r{https?://([^>]+)}i)
36
+
37
+ matches.flatten.each do |url|
38
+ escaped_url = url.gsub(/&(?!amp;)/, '&amp;')
39
+ escaped_url = escaped_url.gsub(%r{/}, '&#47;')
40
+ string.gsub!(url, escaped_url)
41
+ end
42
+ string
43
+ end
44
+ module_function :clean_content
24
45
  end
25
46
  end
26
47
  end
@@ -1,5 +1,5 @@
1
1
  module HTML
2
2
  class Proofer
3
- VERSION = '2.5.2'
3
+ VERSION = '2.6.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.2
4
+ version: 2.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-06 00:00:00.000000000 Z
11
+ date: 2015-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mercenary
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: '2.3'
111
+ - !ruby/object:Gem::Dependency
112
+ name: activesupport
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '4.2'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '4.2'
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: redcarpet
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -178,6 +192,20 @@ dependencies:
178
192
  - - "~>"
179
193
  - !ruby/object:Gem::Version
180
194
  version: '2.9'
195
+ - !ruby/object:Gem::Dependency
196
+ name: timecop
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - "~>"
200
+ - !ruby/object:Gem::Version
201
+ version: '0.8'
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - "~>"
207
+ - !ruby/object:Gem::Version
208
+ version: '0.8'
181
209
  description: Test your rendered HTML files to make sure they're accurate.
182
210
  email:
183
211
  - gjtorikian@gmail.com
@@ -197,6 +225,7 @@ files:
197
225
  - lib/html/proofer/checks/images.rb
198
226
  - lib/html/proofer/checks/links.rb
199
227
  - lib/html/proofer/checks/scripts.rb
228
+ - lib/html/proofer/configuration.rb
200
229
  - lib/html/proofer/log.rb
201
230
  - lib/html/proofer/url_validator.rb
202
231
  - lib/html/proofer/utils.rb
@@ -222,7 +251,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
222
251
  version: '0'
223
252
  requirements: []
224
253
  rubyforge_project:
225
- rubygems_version: 2.4.5
254
+ rubygems_version: 2.4.5.1
226
255
  signing_key:
227
256
  specification_version: 4
228
257
  summary: A set of tests to validate your HTML output. These tests check if your image