html-proofer 2.5.2 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2882cf21c649ba0f4e231508045f46e474c74cbe
4
- data.tar.gz: 495aaf04be88b3710503c7c5f3099e37bbacfcbc
3
+ metadata.gz: 904c91c8694ab71a3722677bb5e8be2c78074503
4
+ data.tar.gz: 8e8c720d05ac809b4b5628711a40516a88be8dfa
5
5
  SHA512:
6
- metadata.gz: b8d342482dcdc6c0922ee2c703fffb5b0251e2f2a5dbb3bfbddc584bc5ff6867599dc1a5394c2a2f96ebc32b638a7e68a0de4db2b678af6be9b0aadd0881d88a
7
- data.tar.gz: c518d95329621dbd1eee59be267dbc6001988c4fc25e11e03e2fb83cf21bf4959f159724ef8ab7d2ab54d4785aa618629291795100cc88684f9354ee60e3f7dd
6
+ metadata.gz: e72778e7edd2f302a91b6727d4825ed64cc506cf473bc9575d0d79051ed39f56fca0bff8b79cba268c28a68b796f1c0495460e4c420d1f7f16439f84e7a94325
7
+ data.tar.gz: dd974ec72bf547882f85e59b51223b5ed2c54688288cf5265c34c01b872236daa4c1127c61ef4e25882f4dd340b74dc7c88370c31fcd603853b74f352b0e5213
@@ -5,15 +5,6 @@ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w( .. lib ))
5
5
 
6
6
  require 'html/proofer'
7
7
  require 'mercenary'
8
- require 'rubygems'
9
-
10
- def to_regex?(item)
11
- if item.start_with?('/') && item.end_with?('/')
12
- Regexp.new item[1...-1]
13
- else
14
- item
15
- end
16
- end
17
8
 
18
9
  Mercenary.program(:htmlproof) do |p|
19
10
  p.version HTML::Proofer::VERSION
@@ -22,25 +13,27 @@ Mercenary.program(:htmlproof) do |p|
22
13
 
23
14
  p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.'
24
15
 
16
+ p.option 'allow_hash_href', '--allow-hash-href', 'If `true`, ignores the `href` `#`'
25
17
  p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
26
- p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'Comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore'
27
- p.option 'empty_alt_ignore', '--empty-alt-ignore', 'Ignores images with empty alt tags.'
18
+ p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'A comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore'
28
19
  p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, ' An array of Strings indicating which checks you\'d like to not perform.'
29
20
  p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the website exists). This slows the checker down (default: `false`).'
30
21
  p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).'
31
22
  p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogiri (default: `false`).'
32
23
  p.option 'directory_index_file', '--directory-index-file', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
33
- p.option 'disable_external', '--disable-external', 'Disables the external link checker (default: `false`)'
34
- p.option 'error_sort', '--error-sort SORT', 'Defines the sort order for error output. Can be `path`, `desc`, or `status` (default: `path`).'
24
+ p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker, which can take a lot of time (default: `false`)'
25
+ p.option 'empty_alt_ignore', '--empty-alt-ignore', 'If `true`, ignores images with empty alt tags'
26
+ p.option 'error_sort', '--error-sort SORT', 'Defines the sort order for error output. Can be `:path`, `:desc`, or `:status` (default: `path`).'
35
27
  p.option 'enforce_https', '--enforce-https', 'Fails a link if it\'s not marked as `https` (default: `false`).'
36
- p.option 'ext', '--ext EXT', String, 'The extension of your HTML files (default: `.html`)'
37
- p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'Comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
38
- p.option 'href_ignore', '--href-ignore link1,[link2,...]', Array, 'Comma-separated list of Strings or RegExps containing `href`s that are safe to ignore.'
39
- p.option 'href_swap', '--href-swap re:string,[re:string,...]', Array, 'Comma-separated list of key-value pairs of `RegExp:String`. Transforms links matching `RegExp` into `String`'
28
+ p.option 'ext', '--ext EXT', String, 'The extension of your HTML files including the dot. (default: `.html`)'
29
+ p.option 'external_only', '--external_only', 'Only checks problems with external references'
30
+ p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
31
+ p.option 'href_ignore', '--href-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored. **Will be renamed in a future release.**'
32
+ p.option 'href_swap', '--href-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms links that match `RegExp` into `String` via `gsub`. **Will be renamed in a future release.**'
40
33
  p.option 'ignore_script_embeds', '--ignore-script-embeds', 'Ignore `check_html` errors associated with `script`s (default: `false`)'
41
- p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4x status code range.'
42
- p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'Comma-separated list of Strings or RegExps containing URLs that are safe to ignore.'
43
- p.option 'verbose', '--verbose', 'Enables more verbose logging.'
34
+ p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
35
+ p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
36
+ p.option 'verbose', '--verbose', 'If `true`, outputs extra information as the checking happens. Useful for debugging. **Will be deprecated in a future release.**'
44
37
  p.option 'verbosity', '--verbosity', String, 'Sets the logging level, as determined by Yell'
45
38
 
46
39
  p.action do |args, opts|
@@ -52,7 +45,7 @@ Mercenary.program(:htmlproof) do |p|
52
45
  # prepare everything to go to proofer
53
46
  p.options.select { |o| !opts[o.config_key].nil? }.each do |option|
54
47
  if option.return_type.to_s == 'Array' # TODO: is_a? doesn't work here?
55
- opts[option.config_key] = opts[option.config_key].map { |i| to_regex?(i) }
48
+ opts[option.config_key] = opts[option.config_key].map { |i| HTML::Proofer::Configuration.to_regex?(i) }
56
49
  end
57
50
  options[option.config_key.to_sym] = opts[option.config_key]
58
51
  end
@@ -68,7 +61,7 @@ Mercenary.program(:htmlproof) do |p|
68
61
 
69
62
  # check for ignore_script_embeds as it should be set in :validation
70
63
  unless opts['ignore_script_embeds'].nil?
71
- options[:validation] = { :ignore_script_embeds => true }
64
+ options[:validation] = { :ignore_script_embeds => true }
72
65
  end
73
66
 
74
67
  options[:error_sort] = opts['error-sort'].to_sym unless opts['error-sort'].nil?
@@ -8,30 +8,23 @@ end
8
8
  require_all 'proofer'
9
9
  require_all 'proofer/check_runner'
10
10
  require_all 'proofer/checks'
11
- require_relative './proofer/utils'
12
- require_relative './proofer/xpathfunctions'
13
11
 
14
12
  require 'parallel'
13
+ require 'fileutils'
15
14
 
16
15
  begin
17
16
  require 'awesome_print'
18
17
  rescue LoadError; end
19
18
 
20
19
  module HTML
21
-
22
20
  class Proofer
23
21
  include HTML::Proofer::Utils
24
22
 
25
23
  attr_reader :options, :typhoeus_opts, :hydra_opts, :parallel_opts, :validation_opts, :external_urls, :iterable_external_urls
26
24
 
27
- TYPHOEUS_DEFAULTS = {
28
- :followlocation => true,
29
- :headers => {
30
- 'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{VERSION}; +https://github.com/gjtorikian/html-proofer)"
31
- }
32
- }
33
-
34
25
  def initialize(src, opts = {})
26
+ FileUtils.mkdir_p(STORAGE_DIR) unless File.exist?(STORAGE_DIR)
27
+
35
28
  @src = src
36
29
 
37
30
  if opts[:verbose]
@@ -41,30 +34,12 @@ module HTML
41
34
  warn '`@options[:href_ignore]` will be renamed in a future 3.x.x release: http://git.io/vGHHy'
42
35
  end
43
36
 
44
- @proofer_opts = {
45
- :ext => '.html',
46
- :check_favicon => false,
47
- :href_swap => [],
48
- :href_ignore => [],
49
- :file_ignore => [],
50
- :url_ignore => [],
51
- :check_external_hash => false,
52
- :alt_ignore => [],
53
- :empty_alt_ignore => false,
54
- :enforce_https => false,
55
- :disable_external => false,
56
- :verbose => false,
57
- :only_4xx => false,
58
- :directory_index_file => 'index.html',
59
- :check_html => false,
60
- :error_sort => :path,
61
- :checks_to_ignore => []
62
- }
63
-
64
- @typhoeus_opts = TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
37
+ @proofer_opts = HTML::Proofer::Configuration::PROOFER_DEFAULTS
38
+
39
+ @typhoeus_opts = HTML::Proofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
65
40
  opts.delete(:typhoeus)
66
41
 
67
- @hydra_opts = opts[:hydra] || {}
42
+ @hydra_opts = HTML::Proofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})
68
43
  opts.delete(:hydra)
69
44
 
70
45
  # fall back to parallel defaults
@@ -84,9 +59,7 @@ module HTML
84
59
  end
85
60
 
86
61
  def run
87
- count = checks.length
88
- check_text = "#{checks} " << (count == 1 ? 'check' : 'checks')
89
- logger.log :info, :blue, "Running #{check_text} on #{@src} on *#{@options[:ext]}... \n\n"
62
+ logger.log :info, :blue, "Running #{checks} on #{@src} on *#{@options[:ext]}... \n\n"
90
63
 
91
64
  if @src.is_a?(Array) && !@options[:disable_external]
92
65
  check_list_of_links
@@ -123,9 +96,19 @@ module HTML
123
96
  @failed_tests.concat(item[:failed_tests])
124
97
  end
125
98
 
126
- validate_urls unless @options[:disable_external]
99
+ # TODO: lazy. if we're checking only external links,
100
+ # we'll just trash all the failed tests. really, we should
101
+ # just not run those other checks at all.
102
+ if @options[:external_only]
103
+ @failed_tests = []
104
+ validate_urls
105
+ elsif !@options[:disable_external]
106
+ validate_urls
107
+ end
127
108
 
128
- logger.log :info, :blue, "Ran on #{files.length} files!\n\n"
109
+ count = files.length
110
+ file_text = pluralize(count, 'file', 'files')
111
+ logger.log :info, :blue, "Ran on #{file_text}!\n\n"
129
112
  end
130
113
 
131
114
  # Walks over each implemented check and runs them on the files, in parallel.
@@ -195,7 +178,7 @@ module HTML
195
178
 
196
179
  sorted_failures.sort_and_report
197
180
  count = @failed_tests.length
198
- failure_text = "#{count} " << (count == 1 ? 'failure' : 'failures')
181
+ failure_text = pluralize(count, 'failure', 'failures')
199
182
  fail logger.colorize :red, "HTML-Proofer found #{failure_text}!"
200
183
  end
201
184
  end
@@ -1,16 +1,141 @@
1
+ require_relative 'utils'
2
+
3
+ require 'json'
4
+ require 'active_support/core_ext/string'
5
+ require 'active_support/core_ext/date'
6
+ require 'active_support/core_ext/numeric/time'
7
+
1
8
  module HTML
2
9
  class Proofer
3
- module Cache
4
- def create_nokogiri(path)
5
- if File.exist? path
6
- content = File.open(path).read
10
+ class Cache
11
+ include HTML::Proofer::Utils
12
+
13
+ FILENAME = File.join(STORAGE_DIR, 'cache.log')
14
+
15
+ attr_accessor :exists, :load, :cache_log, :cache_time
16
+
17
+ def initialize(logger, options)
18
+ @logger = logger
19
+ @cache_log = {}
20
+
21
+ if options.nil? || options.empty?
22
+ @load = false
23
+ else
24
+ @load = true
25
+ @parsed_timeframe = parsed_timeframe(options[:timeframe] || '30d')
26
+ end
27
+ @cache_time = Time.now
28
+
29
+ if File.exist?(FILENAME)
30
+ @exists = true
31
+ contents = File.read(FILENAME)
32
+ @cache_log = contents.empty? ? {} : JSON.parse(contents)
33
+ else
34
+ @exists = false
35
+ end
36
+ end
37
+
38
+ def within_timeframe?(time)
39
+ (@parsed_timeframe..@cache_time).cover?(time)
40
+ end
41
+
42
+ def urls
43
+ @cache_log['urls'] || []
44
+ end
45
+
46
+ def parsed_timeframe(timeframe)
47
+ time, date = timeframe.match(/(\d+)(\D)/).captures
48
+ time = time.to_f
49
+ case date
50
+ when 'M'
51
+ time.months.ago
52
+ when 'w'
53
+ time.weeks.ago
54
+ when 'd'
55
+ time.days.ago
56
+ when 'h'
57
+ time.hours.ago
7
58
  else
8
- content = path
59
+ fail ArgumentError, "#{date} is not a valid timeframe!"
9
60
  end
61
+ end
62
+
63
+ def add(url, filenames, status, msg = '')
64
+ data = {
65
+ :time => @cache_time,
66
+ :filenames => filenames,
67
+ :status => status,
68
+ :message => msg
69
+ }
70
+
71
+ @cache_log[clean_url(url)] = data
72
+ end
73
+
74
+ def detect_url_changes(found)
75
+ existing_urls = @cache_log.keys.map { |url| clean_url(url) }
76
+ found_urls = found.keys.map { |url| clean_url(url) }
77
+
78
+ # prepare to add new URLs detected
79
+ additions = found.reject do |url, _|
80
+ url = clean_url(url)
81
+ if existing_urls.include?(url)
82
+ true
83
+ else
84
+ @logger.log :debug, :yellow, "Adding #{url} to cache check"
85
+ false
86
+ end
87
+ end
88
+
89
+ new_link_count = additions.length
90
+ new_link_text = pluralize(new_link_count, 'link', 'links')
91
+ @logger.log :info, :blue, "Adding #{new_link_text} to the cache..."
92
+
93
+ # remove from cache URLs that no longer exist
94
+ del = 0
95
+ @cache_log.delete_if do |url, _|
96
+ url = clean_url(url)
97
+ if !found_urls.include?(url)
98
+ @logger.log :debug, :yellow, "Removing #{url} from cache check"
99
+ del += 1
100
+ true
101
+ else
102
+ false
103
+ end
104
+ end
105
+
106
+ del_link_text = pluralize(del, 'link', 'links')
107
+ @logger.log :info, :blue, "Removing #{del_link_text} from the cache..."
108
+
109
+ additions
110
+ end
111
+
112
+ def write
113
+ File.write(FILENAME, @cache_log.to_json)
114
+ end
115
+
116
+ def load?
117
+ @load.nil?
118
+ end
119
+
120
+
121
+ # FIXME: there seems to be some discrepancy where Typhoeus occasionally adds
122
+ # a trailing slash to URL strings, which causes issues with the cache
123
+ def slashless_url(url)
124
+ url.chomp('/')
125
+ end
126
+
127
+ # FIXME: it seems that Typhoeus actually acts on escaped URLs,
128
+ # but there's no way to get at that information, and the cache
129
+ # stores unescaped URLs. Because of this, some links, such as
130
+ # github.com/search/issues?q=is:open+is:issue+fig are not matched
131
+ # as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig
132
+ def unescape_url(url)
133
+ Addressable::URI.unescape(url)
134
+ end
10
135
 
11
- Nokogiri::HTML(content)
136
+ def clean_url(url)
137
+ slashless_url(unescape_url(url))
12
138
  end
13
- module_function :create_nokogiri
14
139
  end
15
140
  end
16
141
  end
@@ -7,7 +7,7 @@ module HTML
7
7
 
8
8
  attr_reader :issues, :src, :path, :options, :typhoeus_opts, :hydra_opts, :parallel_opts, \
9
9
  :validation_opts, :external_urls, :href_ignores, :url_ignores, :alt_ignores, \
10
- :empty_alt_ignore
10
+ :empty_alt_ignore, :allow_hash_href
11
11
 
12
12
  def initialize(src, path, html, options, typhoeus_opts, hydra_opts, parallel_opts, validation_opts)
13
13
  @src = src
@@ -23,6 +23,7 @@ module HTML
23
23
  @url_ignores = @options[:url_ignore]
24
24
  @alt_ignores = @options[:alt_ignore]
25
25
  @empty_alt_ignore = @options[:empty_alt_ignore]
26
+ @allow_hash_href = @options[:allow_hash_href]
26
27
  @external_urls = {}
27
28
  end
28
29
 
@@ -90,6 +90,10 @@ module HTML
90
90
  @check.empty_alt_ignore
91
91
  end
92
92
 
93
+ def allow_hash_href?
94
+ @check.allow_hash_href
95
+ end
96
+
93
97
  # path is external to the file
94
98
  def external?
95
99
  !internal?
@@ -32,6 +32,7 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
32
32
  next if link.ignore?
33
33
  next if link.href =~ /^javascript:/ # can't put this in ignore? because the URI does not parse
34
34
  next if link.placeholder?
35
+ next if link.allow_hash_href? && link.href == '#'
35
36
 
36
37
  # is it even a valid URL?
37
38
  unless link.valid?
@@ -0,0 +1,48 @@
1
+ module HTML
2
+ class Proofer
3
+ module Configuration
4
+ require_relative 'version'
5
+
6
+ PROOFER_DEFAULTS = {
7
+ :allow_hash_href => false,
8
+ :alt_ignore => [],
9
+ :check_external_hash => false,
10
+ :check_favicon => false,
11
+ :check_html => false,
12
+ :checks_to_ignore => [],
13
+ :directory_index_file => 'index.html',
14
+ :disable_external => false,
15
+ :empty_alt_ignore => false,
16
+ :enforce_https => false,
17
+ :error_sort => :path,
18
+ :ext => '.html',
19
+ :external_only => false,
20
+ :file_ignore => [],
21
+ :href_ignore => [],
22
+ :href_swap => [],
23
+ :only_4xx => false,
24
+ :url_ignore => [],
25
+ :verbose => false
26
+ }
27
+
28
+ TYPHOEUS_DEFAULTS = {
29
+ :followlocation => true,
30
+ :headers => {
31
+ 'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{HTML::Proofer::VERSION}; +https://github.com/gjtorikian/html-proofer)"
32
+ }
33
+ }
34
+
35
+ HYDRA_DEFAULTS = {
36
+ :max_concurrency => 50
37
+ }
38
+
39
+ def self.to_regex?(item)
40
+ if item.start_with?('/') && item.end_with?('/')
41
+ Regexp.new item[1...-1]
42
+ else
43
+ item
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -1,6 +1,7 @@
1
1
  require 'typhoeus'
2
2
  require 'uri'
3
3
  require_relative './utils'
4
+ require_relative './cache'
4
5
 
5
6
  module HTML
6
7
  class Proofer
@@ -18,16 +19,40 @@ module HTML
18
19
  @hydra = Typhoeus::Hydra.new(hydra_opts)
19
20
  @typhoeus_opts = typhoeus_opts
20
21
  @external_domain_paths_with_queries = {}
22
+ @cache = Cache.new(@logger, @options[:cache])
21
23
  end
22
24
 
23
25
  def run
24
26
  @iterable_external_urls = remove_query_values
25
- external_link_checker(@iterable_external_urls)
27
+
28
+ if @cache.exists && @cache.load
29
+ cache_count = @cache.cache_log.length
30
+ cache_text = pluralize(cache_count, 'link', 'links')
31
+
32
+ logger.log :info, :blue, "Found #{cache_text} in the cache..."
33
+
34
+ urls_to_check = @cache.detect_url_changes(@iterable_external_urls)
35
+
36
+ @cache.cache_log.each_pair do |url, cache|
37
+ if @cache.within_timeframe?(cache['time'])
38
+ next if cache['message'].empty? # these were successes to skip
39
+ urls_to_check[url] = cache['filenames'] # these are failures to retry
40
+ else
41
+ urls_to_check[url] = cache['filenames'] # pass or fail, recheck expired links
42
+ end
43
+ end
44
+
45
+ external_link_checker(urls_to_check)
46
+ else
47
+ external_link_checker(@iterable_external_urls)
48
+ end
49
+
50
+ @cache.write
26
51
  @failed_tests
27
52
  end
28
53
 
29
54
  def remove_query_values
30
- return if @external_urls.nil?
55
+ return nil if @external_urls.nil?
31
56
  iterable_external_urls = @external_urls.dup
32
57
  @external_urls.keys.each do |url|
33
58
  uri = begin
@@ -75,14 +100,16 @@ module HTML
75
100
  external_urls = Hash[external_urls.sort]
76
101
 
77
102
  count = external_urls.length
78
- check_text = "#{count} " << (count == 1 ? 'external link' : 'external links')
103
+ check_text = pluralize(count, 'external link', 'external links')
79
104
  logger.log :info, :blue, "Checking #{check_text}..."
80
105
 
81
106
  Ethon.logger = logger # log from Typhoeus/Ethon
82
107
 
83
108
  url_processor(external_urls)
84
109
 
85
- logger.log :debug, :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..."
110
+ logger.log :debug, :yellow, "Running requests for:"
111
+ logger.log :debug, :yellow, "###\n" + external_urls.keys.join("\n") + "\n###"
112
+
86
113
  hydra.run
87
114
  end
88
115
 
@@ -125,14 +152,19 @@ module HTML
125
152
 
126
153
  if response_code.between?(200, 299)
127
154
  check_hash_in_2xx_response(href, effective_url, response, filenames)
155
+ @cache.add(href, filenames, response_code)
128
156
  elsif response.timed_out?
129
157
  handle_timeout(href, filenames, response_code)
158
+ elsif response_code == 0
159
+ handle_failure(href, filenames, response_code)
130
160
  elsif method == :head
131
161
  queue_request(:get, href, filenames)
132
162
  else
133
163
  return if @options[:only_4xx] && !response_code.between?(400, 499)
134
164
  # Received a non-successful http response.
135
- add_external_issue(filenames, "External link #{href} failed: #{response_code} #{response.return_message}", response_code)
165
+ msg = "External link #{href} failed: #{response_code} #{response.return_message}"
166
+ add_external_issue(filenames, msg, response_code)
167
+ @cache.add(href, filenames, response_code, msg)
136
168
  end
137
169
  end
138
170
 
@@ -153,12 +185,23 @@ module HTML
153
185
 
154
186
  return unless body_doc.xpath(xpath).empty?
155
187
 
156
- add_external_issue filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response.code
188
+ msg = "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not"
189
+ add_external_issue(filenames, msg, response.code)
190
+ @cache.add(href, filenames, response.code, msg)
157
191
  end
158
192
 
159
193
  def handle_timeout(href, filenames, response_code)
194
+ msg = "External link #{href} failed: got a time out (response code #{response_code})"
195
+ @cache.add(href, filenames, 0, msg)
196
+ return if @options[:only_4xx]
197
+ add_external_issue(filenames, msg, response_code)
198
+ end
199
+
200
+ def handle_failure(href, filenames, response_code)
201
+ msg = "External link #{href} failed: response code #{response_code} means something's wrong"
202
+ @cache.add(href, filenames, 0, msg)
160
203
  return if @options[:only_4xx]
161
- add_external_issue filenames, "External link #{href} failed: got a time out", response_code
204
+ add_external_issue(filenames, msg, response_code)
162
205
  end
163
206
 
164
207
  def add_external_issue(filenames, desc, status = nil)
@@ -3,6 +3,12 @@ require 'nokogiri'
3
3
  module HTML
4
4
  class Proofer
5
5
  module Utils
6
+ STORAGE_DIR = File.join('tmp', '.htmlproofer')
7
+
8
+ def pluralize(count, single, plural)
9
+ "#{count} " << (count == 1 ? single : plural)
10
+ end
11
+
6
12
  def create_nokogiri(path)
7
13
  if File.exist? path
8
14
  content = File.open(path).read
@@ -10,7 +16,7 @@ module HTML
10
16
  content = path
11
17
  end
12
18
 
13
- Nokogiri::HTML(content)
19
+ Nokogiri::HTML(clean_content(content))
14
20
  end
15
21
  module_function :create_nokogiri
16
22
 
@@ -21,6 +27,21 @@ module HTML
21
27
  href
22
28
  end
23
29
  module_function :swap
30
+
31
+ # address a problem with Nokogiri's parsing of URL entities
32
+ # problem from http://git.io/vBYU1
33
+ # solution from http://git.io/vBYUi
34
+ def clean_content(string)
35
+ matches = string.scan(%r{https?://([^>]+)}i)
36
+
37
+ matches.flatten.each do |url|
38
+ escaped_url = url.gsub(/&(?!amp;)/, '&amp;')
39
+ escaped_url = escaped_url.gsub(%r{/}, '&#47;')
40
+ string.gsub!(url, escaped_url)
41
+ end
42
+ string
43
+ end
44
+ module_function :clean_content
24
45
  end
25
46
  end
26
47
  end
@@ -1,5 +1,5 @@
1
1
  module HTML
2
2
  class Proofer
3
- VERSION = '2.5.2'
3
+ VERSION = '2.6.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.2
4
+ version: 2.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-06 00:00:00.000000000 Z
11
+ date: 2015-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mercenary
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: '2.3'
111
+ - !ruby/object:Gem::Dependency
112
+ name: activesupport
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '4.2'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '4.2'
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: redcarpet
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -178,6 +192,20 @@ dependencies:
178
192
  - - "~>"
179
193
  - !ruby/object:Gem::Version
180
194
  version: '2.9'
195
+ - !ruby/object:Gem::Dependency
196
+ name: timecop
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - "~>"
200
+ - !ruby/object:Gem::Version
201
+ version: '0.8'
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - "~>"
207
+ - !ruby/object:Gem::Version
208
+ version: '0.8'
181
209
  description: Test your rendered HTML files to make sure they're accurate.
182
210
  email:
183
211
  - gjtorikian@gmail.com
@@ -197,6 +225,7 @@ files:
197
225
  - lib/html/proofer/checks/images.rb
198
226
  - lib/html/proofer/checks/links.rb
199
227
  - lib/html/proofer/checks/scripts.rb
228
+ - lib/html/proofer/configuration.rb
200
229
  - lib/html/proofer/log.rb
201
230
  - lib/html/proofer/url_validator.rb
202
231
  - lib/html/proofer/utils.rb
@@ -222,7 +251,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
222
251
  version: '0'
223
252
  requirements: []
224
253
  rubyforge_project:
225
- rubygems_version: 2.4.5
254
+ rubygems_version: 2.4.5.1
226
255
  signing_key:
227
256
  specification_version: 4
228
257
  summary: A set of tests to validate your HTML output. These tests check if your image