html-proofer 2.5.2 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/htmlproof +15 -22
- data/lib/html/proofer.rb +21 -38
- data/lib/html/proofer/cache.rb +132 -7
- data/lib/html/proofer/check_runner.rb +2 -1
- data/lib/html/proofer/checkable.rb +4 -0
- data/lib/html/proofer/checks/links.rb +1 -0
- data/lib/html/proofer/configuration.rb +48 -0
- data/lib/html/proofer/url_validator.rb +50 -7
- data/lib/html/proofer/utils.rb +22 -1
- data/lib/html/proofer/version.rb +1 -1
- metadata +32 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 904c91c8694ab71a3722677bb5e8be2c78074503
|
4
|
+
data.tar.gz: 8e8c720d05ac809b4b5628711a40516a88be8dfa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e72778e7edd2f302a91b6727d4825ed64cc506cf473bc9575d0d79051ed39f56fca0bff8b79cba268c28a68b796f1c0495460e4c420d1f7f16439f84e7a94325
|
7
|
+
data.tar.gz: dd974ec72bf547882f85e59b51223b5ed2c54688288cf5265c34c01b872236daa4c1127c61ef4e25882f4dd340b74dc7c88370c31fcd603853b74f352b0e5213
|
data/bin/htmlproof
CHANGED
@@ -5,15 +5,6 @@ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w( .. lib ))
|
|
5
5
|
|
6
6
|
require 'html/proofer'
|
7
7
|
require 'mercenary'
|
8
|
-
require 'rubygems'
|
9
|
-
|
10
|
-
def to_regex?(item)
|
11
|
-
if item.start_with?('/') && item.end_with?('/')
|
12
|
-
Regexp.new item[1...-1]
|
13
|
-
else
|
14
|
-
item
|
15
|
-
end
|
16
|
-
end
|
17
8
|
|
18
9
|
Mercenary.program(:htmlproof) do |p|
|
19
10
|
p.version HTML::Proofer::VERSION
|
@@ -22,25 +13,27 @@ Mercenary.program(:htmlproof) do |p|
|
|
22
13
|
|
23
14
|
p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.'
|
24
15
|
|
16
|
+
p.option 'allow_hash_href', '--allow-hash-href', 'If `true`, ignores the `href` `#`'
|
25
17
|
p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
|
26
|
-
p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, '
|
27
|
-
p.option 'empty_alt_ignore', '--empty-alt-ignore', 'Ignores images with empty alt tags.'
|
18
|
+
p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'A comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore'
|
28
19
|
p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, ' An array of Strings indicating which checks you\'d like to not perform.'
|
29
20
|
p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the website exists). This slows the checker down (default: `false`).'
|
30
21
|
p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).'
|
31
22
|
p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogiri (default: `false`).'
|
32
23
|
p.option 'directory_index_file', '--directory-index-file', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
|
33
|
-
p.option 'disable_external', '--disable-external', '
|
34
|
-
p.option '
|
24
|
+
p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker, which can take a lot of time (default: `false`)'
|
25
|
+
p.option 'empty_alt_ignore', '--empty-alt-ignore', 'If `true`, ignores images with empty alt tags'
|
26
|
+
p.option 'error_sort', '--error-sort SORT', 'Defines the sort order for error output. Can be `:path`, `:desc`, or `:status` (default: `path`).'
|
35
27
|
p.option 'enforce_https', '--enforce-https', 'Fails a link if it\'s not marked as `https` (default: `false`).'
|
36
|
-
p.option 'ext', '--ext EXT', String, 'The extension of your HTML files (default: `.html`)'
|
37
|
-
p.option '
|
38
|
-
p.option '
|
39
|
-
p.option '
|
28
|
+
p.option 'ext', '--ext EXT', String, 'The extension of your HTML files including the dot. (default: `.html`)'
|
29
|
+
p.option 'external_only', '--external_only', 'Only checks problems with external references'
|
30
|
+
p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
|
31
|
+
p.option 'href_ignore', '--href-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored. **Will be renamed in a future release.**'
|
32
|
+
p.option 'href_swap', '--href-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms links that match `RegExp` into `String` via `gsub`. **Will be renamed in a future release.**'
|
40
33
|
p.option 'ignore_script_embeds', '--ignore-script-embeds', 'Ignore `check_html` errors associated with `script`s (default: `false`)'
|
41
|
-
p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the
|
42
|
-
p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, '
|
43
|
-
p.option 'verbose', '--verbose', '
|
34
|
+
p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
|
35
|
+
p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
|
36
|
+
p.option 'verbose', '--verbose', 'If `true`, outputs extra information as the checking happens. Useful for debugging. **Will be deprecated in a future release.**'
|
44
37
|
p.option 'verbosity', '--verbosity', String, 'Sets the logging level, as determined by Yell'
|
45
38
|
|
46
39
|
p.action do |args, opts|
|
@@ -52,7 +45,7 @@ Mercenary.program(:htmlproof) do |p|
|
|
52
45
|
# prepare everything to go to proofer
|
53
46
|
p.options.select { |o| !opts[o.config_key].nil? }.each do |option|
|
54
47
|
if option.return_type.to_s == 'Array' # TODO: is_a? doesn't work here?
|
55
|
-
opts[option.config_key] = opts[option.config_key].map { |i| to_regex?(i) }
|
48
|
+
opts[option.config_key] = opts[option.config_key].map { |i| HTML::Proofer::Configuration.to_regex?(i) }
|
56
49
|
end
|
57
50
|
options[option.config_key.to_sym] = opts[option.config_key]
|
58
51
|
end
|
@@ -68,7 +61,7 @@ Mercenary.program(:htmlproof) do |p|
|
|
68
61
|
|
69
62
|
# check for ignore_script_embeds as it should be set in :validation
|
70
63
|
unless opts['ignore_script_embeds'].nil?
|
71
|
-
|
64
|
+
options[:validation] = { :ignore_script_embeds => true }
|
72
65
|
end
|
73
66
|
|
74
67
|
options[:error_sort] = opts['error-sort'].to_sym unless opts['error-sort'].nil?
|
data/lib/html/proofer.rb
CHANGED
@@ -8,30 +8,23 @@ end
|
|
8
8
|
require_all 'proofer'
|
9
9
|
require_all 'proofer/check_runner'
|
10
10
|
require_all 'proofer/checks'
|
11
|
-
require_relative './proofer/utils'
|
12
|
-
require_relative './proofer/xpathfunctions'
|
13
11
|
|
14
12
|
require 'parallel'
|
13
|
+
require 'fileutils'
|
15
14
|
|
16
15
|
begin
|
17
16
|
require 'awesome_print'
|
18
17
|
rescue LoadError; end
|
19
18
|
|
20
19
|
module HTML
|
21
|
-
|
22
20
|
class Proofer
|
23
21
|
include HTML::Proofer::Utils
|
24
22
|
|
25
23
|
attr_reader :options, :typhoeus_opts, :hydra_opts, :parallel_opts, :validation_opts, :external_urls, :iterable_external_urls
|
26
24
|
|
27
|
-
TYPHOEUS_DEFAULTS = {
|
28
|
-
:followlocation => true,
|
29
|
-
:headers => {
|
30
|
-
'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{VERSION}; +https://github.com/gjtorikian/html-proofer)"
|
31
|
-
}
|
32
|
-
}
|
33
|
-
|
34
25
|
def initialize(src, opts = {})
|
26
|
+
FileUtils.mkdir_p(STORAGE_DIR) unless File.exist?(STORAGE_DIR)
|
27
|
+
|
35
28
|
@src = src
|
36
29
|
|
37
30
|
if opts[:verbose]
|
@@ -41,30 +34,12 @@ module HTML
|
|
41
34
|
warn '`@options[:href_ignore]` will be renamed in a future 3.x.x release: http://git.io/vGHHy'
|
42
35
|
end
|
43
36
|
|
44
|
-
@proofer_opts =
|
45
|
-
|
46
|
-
|
47
|
-
:href_swap => [],
|
48
|
-
:href_ignore => [],
|
49
|
-
:file_ignore => [],
|
50
|
-
:url_ignore => [],
|
51
|
-
:check_external_hash => false,
|
52
|
-
:alt_ignore => [],
|
53
|
-
:empty_alt_ignore => false,
|
54
|
-
:enforce_https => false,
|
55
|
-
:disable_external => false,
|
56
|
-
:verbose => false,
|
57
|
-
:only_4xx => false,
|
58
|
-
:directory_index_file => 'index.html',
|
59
|
-
:check_html => false,
|
60
|
-
:error_sort => :path,
|
61
|
-
:checks_to_ignore => []
|
62
|
-
}
|
63
|
-
|
64
|
-
@typhoeus_opts = TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
|
37
|
+
@proofer_opts = HTML::Proofer::Configuration::PROOFER_DEFAULTS
|
38
|
+
|
39
|
+
@typhoeus_opts = HTML::Proofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
|
65
40
|
opts.delete(:typhoeus)
|
66
41
|
|
67
|
-
@hydra_opts = opts[:hydra] || {}
|
42
|
+
@hydra_opts = HTML::Proofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})
|
68
43
|
opts.delete(:hydra)
|
69
44
|
|
70
45
|
# fall back to parallel defaults
|
@@ -84,9 +59,7 @@ module HTML
|
|
84
59
|
end
|
85
60
|
|
86
61
|
def run
|
87
|
-
|
88
|
-
check_text = "#{checks} " << (count == 1 ? 'check' : 'checks')
|
89
|
-
logger.log :info, :blue, "Running #{check_text} on #{@src} on *#{@options[:ext]}... \n\n"
|
62
|
+
logger.log :info, :blue, "Running #{checks} on #{@src} on *#{@options[:ext]}... \n\n"
|
90
63
|
|
91
64
|
if @src.is_a?(Array) && !@options[:disable_external]
|
92
65
|
check_list_of_links
|
@@ -123,9 +96,19 @@ module HTML
|
|
123
96
|
@failed_tests.concat(item[:failed_tests])
|
124
97
|
end
|
125
98
|
|
126
|
-
|
99
|
+
# TODO: lazy. if we're checking only external links,
|
100
|
+
# we'll just trash all the failed tests. really, we should
|
101
|
+
# just not run those other checks at all.
|
102
|
+
if @options[:external_only]
|
103
|
+
@failed_tests = []
|
104
|
+
validate_urls
|
105
|
+
elsif !@options[:disable_external]
|
106
|
+
validate_urls
|
107
|
+
end
|
127
108
|
|
128
|
-
|
109
|
+
count = files.length
|
110
|
+
file_text = pluralize(count, 'file', 'files')
|
111
|
+
logger.log :info, :blue, "Ran on #{file_text}!\n\n"
|
129
112
|
end
|
130
113
|
|
131
114
|
# Walks over each implemented check and runs them on the files, in parallel.
|
@@ -195,7 +178,7 @@ module HTML
|
|
195
178
|
|
196
179
|
sorted_failures.sort_and_report
|
197
180
|
count = @failed_tests.length
|
198
|
-
failure_text =
|
181
|
+
failure_text = pluralize(count, 'failure', 'failures')
|
199
182
|
fail logger.colorize :red, "HTML-Proofer found #{failure_text}!"
|
200
183
|
end
|
201
184
|
end
|
data/lib/html/proofer/cache.rb
CHANGED
@@ -1,16 +1,141 @@
|
|
1
|
+
require_relative 'utils'
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
require 'active_support/core_ext/string'
|
5
|
+
require 'active_support/core_ext/date'
|
6
|
+
require 'active_support/core_ext/numeric/time'
|
7
|
+
|
1
8
|
module HTML
|
2
9
|
class Proofer
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
10
|
+
class Cache
|
11
|
+
include HTML::Proofer::Utils
|
12
|
+
|
13
|
+
FILENAME = File.join(STORAGE_DIR, 'cache.log')
|
14
|
+
|
15
|
+
attr_accessor :exists, :load, :cache_log, :cache_time
|
16
|
+
|
17
|
+
def initialize(logger, options)
|
18
|
+
@logger = logger
|
19
|
+
@cache_log = {}
|
20
|
+
|
21
|
+
if options.nil? || options.empty?
|
22
|
+
@load = false
|
23
|
+
else
|
24
|
+
@load = true
|
25
|
+
@parsed_timeframe = parsed_timeframe(options[:timeframe] || '30d')
|
26
|
+
end
|
27
|
+
@cache_time = Time.now
|
28
|
+
|
29
|
+
if File.exist?(FILENAME)
|
30
|
+
@exists = true
|
31
|
+
contents = File.read(FILENAME)
|
32
|
+
@cache_log = contents.empty? ? {} : JSON.parse(contents)
|
33
|
+
else
|
34
|
+
@exists = false
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def within_timeframe?(time)
|
39
|
+
(@parsed_timeframe..@cache_time).cover?(time)
|
40
|
+
end
|
41
|
+
|
42
|
+
def urls
|
43
|
+
@cache_log['urls'] || []
|
44
|
+
end
|
45
|
+
|
46
|
+
def parsed_timeframe(timeframe)
|
47
|
+
time, date = timeframe.match(/(\d+)(\D)/).captures
|
48
|
+
time = time.to_f
|
49
|
+
case date
|
50
|
+
when 'M'
|
51
|
+
time.months.ago
|
52
|
+
when 'w'
|
53
|
+
time.weeks.ago
|
54
|
+
when 'd'
|
55
|
+
time.days.ago
|
56
|
+
when 'h'
|
57
|
+
time.hours.ago
|
7
58
|
else
|
8
|
-
|
59
|
+
fail ArgumentError, "#{date} is not a valid timeframe!"
|
9
60
|
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def add(url, filenames, status, msg = '')
|
64
|
+
data = {
|
65
|
+
:time => @cache_time,
|
66
|
+
:filenames => filenames,
|
67
|
+
:status => status,
|
68
|
+
:message => msg
|
69
|
+
}
|
70
|
+
|
71
|
+
@cache_log[clean_url(url)] = data
|
72
|
+
end
|
73
|
+
|
74
|
+
def detect_url_changes(found)
|
75
|
+
existing_urls = @cache_log.keys.map { |url| clean_url(url) }
|
76
|
+
found_urls = found.keys.map { |url| clean_url(url) }
|
77
|
+
|
78
|
+
# prepare to add new URLs detected
|
79
|
+
additions = found.reject do |url, _|
|
80
|
+
url = clean_url(url)
|
81
|
+
if existing_urls.include?(url)
|
82
|
+
true
|
83
|
+
else
|
84
|
+
@logger.log :debug, :yellow, "Adding #{url} to cache check"
|
85
|
+
false
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
new_link_count = additions.length
|
90
|
+
new_link_text = pluralize(new_link_count, 'link', 'links')
|
91
|
+
@logger.log :info, :blue, "Adding #{new_link_text} to the cache..."
|
92
|
+
|
93
|
+
# remove from cache URLs that no longer exist
|
94
|
+
del = 0
|
95
|
+
@cache_log.delete_if do |url, _|
|
96
|
+
url = clean_url(url)
|
97
|
+
if !found_urls.include?(url)
|
98
|
+
@logger.log :debug, :yellow, "Removing #{url} from cache check"
|
99
|
+
del += 1
|
100
|
+
true
|
101
|
+
else
|
102
|
+
false
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
del_link_text = pluralize(del, 'link', 'links')
|
107
|
+
@logger.log :info, :blue, "Removing #{del_link_text} from the cache..."
|
108
|
+
|
109
|
+
additions
|
110
|
+
end
|
111
|
+
|
112
|
+
def write
|
113
|
+
File.write(FILENAME, @cache_log.to_json)
|
114
|
+
end
|
115
|
+
|
116
|
+
def load?
|
117
|
+
@load.nil?
|
118
|
+
end
|
119
|
+
|
120
|
+
|
121
|
+
# FIXME: there seems to be some discrepancy where Typhoeus occasionally adds
|
122
|
+
# a trailing slash to URL strings, which causes issues with the cache
|
123
|
+
def slashless_url(url)
|
124
|
+
url.chomp('/')
|
125
|
+
end
|
126
|
+
|
127
|
+
# FIXME: it seems that Typhoeus actually acts on escaped URLs,
|
128
|
+
# but there's no way to get at that information, and the cache
|
129
|
+
# stores unescaped URLs. Because of this, some links, such as
|
130
|
+
# github.com/search/issues?q=is:open+is:issue+fig are not matched
|
131
|
+
# as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig
|
132
|
+
def unescape_url(url)
|
133
|
+
Addressable::URI.unescape(url)
|
134
|
+
end
|
10
135
|
|
11
|
-
|
136
|
+
def clean_url(url)
|
137
|
+
slashless_url(unescape_url(url))
|
12
138
|
end
|
13
|
-
module_function :create_nokogiri
|
14
139
|
end
|
15
140
|
end
|
16
141
|
end
|
@@ -7,7 +7,7 @@ module HTML
|
|
7
7
|
|
8
8
|
attr_reader :issues, :src, :path, :options, :typhoeus_opts, :hydra_opts, :parallel_opts, \
|
9
9
|
:validation_opts, :external_urls, :href_ignores, :url_ignores, :alt_ignores, \
|
10
|
-
:empty_alt_ignore
|
10
|
+
:empty_alt_ignore, :allow_hash_href
|
11
11
|
|
12
12
|
def initialize(src, path, html, options, typhoeus_opts, hydra_opts, parallel_opts, validation_opts)
|
13
13
|
@src = src
|
@@ -23,6 +23,7 @@ module HTML
|
|
23
23
|
@url_ignores = @options[:url_ignore]
|
24
24
|
@alt_ignores = @options[:alt_ignore]
|
25
25
|
@empty_alt_ignore = @options[:empty_alt_ignore]
|
26
|
+
@allow_hash_href = @options[:allow_hash_href]
|
26
27
|
@external_urls = {}
|
27
28
|
end
|
28
29
|
|
@@ -32,6 +32,7 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
|
|
32
32
|
next if link.ignore?
|
33
33
|
next if link.href =~ /^javascript:/ # can't put this in ignore? because the URI does not parse
|
34
34
|
next if link.placeholder?
|
35
|
+
next if link.allow_hash_href? && link.href == '#'
|
35
36
|
|
36
37
|
# is it even a valid URL?
|
37
38
|
unless link.valid?
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module HTML
|
2
|
+
class Proofer
|
3
|
+
module Configuration
|
4
|
+
require_relative 'version'
|
5
|
+
|
6
|
+
PROOFER_DEFAULTS = {
|
7
|
+
:allow_hash_href => false,
|
8
|
+
:alt_ignore => [],
|
9
|
+
:check_external_hash => false,
|
10
|
+
:check_favicon => false,
|
11
|
+
:check_html => false,
|
12
|
+
:checks_to_ignore => [],
|
13
|
+
:directory_index_file => 'index.html',
|
14
|
+
:disable_external => false,
|
15
|
+
:empty_alt_ignore => false,
|
16
|
+
:enforce_https => false,
|
17
|
+
:error_sort => :path,
|
18
|
+
:ext => '.html',
|
19
|
+
:external_only => false,
|
20
|
+
:file_ignore => [],
|
21
|
+
:href_ignore => [],
|
22
|
+
:href_swap => [],
|
23
|
+
:only_4xx => false,
|
24
|
+
:url_ignore => [],
|
25
|
+
:verbose => false
|
26
|
+
}
|
27
|
+
|
28
|
+
TYPHOEUS_DEFAULTS = {
|
29
|
+
:followlocation => true,
|
30
|
+
:headers => {
|
31
|
+
'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{HTML::Proofer::VERSION}; +https://github.com/gjtorikian/html-proofer)"
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
35
|
+
HYDRA_DEFAULTS = {
|
36
|
+
:max_concurrency => 50
|
37
|
+
}
|
38
|
+
|
39
|
+
def self.to_regex?(item)
|
40
|
+
if item.start_with?('/') && item.end_with?('/')
|
41
|
+
Regexp.new item[1...-1]
|
42
|
+
else
|
43
|
+
item
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'typhoeus'
|
2
2
|
require 'uri'
|
3
3
|
require_relative './utils'
|
4
|
+
require_relative './cache'
|
4
5
|
|
5
6
|
module HTML
|
6
7
|
class Proofer
|
@@ -18,16 +19,40 @@ module HTML
|
|
18
19
|
@hydra = Typhoeus::Hydra.new(hydra_opts)
|
19
20
|
@typhoeus_opts = typhoeus_opts
|
20
21
|
@external_domain_paths_with_queries = {}
|
22
|
+
@cache = Cache.new(@logger, @options[:cache])
|
21
23
|
end
|
22
24
|
|
23
25
|
def run
|
24
26
|
@iterable_external_urls = remove_query_values
|
25
|
-
|
27
|
+
|
28
|
+
if @cache.exists && @cache.load
|
29
|
+
cache_count = @cache.cache_log.length
|
30
|
+
cache_text = pluralize(cache_count, 'link', 'links')
|
31
|
+
|
32
|
+
logger.log :info, :blue, "Found #{cache_text} in the cache..."
|
33
|
+
|
34
|
+
urls_to_check = @cache.detect_url_changes(@iterable_external_urls)
|
35
|
+
|
36
|
+
@cache.cache_log.each_pair do |url, cache|
|
37
|
+
if @cache.within_timeframe?(cache['time'])
|
38
|
+
next if cache['message'].empty? # these were successes to skip
|
39
|
+
urls_to_check[url] = cache['filenames'] # these are failures to retry
|
40
|
+
else
|
41
|
+
urls_to_check[url] = cache['filenames'] # pass or fail, recheck expired links
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
external_link_checker(urls_to_check)
|
46
|
+
else
|
47
|
+
external_link_checker(@iterable_external_urls)
|
48
|
+
end
|
49
|
+
|
50
|
+
@cache.write
|
26
51
|
@failed_tests
|
27
52
|
end
|
28
53
|
|
29
54
|
def remove_query_values
|
30
|
-
return if @external_urls.nil?
|
55
|
+
return nil if @external_urls.nil?
|
31
56
|
iterable_external_urls = @external_urls.dup
|
32
57
|
@external_urls.keys.each do |url|
|
33
58
|
uri = begin
|
@@ -75,14 +100,16 @@ module HTML
|
|
75
100
|
external_urls = Hash[external_urls.sort]
|
76
101
|
|
77
102
|
count = external_urls.length
|
78
|
-
check_text =
|
103
|
+
check_text = pluralize(count, 'external link', 'external links')
|
79
104
|
logger.log :info, :blue, "Checking #{check_text}..."
|
80
105
|
|
81
106
|
Ethon.logger = logger # log from Typhoeus/Ethon
|
82
107
|
|
83
108
|
url_processor(external_urls)
|
84
109
|
|
85
|
-
logger.log :debug, :yellow, "Running requests for
|
110
|
+
logger.log :debug, :yellow, "Running requests for:"
|
111
|
+
logger.log :debug, :yellow, "###\n" + external_urls.keys.join("\n") + "\n###"
|
112
|
+
|
86
113
|
hydra.run
|
87
114
|
end
|
88
115
|
|
@@ -125,14 +152,19 @@ module HTML
|
|
125
152
|
|
126
153
|
if response_code.between?(200, 299)
|
127
154
|
check_hash_in_2xx_response(href, effective_url, response, filenames)
|
155
|
+
@cache.add(href, filenames, response_code)
|
128
156
|
elsif response.timed_out?
|
129
157
|
handle_timeout(href, filenames, response_code)
|
158
|
+
elsif response_code == 0
|
159
|
+
handle_failure(href, filenames, response_code)
|
130
160
|
elsif method == :head
|
131
161
|
queue_request(:get, href, filenames)
|
132
162
|
else
|
133
163
|
return if @options[:only_4xx] && !response_code.between?(400, 499)
|
134
164
|
# Received a non-successful http response.
|
135
|
-
|
165
|
+
msg = "External link #{href} failed: #{response_code} #{response.return_message}"
|
166
|
+
add_external_issue(filenames, msg, response_code)
|
167
|
+
@cache.add(href, filenames, response_code, msg)
|
136
168
|
end
|
137
169
|
end
|
138
170
|
|
@@ -153,12 +185,23 @@ module HTML
|
|
153
185
|
|
154
186
|
return unless body_doc.xpath(xpath).empty?
|
155
187
|
|
156
|
-
|
188
|
+
msg = "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not"
|
189
|
+
add_external_issue(filenames, msg, response.code)
|
190
|
+
@cache.add(href, filenames, response.code, msg)
|
157
191
|
end
|
158
192
|
|
159
193
|
def handle_timeout(href, filenames, response_code)
|
194
|
+
msg = "External link #{href} failed: got a time out (response code #{response_code})"
|
195
|
+
@cache.add(href, filenames, 0, msg)
|
196
|
+
return if @options[:only_4xx]
|
197
|
+
add_external_issue(filenames, msg, response_code)
|
198
|
+
end
|
199
|
+
|
200
|
+
def handle_failure(href, filenames, response_code)
|
201
|
+
msg = "External link #{href} failed: response code #{response_code} means something's wrong"
|
202
|
+
@cache.add(href, filenames, 0, msg)
|
160
203
|
return if @options[:only_4xx]
|
161
|
-
add_external_issue
|
204
|
+
add_external_issue(filenames, msg, response_code)
|
162
205
|
end
|
163
206
|
|
164
207
|
def add_external_issue(filenames, desc, status = nil)
|
data/lib/html/proofer/utils.rb
CHANGED
@@ -3,6 +3,12 @@ require 'nokogiri'
|
|
3
3
|
module HTML
|
4
4
|
class Proofer
|
5
5
|
module Utils
|
6
|
+
STORAGE_DIR = File.join('tmp', '.htmlproofer')
|
7
|
+
|
8
|
+
def pluralize(count, single, plural)
|
9
|
+
"#{count} " << (count == 1 ? single : plural)
|
10
|
+
end
|
11
|
+
|
6
12
|
def create_nokogiri(path)
|
7
13
|
if File.exist? path
|
8
14
|
content = File.open(path).read
|
@@ -10,7 +16,7 @@ module HTML
|
|
10
16
|
content = path
|
11
17
|
end
|
12
18
|
|
13
|
-
Nokogiri::HTML(content)
|
19
|
+
Nokogiri::HTML(clean_content(content))
|
14
20
|
end
|
15
21
|
module_function :create_nokogiri
|
16
22
|
|
@@ -21,6 +27,21 @@ module HTML
|
|
21
27
|
href
|
22
28
|
end
|
23
29
|
module_function :swap
|
30
|
+
|
31
|
+
# address a problem with Nokogiri's parsing URL entities
|
32
|
+
# problem from http://git.io/vBYU1
|
33
|
+
# solution from http://git.io/vBYUi
|
34
|
+
def clean_content(string)
|
35
|
+
matches = string.scan(%r{https?://([^>]+)}i)
|
36
|
+
|
37
|
+
matches.flatten.each do |url|
|
38
|
+
escaped_url = url.gsub(/&(?!amp;)/, '&amp;')
|
39
|
+
escaped_url = escaped_url.gsub(%r{/}, '&#47;')
|
40
|
+
string.gsub!(url, escaped_url)
|
41
|
+
end
|
42
|
+
string
|
43
|
+
end
|
44
|
+
module_function :clean_content
|
24
45
|
end
|
25
46
|
end
|
26
47
|
end
|
data/lib/html/proofer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.5.2
|
4
|
+
version: 2.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mercenary
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '2.3'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: activesupport
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '4.2'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '4.2'
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: redcarpet
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -178,6 +192,20 @@ dependencies:
|
|
178
192
|
- - "~>"
|
179
193
|
- !ruby/object:Gem::Version
|
180
194
|
version: '2.9'
|
195
|
+
- !ruby/object:Gem::Dependency
|
196
|
+
name: timecop
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - "~>"
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: '0.8'
|
202
|
+
type: :development
|
203
|
+
prerelease: false
|
204
|
+
version_requirements: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - "~>"
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: '0.8'
|
181
209
|
description: Test your rendered HTML files to make sure they're accurate.
|
182
210
|
email:
|
183
211
|
- gjtorikian@gmail.com
|
@@ -197,6 +225,7 @@ files:
|
|
197
225
|
- lib/html/proofer/checks/images.rb
|
198
226
|
- lib/html/proofer/checks/links.rb
|
199
227
|
- lib/html/proofer/checks/scripts.rb
|
228
|
+
- lib/html/proofer/configuration.rb
|
200
229
|
- lib/html/proofer/log.rb
|
201
230
|
- lib/html/proofer/url_validator.rb
|
202
231
|
- lib/html/proofer/utils.rb
|
@@ -222,7 +251,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
222
251
|
version: '0'
|
223
252
|
requirements: []
|
224
253
|
rubyforge_project:
|
225
|
-
rubygems_version: 2.4.5
|
254
|
+
rubygems_version: 2.4.5.1
|
226
255
|
signing_key:
|
227
256
|
specification_version: 4
|
228
257
|
summary: A set of tests to validate your HTML output. These tests check if your image
|