html-proofer 2.6.4 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/{htmlproof → htmlproofer} +31 -24
- data/lib/html-proofer.rb +47 -0
- data/lib/html-proofer/cache.rb +153 -0
- data/lib/html-proofer/check.rb +63 -0
- data/lib/{html/proofer/checks → html-proofer/check}/favicon.rb +2 -8
- data/lib/html-proofer/check/html.rb +21 -0
- data/lib/html-proofer/check/images.rb +47 -0
- data/lib/{html/proofer/checks → html-proofer/check}/links.rb +40 -48
- data/lib/html-proofer/check/scripts.rb +28 -0
- data/lib/html-proofer/configuration.rb +56 -0
- data/lib/html-proofer/element.rb +165 -0
- data/lib/{html/proofer/check_runner → html-proofer}/issue.rb +8 -10
- data/lib/html-proofer/log.rb +49 -0
- data/lib/html-proofer/runner.rb +160 -0
- data/lib/html-proofer/url_validator.rb +218 -0
- data/lib/html-proofer/utils.rb +40 -0
- data/lib/html-proofer/version.rb +3 -0
- metadata +20 -20
- data/lib/html/proofer.rb +0 -191
- data/lib/html/proofer/cache.rb +0 -141
- data/lib/html/proofer/check_runner.rb +0 -70
- data/lib/html/proofer/checkable.rb +0 -168
- data/lib/html/proofer/checks/html.rb +0 -46
- data/lib/html/proofer/checks/images.rb +0 -54
- data/lib/html/proofer/checks/scripts.rb +0 -40
- data/lib/html/proofer/configuration.rb +0 -48
- data/lib/html/proofer/log.rb +0 -42
- data/lib/html/proofer/url_validator.rb +0 -222
- data/lib/html/proofer/utils.rb +0 -42
- data/lib/html/proofer/version.rb +0 -5
- data/lib/html/proofer/xpathfunctions.rb +0 -9
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.6.4
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mercenary
|
@@ -210,27 +210,27 @@ description: Test your rendered HTML files to make sure they're accurate.
|
|
210
210
|
email:
|
211
211
|
- gjtorikian@gmail.com
|
212
212
|
executables:
|
213
|
-
- htmlproof
|
213
|
+
- htmlproofer
|
214
214
|
extensions: []
|
215
215
|
extra_rdoc_files: []
|
216
216
|
files:
|
217
|
-
- bin/htmlproof
|
218
|
-
- lib/html
|
219
|
-
- lib/html
|
220
|
-
- lib/html
|
221
|
-
- lib/html
|
222
|
-
- lib/html
|
223
|
-
- lib/html
|
224
|
-
- lib/html
|
225
|
-
- lib/html
|
226
|
-
- lib/html
|
227
|
-
- lib/html
|
228
|
-
- lib/html
|
229
|
-
- lib/html
|
230
|
-
- lib/html
|
231
|
-
- lib/html
|
232
|
-
- lib/html
|
233
|
-
- lib/html
|
217
|
+
- bin/htmlproofer
|
218
|
+
- lib/html-proofer.rb
|
219
|
+
- lib/html-proofer/cache.rb
|
220
|
+
- lib/html-proofer/check.rb
|
221
|
+
- lib/html-proofer/check/favicon.rb
|
222
|
+
- lib/html-proofer/check/html.rb
|
223
|
+
- lib/html-proofer/check/images.rb
|
224
|
+
- lib/html-proofer/check/links.rb
|
225
|
+
- lib/html-proofer/check/scripts.rb
|
226
|
+
- lib/html-proofer/configuration.rb
|
227
|
+
- lib/html-proofer/element.rb
|
228
|
+
- lib/html-proofer/issue.rb
|
229
|
+
- lib/html-proofer/log.rb
|
230
|
+
- lib/html-proofer/runner.rb
|
231
|
+
- lib/html-proofer/url_validator.rb
|
232
|
+
- lib/html-proofer/utils.rb
|
233
|
+
- lib/html-proofer/version.rb
|
234
234
|
homepage: https://github.com/gjtorikian/html-proofer
|
235
235
|
licenses:
|
236
236
|
- MIT
|
data/lib/html/proofer.rb
DELETED
@@ -1,191 +0,0 @@
|
|
1
|
-
def require_all(path)
|
2
|
-
glob = File.join(File.dirname(__FILE__), path, '*.rb')
|
3
|
-
Dir[glob].each do |f|
|
4
|
-
require f
|
5
|
-
end
|
6
|
-
end
|
7
|
-
|
8
|
-
require_all 'proofer'
|
9
|
-
require_all 'proofer/check_runner'
|
10
|
-
require_all 'proofer/checks'
|
11
|
-
|
12
|
-
require 'parallel'
|
13
|
-
require 'fileutils'
|
14
|
-
|
15
|
-
begin
|
16
|
-
require 'awesome_print'
|
17
|
-
rescue LoadError; end
|
18
|
-
|
19
|
-
module HTML
|
20
|
-
class Proofer
|
21
|
-
include HTML::Proofer::Utils
|
22
|
-
|
23
|
-
attr_reader :options, :typhoeus_opts, :hydra_opts, :parallel_opts, :validation_opts, :external_urls, :iterable_external_urls
|
24
|
-
|
25
|
-
def initialize(src, opts = {})
|
26
|
-
FileUtils.mkdir_p(STORAGE_DIR) unless File.exist?(STORAGE_DIR)
|
27
|
-
|
28
|
-
@src = src
|
29
|
-
|
30
|
-
if opts[:verbose]
|
31
|
-
warn '`@options[:verbose]` will be removed in a future 3.x.x release: http://git.io/vGHHh'
|
32
|
-
end
|
33
|
-
if opts[:href_ignore]
|
34
|
-
warn '`@options[:href_ignore]` will be renamed in a future 3.x.x release: http://git.io/vGHHy'
|
35
|
-
end
|
36
|
-
|
37
|
-
@proofer_opts = HTML::Proofer::Configuration::PROOFER_DEFAULTS
|
38
|
-
|
39
|
-
@typhoeus_opts = HTML::Proofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
|
40
|
-
opts.delete(:typhoeus)
|
41
|
-
|
42
|
-
@hydra_opts = HTML::Proofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})
|
43
|
-
opts.delete(:hydra)
|
44
|
-
|
45
|
-
# fall back to parallel defaults
|
46
|
-
@parallel_opts = opts[:parallel] || {}
|
47
|
-
opts.delete(:parallel)
|
48
|
-
|
49
|
-
@validation_opts = opts[:validation] || {}
|
50
|
-
opts.delete(:validation)
|
51
|
-
|
52
|
-
@options = @proofer_opts.merge(opts)
|
53
|
-
|
54
|
-
@failed_tests = []
|
55
|
-
end
|
56
|
-
|
57
|
-
def logger
|
58
|
-
@logger ||= HTML::Proofer::Log.new(@options[:verbose], @options[:verbosity])
|
59
|
-
end
|
60
|
-
|
61
|
-
def run
|
62
|
-
logger.log :info, :blue, "Running #{checks} on #{@src} on *#{@options[:ext]}... \n\n"
|
63
|
-
|
64
|
-
if @src.is_a?(Array) && !@options[:disable_external]
|
65
|
-
check_list_of_links
|
66
|
-
else
|
67
|
-
check_directory_of_files
|
68
|
-
end
|
69
|
-
|
70
|
-
if @failed_tests.empty?
|
71
|
-
logger.log :info, :green, 'HTML-Proofer finished successfully.'
|
72
|
-
else
|
73
|
-
print_failed_tests
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
def check_list_of_links
|
78
|
-
if @options[:href_swap]
|
79
|
-
@src = @src.map do |url|
|
80
|
-
swap(url, @options[:href_swap])
|
81
|
-
end
|
82
|
-
end
|
83
|
-
@external_urls = Hash[*@src.map { |s| [s, nil] }.flatten]
|
84
|
-
validate_urls
|
85
|
-
end
|
86
|
-
|
87
|
-
# Collects any external URLs found in a directory of files. Also collects
|
88
|
-
# every failed test from check_files_for_internal_woes.
|
89
|
-
# Sends the external URLs to Typhoeus for batch processing.
|
90
|
-
def check_directory_of_files
|
91
|
-
@external_urls = {}
|
92
|
-
results = check_files_for_internal_woes
|
93
|
-
|
94
|
-
results.each do |item|
|
95
|
-
@external_urls.merge!(item[:external_urls])
|
96
|
-
@failed_tests.concat(item[:failed_tests])
|
97
|
-
end
|
98
|
-
|
99
|
-
# TODO: lazy. if we're checking only external links,
|
100
|
-
# we'll just trash all the failed tests. really, we should
|
101
|
-
# just not run those other checks at all.
|
102
|
-
if @options[:external_only]
|
103
|
-
@failed_tests = []
|
104
|
-
validate_urls
|
105
|
-
elsif !@options[:disable_external]
|
106
|
-
validate_urls
|
107
|
-
end
|
108
|
-
|
109
|
-
count = files.length
|
110
|
-
file_text = pluralize(count, 'file', 'files')
|
111
|
-
logger.log :info, :blue, "Ran on #{file_text}!\n\n"
|
112
|
-
end
|
113
|
-
|
114
|
-
# Walks over each implemented check and runs them on the files, in parallel.
|
115
|
-
def check_files_for_internal_woes
|
116
|
-
if @parallel_opts.empty?
|
117
|
-
files.map { |path| check_path(path) }
|
118
|
-
else
|
119
|
-
Parallel.map(files, @parallel_opts) { |path| check_path(path) }
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
def check_path(path)
|
124
|
-
html = create_nokogiri(path)
|
125
|
-
result = { :external_urls => {}, :failed_tests => [] }
|
126
|
-
|
127
|
-
checks.each do |klass|
|
128
|
-
logger.log :debug, :yellow, "Checking #{klass.to_s.downcase} on #{path} ..."
|
129
|
-
check = Object.const_get(klass).new(@src, path, html, @options, @typhoeus_opts, @hydra_opts, @parallel_opts, @validation_opts)
|
130
|
-
check.run
|
131
|
-
result[:external_urls].merge!(check.external_urls)
|
132
|
-
result[:failed_tests].concat(check.issues) if check.issues.length > 0
|
133
|
-
end
|
134
|
-
result
|
135
|
-
end
|
136
|
-
|
137
|
-
def validate_urls
|
138
|
-
url_validator = HTML::Proofer::UrlValidator.new(logger, @external_urls, @options, @typhoeus_opts, @hydra_opts)
|
139
|
-
@failed_tests.concat(url_validator.run)
|
140
|
-
@iterable_external_urls = url_validator.iterable_external_urls
|
141
|
-
end
|
142
|
-
|
143
|
-
def files
|
144
|
-
if File.directory? @src
|
145
|
-
pattern = File.join(@src, '**', "*#{@options[:ext]}")
|
146
|
-
files = Dir.glob(pattern).select { |fn| File.file? fn }
|
147
|
-
files.reject { |f| ignore_file?(f) }
|
148
|
-
elsif File.extname(@src) == @options[:ext]
|
149
|
-
[@src].reject { |f| ignore_file?(f) }
|
150
|
-
else
|
151
|
-
[]
|
152
|
-
end
|
153
|
-
end
|
154
|
-
|
155
|
-
def ignore_file?(file)
|
156
|
-
options[:file_ignore].each do |pattern|
|
157
|
-
return true if pattern.is_a?(String) && pattern == file
|
158
|
-
return true if pattern.is_a?(Regexp) && pattern =~ file
|
159
|
-
end
|
160
|
-
|
161
|
-
false
|
162
|
-
end
|
163
|
-
|
164
|
-
def checks
|
165
|
-
return @checks unless @checks.nil?
|
166
|
-
@checks = HTML::Proofer::CheckRunner.checks.map(&:name)
|
167
|
-
@checks.delete('FaviconCheck') unless @options[:check_favicon]
|
168
|
-
@checks.delete('HtmlCheck') unless @options[:check_html]
|
169
|
-
@options[:checks_to_ignore].each do |ignored|
|
170
|
-
@checks.delete(ignored)
|
171
|
-
end
|
172
|
-
@checks
|
173
|
-
end
|
174
|
-
|
175
|
-
def failed_tests
|
176
|
-
return [] if @failed_tests.empty?
|
177
|
-
result = []
|
178
|
-
@failed_tests.each { |f| result << f.to_s }
|
179
|
-
result
|
180
|
-
end
|
181
|
-
|
182
|
-
def print_failed_tests
|
183
|
-
sorted_failures = HTML::Proofer::CheckRunner::SortedIssues.new(@failed_tests, @options[:error_sort], logger)
|
184
|
-
|
185
|
-
sorted_failures.sort_and_report
|
186
|
-
count = @failed_tests.length
|
187
|
-
failure_text = pluralize(count, 'failure', 'failures')
|
188
|
-
fail logger.colorize :red, "HTML-Proofer found #{failure_text}!"
|
189
|
-
end
|
190
|
-
end
|
191
|
-
end
|
data/lib/html/proofer/cache.rb
DELETED
@@ -1,141 +0,0 @@
|
|
1
|
-
require_relative 'utils'
|
2
|
-
|
3
|
-
require 'json'
|
4
|
-
require 'active_support/core_ext/string'
|
5
|
-
require 'active_support/core_ext/date'
|
6
|
-
require 'active_support/core_ext/numeric/time'
|
7
|
-
|
8
|
-
module HTML
|
9
|
-
class Proofer
|
10
|
-
class Cache
|
11
|
-
include HTML::Proofer::Utils
|
12
|
-
|
13
|
-
FILENAME = File.join(STORAGE_DIR, 'cache.log')
|
14
|
-
|
15
|
-
attr_accessor :exists, :load, :cache_log, :cache_time
|
16
|
-
|
17
|
-
def initialize(logger, options)
|
18
|
-
@logger = logger
|
19
|
-
@cache_log = {}
|
20
|
-
|
21
|
-
if options.nil? || options.empty?
|
22
|
-
@load = false
|
23
|
-
else
|
24
|
-
@load = true
|
25
|
-
@parsed_timeframe = parsed_timeframe(options[:timeframe] || '30d')
|
26
|
-
end
|
27
|
-
@cache_time = Time.now
|
28
|
-
|
29
|
-
if File.exist?(FILENAME)
|
30
|
-
@exists = true
|
31
|
-
contents = File.read(FILENAME)
|
32
|
-
@cache_log = contents.empty? ? {} : JSON.parse(contents)
|
33
|
-
else
|
34
|
-
@exists = false
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def within_timeframe?(time)
|
39
|
-
(@parsed_timeframe..@cache_time).cover?(time)
|
40
|
-
end
|
41
|
-
|
42
|
-
def urls
|
43
|
-
@cache_log['urls'] || []
|
44
|
-
end
|
45
|
-
|
46
|
-
def parsed_timeframe(timeframe)
|
47
|
-
time, date = timeframe.match(/(\d+)(\D)/).captures
|
48
|
-
time = time.to_f
|
49
|
-
case date
|
50
|
-
when 'M'
|
51
|
-
time.months.ago
|
52
|
-
when 'w'
|
53
|
-
time.weeks.ago
|
54
|
-
when 'd'
|
55
|
-
time.days.ago
|
56
|
-
when 'h'
|
57
|
-
time.hours.ago
|
58
|
-
else
|
59
|
-
fail ArgumentError, "#{date} is not a valid timeframe!"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
def add(url, filenames, status, msg = '')
|
64
|
-
data = {
|
65
|
-
:time => @cache_time,
|
66
|
-
:filenames => filenames,
|
67
|
-
:status => status,
|
68
|
-
:message => msg
|
69
|
-
}
|
70
|
-
|
71
|
-
@cache_log[clean_url(url)] = data
|
72
|
-
end
|
73
|
-
|
74
|
-
def detect_url_changes(found)
|
75
|
-
existing_urls = @cache_log.keys.map { |url| clean_url(url) }
|
76
|
-
found_urls = found.keys.map { |url| clean_url(url) }
|
77
|
-
|
78
|
-
# prepare to add new URLs detected
|
79
|
-
additions = found.reject do |url, _|
|
80
|
-
url = clean_url(url)
|
81
|
-
if existing_urls.include?(url)
|
82
|
-
true
|
83
|
-
else
|
84
|
-
@logger.log :debug, :yellow, "Adding #{url} to cache check"
|
85
|
-
false
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
new_link_count = additions.length
|
90
|
-
new_link_text = pluralize(new_link_count, 'link', 'links')
|
91
|
-
@logger.log :info, :blue, "Adding #{new_link_text} to the cache..."
|
92
|
-
|
93
|
-
# remove from cache URLs that no longer exist
|
94
|
-
del = 0
|
95
|
-
@cache_log.delete_if do |url, _|
|
96
|
-
url = clean_url(url)
|
97
|
-
if !found_urls.include?(url)
|
98
|
-
@logger.log :debug, :yellow, "Removing #{url} from cache check"
|
99
|
-
del += 1
|
100
|
-
true
|
101
|
-
else
|
102
|
-
false
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
del_link_text = pluralize(del, 'link', 'links')
|
107
|
-
@logger.log :info, :blue, "Removing #{del_link_text} from the cache..."
|
108
|
-
|
109
|
-
additions
|
110
|
-
end
|
111
|
-
|
112
|
-
def write
|
113
|
-
File.write(FILENAME, @cache_log.to_json)
|
114
|
-
end
|
115
|
-
|
116
|
-
def load?
|
117
|
-
@load.nil?
|
118
|
-
end
|
119
|
-
|
120
|
-
|
121
|
-
# FIXME: there seems to be some discrepancy where Typhoeus occasionally adds
|
122
|
-
# a trailing slash to URL strings, which causes issues with the cache
|
123
|
-
def slashless_url(url)
|
124
|
-
url.chomp('/')
|
125
|
-
end
|
126
|
-
|
127
|
-
# FIXME: it seems that Typhoeus actually acts on escaped URLs,
|
128
|
-
# but there's no way to get at that information, and the cache
|
129
|
-
# stores unescaped URLs. Because of this, some links, such as
|
130
|
-
# github.com/search/issues?q=is:open+is:issue+fig are not matched
|
131
|
-
# as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig
|
132
|
-
def unescape_url(url)
|
133
|
-
Addressable::URI.unescape(url)
|
134
|
-
end
|
135
|
-
|
136
|
-
def clean_url(url)
|
137
|
-
slashless_url(unescape_url(url))
|
138
|
-
end
|
139
|
-
end
|
140
|
-
end
|
141
|
-
end
|
data/lib/html/proofer/check_runner.rb
DELETED
@@ -1,70 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module HTML
|
4
|
-
class Proofer
|
5
|
-
# Mostly handles issue management and collecting of external URLs.
|
6
|
-
class CheckRunner
|
7
|
-
|
8
|
-
attr_reader :issues, :src, :path, :options, :typhoeus_opts, :hydra_opts, :parallel_opts, \
|
9
|
-
:validation_opts, :external_urls, :href_ignores, :url_ignores, :alt_ignores, \
|
10
|
-
:empty_alt_ignore, :allow_hash_href
|
11
|
-
|
12
|
-
def initialize(src, path, html, options, typhoeus_opts, hydra_opts, parallel_opts, validation_opts)
|
13
|
-
@src = src
|
14
|
-
@path = path
|
15
|
-
@html = remove_ignored(html)
|
16
|
-
@options = options
|
17
|
-
@typhoeus_opts = typhoeus_opts
|
18
|
-
@hydra_opts = hydra_opts
|
19
|
-
@parallel_opts = parallel_opts
|
20
|
-
@validation_opts = validation_opts
|
21
|
-
@issues = []
|
22
|
-
@href_ignores = @options[:href_ignore]
|
23
|
-
@url_ignores = @options[:url_ignore]
|
24
|
-
@alt_ignores = @options[:alt_ignore]
|
25
|
-
@empty_alt_ignore = @options[:empty_alt_ignore]
|
26
|
-
@allow_hash_href = @options[:allow_hash_href]
|
27
|
-
@external_urls = {}
|
28
|
-
end
|
29
|
-
|
30
|
-
def run
|
31
|
-
fail NotImplementedError, 'HTML::Proofer::CheckRunner subclasses must implement #run'
|
32
|
-
end
|
33
|
-
|
34
|
-
def add_issue(desc, line_number = nil, status = -1)
|
35
|
-
@issues << Issue.new(@path, desc, line_number, status)
|
36
|
-
end
|
37
|
-
|
38
|
-
def add_to_external_urls(url, line)
|
39
|
-
return if @external_urls[url]
|
40
|
-
add_path_for_url(url)
|
41
|
-
end
|
42
|
-
|
43
|
-
def add_path_for_url(url)
|
44
|
-
if @external_urls[url]
|
45
|
-
@external_urls[url] << @path
|
46
|
-
else
|
47
|
-
@external_urls[url] = [@path]
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
def self.checks
|
52
|
-
classes = []
|
53
|
-
|
54
|
-
ObjectSpace.each_object(Class) do |c|
|
55
|
-
next unless c.superclass == self
|
56
|
-
classes << c
|
57
|
-
end
|
58
|
-
|
59
|
-
classes
|
60
|
-
end
|
61
|
-
|
62
|
-
private
|
63
|
-
|
64
|
-
def remove_ignored(html)
|
65
|
-
html.css('code, pre, tt').each(&:unlink)
|
66
|
-
html
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|