html-proofer 2.6.4 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/{htmlproof → htmlproofer} +31 -24
- data/lib/html-proofer.rb +47 -0
- data/lib/html-proofer/cache.rb +153 -0
- data/lib/html-proofer/check.rb +63 -0
- data/lib/{html/proofer/checks → html-proofer/check}/favicon.rb +2 -8
- data/lib/html-proofer/check/html.rb +21 -0
- data/lib/html-proofer/check/images.rb +47 -0
- data/lib/{html/proofer/checks → html-proofer/check}/links.rb +40 -48
- data/lib/html-proofer/check/scripts.rb +28 -0
- data/lib/html-proofer/configuration.rb +56 -0
- data/lib/html-proofer/element.rb +165 -0
- data/lib/{html/proofer/check_runner → html-proofer}/issue.rb +8 -10
- data/lib/html-proofer/log.rb +49 -0
- data/lib/html-proofer/runner.rb +160 -0
- data/lib/html-proofer/url_validator.rb +218 -0
- data/lib/html-proofer/utils.rb +40 -0
- data/lib/html-proofer/version.rb +3 -0
- metadata +20 -20
- data/lib/html/proofer.rb +0 -191
- data/lib/html/proofer/cache.rb +0 -141
- data/lib/html/proofer/check_runner.rb +0 -70
- data/lib/html/proofer/checkable.rb +0 -168
- data/lib/html/proofer/checks/html.rb +0 -46
- data/lib/html/proofer/checks/images.rb +0 -54
- data/lib/html/proofer/checks/scripts.rb +0 -40
- data/lib/html/proofer/configuration.rb +0 -48
- data/lib/html/proofer/log.rb +0 -42
- data/lib/html/proofer/url_validator.rb +0 -222
- data/lib/html/proofer/utils.rb +0 -42
- data/lib/html/proofer/version.rb +0 -5
- data/lib/html/proofer/xpathfunctions.rb +0 -9
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.6.4
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mercenary
|
@@ -210,27 +210,27 @@ description: Test your rendered HTML files to make sure they're accurate.
|
|
210
210
|
email:
|
211
211
|
- gjtorikian@gmail.com
|
212
212
|
executables:
|
213
|
-
- htmlproof
|
213
|
+
- htmlproofer
|
214
214
|
extensions: []
|
215
215
|
extra_rdoc_files: []
|
216
216
|
files:
|
217
|
-
- bin/htmlproof
|
218
|
-
- lib/html
|
219
|
-
- lib/html
|
220
|
-
- lib/html
|
221
|
-
- lib/html
|
222
|
-
- lib/html
|
223
|
-
- lib/html
|
224
|
-
- lib/html
|
225
|
-
- lib/html
|
226
|
-
- lib/html
|
227
|
-
- lib/html
|
228
|
-
- lib/html
|
229
|
-
- lib/html
|
230
|
-
- lib/html
|
231
|
-
- lib/html
|
232
|
-
- lib/html
|
233
|
-
- lib/html
|
217
|
+
- bin/htmlproofer
|
218
|
+
- lib/html-proofer.rb
|
219
|
+
- lib/html-proofer/cache.rb
|
220
|
+
- lib/html-proofer/check.rb
|
221
|
+
- lib/html-proofer/check/favicon.rb
|
222
|
+
- lib/html-proofer/check/html.rb
|
223
|
+
- lib/html-proofer/check/images.rb
|
224
|
+
- lib/html-proofer/check/links.rb
|
225
|
+
- lib/html-proofer/check/scripts.rb
|
226
|
+
- lib/html-proofer/configuration.rb
|
227
|
+
- lib/html-proofer/element.rb
|
228
|
+
- lib/html-proofer/issue.rb
|
229
|
+
- lib/html-proofer/log.rb
|
230
|
+
- lib/html-proofer/runner.rb
|
231
|
+
- lib/html-proofer/url_validator.rb
|
232
|
+
- lib/html-proofer/utils.rb
|
233
|
+
- lib/html-proofer/version.rb
|
234
234
|
homepage: https://github.com/gjtorikian/html-proofer
|
235
235
|
licenses:
|
236
236
|
- MIT
|
data/lib/html/proofer.rb
DELETED
@@ -1,191 +0,0 @@
|
|
1
|
-
def require_all(path)
|
2
|
-
glob = File.join(File.dirname(__FILE__), path, '*.rb')
|
3
|
-
Dir[glob].each do |f|
|
4
|
-
require f
|
5
|
-
end
|
6
|
-
end
|
7
|
-
|
8
|
-
require_all 'proofer'
|
9
|
-
require_all 'proofer/check_runner'
|
10
|
-
require_all 'proofer/checks'
|
11
|
-
|
12
|
-
require 'parallel'
|
13
|
-
require 'fileutils'
|
14
|
-
|
15
|
-
begin
|
16
|
-
require 'awesome_print'
|
17
|
-
rescue LoadError; end
|
18
|
-
|
19
|
-
module HTML
|
20
|
-
class Proofer
|
21
|
-
include HTML::Proofer::Utils
|
22
|
-
|
23
|
-
attr_reader :options, :typhoeus_opts, :hydra_opts, :parallel_opts, :validation_opts, :external_urls, :iterable_external_urls
|
24
|
-
|
25
|
-
def initialize(src, opts = {})
|
26
|
-
FileUtils.mkdir_p(STORAGE_DIR) unless File.exist?(STORAGE_DIR)
|
27
|
-
|
28
|
-
@src = src
|
29
|
-
|
30
|
-
if opts[:verbose]
|
31
|
-
warn '`@options[:verbose]` will be removed in a future 3.x.x release: http://git.io/vGHHh'
|
32
|
-
end
|
33
|
-
if opts[:href_ignore]
|
34
|
-
warn '`@options[:href_ignore]` will be renamed in a future 3.x.x release: http://git.io/vGHHy'
|
35
|
-
end
|
36
|
-
|
37
|
-
@proofer_opts = HTML::Proofer::Configuration::PROOFER_DEFAULTS
|
38
|
-
|
39
|
-
@typhoeus_opts = HTML::Proofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
|
40
|
-
opts.delete(:typhoeus)
|
41
|
-
|
42
|
-
@hydra_opts = HTML::Proofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})
|
43
|
-
opts.delete(:hydra)
|
44
|
-
|
45
|
-
# fall back to parallel defaults
|
46
|
-
@parallel_opts = opts[:parallel] || {}
|
47
|
-
opts.delete(:parallel)
|
48
|
-
|
49
|
-
@validation_opts = opts[:validation] || {}
|
50
|
-
opts.delete(:validation)
|
51
|
-
|
52
|
-
@options = @proofer_opts.merge(opts)
|
53
|
-
|
54
|
-
@failed_tests = []
|
55
|
-
end
|
56
|
-
|
57
|
-
def logger
|
58
|
-
@logger ||= HTML::Proofer::Log.new(@options[:verbose], @options[:verbosity])
|
59
|
-
end
|
60
|
-
|
61
|
-
def run
|
62
|
-
logger.log :info, :blue, "Running #{checks} on #{@src} on *#{@options[:ext]}... \n\n"
|
63
|
-
|
64
|
-
if @src.is_a?(Array) && !@options[:disable_external]
|
65
|
-
check_list_of_links
|
66
|
-
else
|
67
|
-
check_directory_of_files
|
68
|
-
end
|
69
|
-
|
70
|
-
if @failed_tests.empty?
|
71
|
-
logger.log :info, :green, 'HTML-Proofer finished successfully.'
|
72
|
-
else
|
73
|
-
print_failed_tests
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
def check_list_of_links
|
78
|
-
if @options[:href_swap]
|
79
|
-
@src = @src.map do |url|
|
80
|
-
swap(url, @options[:href_swap])
|
81
|
-
end
|
82
|
-
end
|
83
|
-
@external_urls = Hash[*@src.map { |s| [s, nil] }.flatten]
|
84
|
-
validate_urls
|
85
|
-
end
|
86
|
-
|
87
|
-
# Collects any external URLs found in a directory of files. Also collectes
|
88
|
-
# every failed test from check_files_for_internal_woes.
|
89
|
-
# Sends the external URLs to Typhoeus for batch processing.
|
90
|
-
def check_directory_of_files
|
91
|
-
@external_urls = {}
|
92
|
-
results = check_files_for_internal_woes
|
93
|
-
|
94
|
-
results.each do |item|
|
95
|
-
@external_urls.merge!(item[:external_urls])
|
96
|
-
@failed_tests.concat(item[:failed_tests])
|
97
|
-
end
|
98
|
-
|
99
|
-
# TODO: lazy. if we're checking only external links,
|
100
|
-
# we'll just trash all the failed tests. really, we should
|
101
|
-
# just not run those other checks at all.
|
102
|
-
if @options[:external_only]
|
103
|
-
@failed_tests = []
|
104
|
-
validate_urls
|
105
|
-
elsif !@options[:disable_external]
|
106
|
-
validate_urls
|
107
|
-
end
|
108
|
-
|
109
|
-
count = files.length
|
110
|
-
file_text = pluralize(count, 'file', 'files')
|
111
|
-
logger.log :info, :blue, "Ran on #{file_text}!\n\n"
|
112
|
-
end
|
113
|
-
|
114
|
-
# Walks over each implemented check and runs them on the files, in parallel.
|
115
|
-
def check_files_for_internal_woes
|
116
|
-
if @parallel_opts.empty?
|
117
|
-
files.map { |path| check_path(path) }
|
118
|
-
else
|
119
|
-
Parallel.map(files, @parallel_opts) { |path| check_path(path) }
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
def check_path(path)
|
124
|
-
html = create_nokogiri(path)
|
125
|
-
result = { :external_urls => {}, :failed_tests => [] }
|
126
|
-
|
127
|
-
checks.each do |klass|
|
128
|
-
logger.log :debug, :yellow, "Checking #{klass.to_s.downcase} on #{path} ..."
|
129
|
-
check = Object.const_get(klass).new(@src, path, html, @options, @typhoeus_opts, @hydra_opts, @parallel_opts, @validation_opts)
|
130
|
-
check.run
|
131
|
-
result[:external_urls].merge!(check.external_urls)
|
132
|
-
result[:failed_tests].concat(check.issues) if check.issues.length > 0
|
133
|
-
end
|
134
|
-
result
|
135
|
-
end
|
136
|
-
|
137
|
-
def validate_urls
|
138
|
-
url_validator = HTML::Proofer::UrlValidator.new(logger, @external_urls, @options, @typhoeus_opts, @hydra_opts)
|
139
|
-
@failed_tests.concat(url_validator.run)
|
140
|
-
@iterable_external_urls = url_validator.iterable_external_urls
|
141
|
-
end
|
142
|
-
|
143
|
-
def files
|
144
|
-
if File.directory? @src
|
145
|
-
pattern = File.join(@src, '**', "*#{@options[:ext]}")
|
146
|
-
files = Dir.glob(pattern).select { |fn| File.file? fn }
|
147
|
-
files.reject { |f| ignore_file?(f) }
|
148
|
-
elsif File.extname(@src) == @options[:ext]
|
149
|
-
[@src].reject { |f| ignore_file?(f) }
|
150
|
-
else
|
151
|
-
[]
|
152
|
-
end
|
153
|
-
end
|
154
|
-
|
155
|
-
def ignore_file?(file)
|
156
|
-
options[:file_ignore].each do |pattern|
|
157
|
-
return true if pattern.is_a?(String) && pattern == file
|
158
|
-
return true if pattern.is_a?(Regexp) && pattern =~ file
|
159
|
-
end
|
160
|
-
|
161
|
-
false
|
162
|
-
end
|
163
|
-
|
164
|
-
def checks
|
165
|
-
return @checks unless @checks.nil?
|
166
|
-
@checks = HTML::Proofer::CheckRunner.checks.map(&:name)
|
167
|
-
@checks.delete('FaviconCheck') unless @options[:check_favicon]
|
168
|
-
@checks.delete('HtmlCheck') unless @options[:check_html]
|
169
|
-
@options[:checks_to_ignore].each do |ignored|
|
170
|
-
@checks.delete(ignored)
|
171
|
-
end
|
172
|
-
@checks
|
173
|
-
end
|
174
|
-
|
175
|
-
def failed_tests
|
176
|
-
return [] if @failed_tests.empty?
|
177
|
-
result = []
|
178
|
-
@failed_tests.each { |f| result << f.to_s }
|
179
|
-
result
|
180
|
-
end
|
181
|
-
|
182
|
-
def print_failed_tests
|
183
|
-
sorted_failures = HTML::Proofer::CheckRunner::SortedIssues.new(@failed_tests, @options[:error_sort], logger)
|
184
|
-
|
185
|
-
sorted_failures.sort_and_report
|
186
|
-
count = @failed_tests.length
|
187
|
-
failure_text = pluralize(count, 'failure', 'failures')
|
188
|
-
fail logger.colorize :red, "HTML-Proofer found #{failure_text}!"
|
189
|
-
end
|
190
|
-
end
|
191
|
-
end
|
data/lib/html/proofer/cache.rb
DELETED
@@ -1,141 +0,0 @@
|
|
1
|
-
require_relative 'utils'
|
2
|
-
|
3
|
-
require 'json'
|
4
|
-
require 'active_support/core_ext/string'
|
5
|
-
require 'active_support/core_ext/date'
|
6
|
-
require 'active_support/core_ext/numeric/time'
|
7
|
-
|
8
|
-
module HTML
|
9
|
-
class Proofer
|
10
|
-
class Cache
|
11
|
-
include HTML::Proofer::Utils
|
12
|
-
|
13
|
-
FILENAME = File.join(STORAGE_DIR, 'cache.log')
|
14
|
-
|
15
|
-
attr_accessor :exists, :load, :cache_log, :cache_time
|
16
|
-
|
17
|
-
def initialize(logger, options)
|
18
|
-
@logger = logger
|
19
|
-
@cache_log = {}
|
20
|
-
|
21
|
-
if options.nil? || options.empty?
|
22
|
-
@load = false
|
23
|
-
else
|
24
|
-
@load = true
|
25
|
-
@parsed_timeframe = parsed_timeframe(options[:timeframe] || '30d')
|
26
|
-
end
|
27
|
-
@cache_time = Time.now
|
28
|
-
|
29
|
-
if File.exist?(FILENAME)
|
30
|
-
@exists = true
|
31
|
-
contents = File.read(FILENAME)
|
32
|
-
@cache_log = contents.empty? ? {} : JSON.parse(contents)
|
33
|
-
else
|
34
|
-
@exists = false
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def within_timeframe?(time)
|
39
|
-
(@parsed_timeframe..@cache_time).cover?(time)
|
40
|
-
end
|
41
|
-
|
42
|
-
def urls
|
43
|
-
@cache_log['urls'] || []
|
44
|
-
end
|
45
|
-
|
46
|
-
def parsed_timeframe(timeframe)
|
47
|
-
time, date = timeframe.match(/(\d+)(\D)/).captures
|
48
|
-
time = time.to_f
|
49
|
-
case date
|
50
|
-
when 'M'
|
51
|
-
time.months.ago
|
52
|
-
when 'w'
|
53
|
-
time.weeks.ago
|
54
|
-
when 'd'
|
55
|
-
time.days.ago
|
56
|
-
when 'h'
|
57
|
-
time.hours.ago
|
58
|
-
else
|
59
|
-
fail ArgumentError, "#{date} is not a valid timeframe!"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
def add(url, filenames, status, msg = '')
|
64
|
-
data = {
|
65
|
-
:time => @cache_time,
|
66
|
-
:filenames => filenames,
|
67
|
-
:status => status,
|
68
|
-
:message => msg
|
69
|
-
}
|
70
|
-
|
71
|
-
@cache_log[clean_url(url)] = data
|
72
|
-
end
|
73
|
-
|
74
|
-
def detect_url_changes(found)
|
75
|
-
existing_urls = @cache_log.keys.map { |url| clean_url(url) }
|
76
|
-
found_urls = found.keys.map { |url| clean_url(url) }
|
77
|
-
|
78
|
-
# prepare to add new URLs detected
|
79
|
-
additions = found.reject do |url, _|
|
80
|
-
url = clean_url(url)
|
81
|
-
if existing_urls.include?(url)
|
82
|
-
true
|
83
|
-
else
|
84
|
-
@logger.log :debug, :yellow, "Adding #{url} to cache check"
|
85
|
-
false
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
new_link_count = additions.length
|
90
|
-
new_link_text = pluralize(new_link_count, 'link', 'links')
|
91
|
-
@logger.log :info, :blue, "Adding #{new_link_text} to the cache..."
|
92
|
-
|
93
|
-
# remove from cache URLs that no longer exist
|
94
|
-
del = 0
|
95
|
-
@cache_log.delete_if do |url, _|
|
96
|
-
url = clean_url(url)
|
97
|
-
if !found_urls.include?(url)
|
98
|
-
@logger.log :debug, :yellow, "Removing #{url} from cache check"
|
99
|
-
del += 1
|
100
|
-
true
|
101
|
-
else
|
102
|
-
false
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
del_link_text = pluralize(del, 'link', 'links')
|
107
|
-
@logger.log :info, :blue, "Removing #{del_link_text} from the cache..."
|
108
|
-
|
109
|
-
additions
|
110
|
-
end
|
111
|
-
|
112
|
-
def write
|
113
|
-
File.write(FILENAME, @cache_log.to_json)
|
114
|
-
end
|
115
|
-
|
116
|
-
def load?
|
117
|
-
@load.nil?
|
118
|
-
end
|
119
|
-
|
120
|
-
|
121
|
-
# FIXME: there seems to be some discrepenacy where Typhoeus occasionally adds
|
122
|
-
# a trailing slash to URL strings, which causes issues with the cache
|
123
|
-
def slashless_url(url)
|
124
|
-
url.chomp('/')
|
125
|
-
end
|
126
|
-
|
127
|
-
# FIXME: it seems that Typhoeus actually acts on escaped URLs,
|
128
|
-
# but there's no way to get at that information, and the cache
|
129
|
-
# stores unescaped URLs. Because of this, some links, such as
|
130
|
-
# github.com/search/issues?q=is:open+is:issue+fig are not matched
|
131
|
-
# as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig
|
132
|
-
def unescape_url(url)
|
133
|
-
Addressable::URI.unescape(url)
|
134
|
-
end
|
135
|
-
|
136
|
-
def clean_url(url)
|
137
|
-
slashless_url(unescape_url(url))
|
138
|
-
end
|
139
|
-
end
|
140
|
-
end
|
141
|
-
end
|
data/lib/html/proofer/check_runner.rb
DELETED
@@ -1,70 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module HTML
|
4
|
-
class Proofer
|
5
|
-
# Mostly handles issue management and collecting of external URLs.
|
6
|
-
class CheckRunner
|
7
|
-
|
8
|
-
attr_reader :issues, :src, :path, :options, :typhoeus_opts, :hydra_opts, :parallel_opts, \
|
9
|
-
:validation_opts, :external_urls, :href_ignores, :url_ignores, :alt_ignores, \
|
10
|
-
:empty_alt_ignore, :allow_hash_href
|
11
|
-
|
12
|
-
def initialize(src, path, html, options, typhoeus_opts, hydra_opts, parallel_opts, validation_opts)
|
13
|
-
@src = src
|
14
|
-
@path = path
|
15
|
-
@html = remove_ignored(html)
|
16
|
-
@options = options
|
17
|
-
@typhoeus_opts = typhoeus_opts
|
18
|
-
@hydra_opts = hydra_opts
|
19
|
-
@parallel_opts = parallel_opts
|
20
|
-
@validation_opts = validation_opts
|
21
|
-
@issues = []
|
22
|
-
@href_ignores = @options[:href_ignore]
|
23
|
-
@url_ignores = @options[:url_ignore]
|
24
|
-
@alt_ignores = @options[:alt_ignore]
|
25
|
-
@empty_alt_ignore = @options[:empty_alt_ignore]
|
26
|
-
@allow_hash_href = @options[:allow_hash_href]
|
27
|
-
@external_urls = {}
|
28
|
-
end
|
29
|
-
|
30
|
-
def run
|
31
|
-
fail NotImplementedError, 'HTML::Proofer::CheckRunner subclasses must implement #run'
|
32
|
-
end
|
33
|
-
|
34
|
-
def add_issue(desc, line_number = nil, status = -1)
|
35
|
-
@issues << Issue.new(@path, desc, line_number, status)
|
36
|
-
end
|
37
|
-
|
38
|
-
def add_to_external_urls(url, line)
|
39
|
-
return if @external_urls[url]
|
40
|
-
add_path_for_url(url)
|
41
|
-
end
|
42
|
-
|
43
|
-
def add_path_for_url(url)
|
44
|
-
if @external_urls[url]
|
45
|
-
@external_urls[url] << @path
|
46
|
-
else
|
47
|
-
@external_urls[url] = [@path]
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
def self.checks
|
52
|
-
classes = []
|
53
|
-
|
54
|
-
ObjectSpace.each_object(Class) do |c|
|
55
|
-
next unless c.superclass == self
|
56
|
-
classes << c
|
57
|
-
end
|
58
|
-
|
59
|
-
classes
|
60
|
-
end
|
61
|
-
|
62
|
-
private
|
63
|
-
|
64
|
-
def remove_ignored(html)
|
65
|
-
html.css('code, pre, tt').each(&:unlink)
|
66
|
-
html
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|