html-proofer 2.6.4 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ module HTMLProofer
2
+ VERSION = '3.0.0'
3
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.6.4
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-26 00:00:00.000000000 Z
11
+ date: 2016-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mercenary
@@ -210,27 +210,27 @@ description: Test your rendered HTML files to make sure they're accurate.
210
210
  email:
211
211
  - gjtorikian@gmail.com
212
212
  executables:
213
- - htmlproof
213
+ - htmlproofer
214
214
  extensions: []
215
215
  extra_rdoc_files: []
216
216
  files:
217
- - bin/htmlproof
218
- - lib/html/proofer.rb
219
- - lib/html/proofer/cache.rb
220
- - lib/html/proofer/check_runner.rb
221
- - lib/html/proofer/check_runner/issue.rb
222
- - lib/html/proofer/checkable.rb
223
- - lib/html/proofer/checks/favicon.rb
224
- - lib/html/proofer/checks/html.rb
225
- - lib/html/proofer/checks/images.rb
226
- - lib/html/proofer/checks/links.rb
227
- - lib/html/proofer/checks/scripts.rb
228
- - lib/html/proofer/configuration.rb
229
- - lib/html/proofer/log.rb
230
- - lib/html/proofer/url_validator.rb
231
- - lib/html/proofer/utils.rb
232
- - lib/html/proofer/version.rb
233
- - lib/html/proofer/xpathfunctions.rb
217
+ - bin/htmlproofer
218
+ - lib/html-proofer.rb
219
+ - lib/html-proofer/cache.rb
220
+ - lib/html-proofer/check.rb
221
+ - lib/html-proofer/check/favicon.rb
222
+ - lib/html-proofer/check/html.rb
223
+ - lib/html-proofer/check/images.rb
224
+ - lib/html-proofer/check/links.rb
225
+ - lib/html-proofer/check/scripts.rb
226
+ - lib/html-proofer/configuration.rb
227
+ - lib/html-proofer/element.rb
228
+ - lib/html-proofer/issue.rb
229
+ - lib/html-proofer/log.rb
230
+ - lib/html-proofer/runner.rb
231
+ - lib/html-proofer/url_validator.rb
232
+ - lib/html-proofer/utils.rb
233
+ - lib/html-proofer/version.rb
234
234
  homepage: https://github.com/gjtorikian/html-proofer
235
235
  licenses:
236
236
  - MIT
data/lib/html/proofer.rb DELETED
@@ -1,191 +0,0 @@
1
- def require_all(path)
2
- glob = File.join(File.dirname(__FILE__), path, '*.rb')
3
- Dir[glob].each do |f|
4
- require f
5
- end
6
- end
7
-
8
- require_all 'proofer'
9
- require_all 'proofer/check_runner'
10
- require_all 'proofer/checks'
11
-
12
- require 'parallel'
13
- require 'fileutils'
14
-
15
- begin
16
- require 'awesome_print'
17
- rescue LoadError; end
18
-
19
- module HTML
20
- class Proofer
21
- include HTML::Proofer::Utils
22
-
23
- attr_reader :options, :typhoeus_opts, :hydra_opts, :parallel_opts, :validation_opts, :external_urls, :iterable_external_urls
24
-
25
- def initialize(src, opts = {})
26
- FileUtils.mkdir_p(STORAGE_DIR) unless File.exist?(STORAGE_DIR)
27
-
28
- @src = src
29
-
30
- if opts[:verbose]
31
- warn '`@options[:verbose]` will be removed in a future 3.x.x release: http://git.io/vGHHh'
32
- end
33
- if opts[:href_ignore]
34
- warn '`@options[:href_ignore]` will be renamed in a future 3.x.x release: http://git.io/vGHHy'
35
- end
36
-
37
- @proofer_opts = HTML::Proofer::Configuration::PROOFER_DEFAULTS
38
-
39
- @typhoeus_opts = HTML::Proofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
40
- opts.delete(:typhoeus)
41
-
42
- @hydra_opts = HTML::Proofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})
43
- opts.delete(:hydra)
44
-
45
- # fall back to parallel defaults
46
- @parallel_opts = opts[:parallel] || {}
47
- opts.delete(:parallel)
48
-
49
- @validation_opts = opts[:validation] || {}
50
- opts.delete(:validation)
51
-
52
- @options = @proofer_opts.merge(opts)
53
-
54
- @failed_tests = []
55
- end
56
-
57
- def logger
58
- @logger ||= HTML::Proofer::Log.new(@options[:verbose], @options[:verbosity])
59
- end
60
-
61
- def run
62
- logger.log :info, :blue, "Running #{checks} on #{@src} on *#{@options[:ext]}... \n\n"
63
-
64
- if @src.is_a?(Array) && !@options[:disable_external]
65
- check_list_of_links
66
- else
67
- check_directory_of_files
68
- end
69
-
70
- if @failed_tests.empty?
71
- logger.log :info, :green, 'HTML-Proofer finished successfully.'
72
- else
73
- print_failed_tests
74
- end
75
- end
76
-
77
- def check_list_of_links
78
- if @options[:href_swap]
79
- @src = @src.map do |url|
80
- swap(url, @options[:href_swap])
81
- end
82
- end
83
- @external_urls = Hash[*@src.map { |s| [s, nil] }.flatten]
84
- validate_urls
85
- end
86
-
87
- # Collects any external URLs found in a directory of files. Also collectes
88
- # every failed test from check_files_for_internal_woes.
89
- # Sends the external URLs to Typhoeus for batch processing.
90
- def check_directory_of_files
91
- @external_urls = {}
92
- results = check_files_for_internal_woes
93
-
94
- results.each do |item|
95
- @external_urls.merge!(item[:external_urls])
96
- @failed_tests.concat(item[:failed_tests])
97
- end
98
-
99
- # TODO: lazy. if we're checking only external links,
100
- # we'll just trash all the failed tests. really, we should
101
- # just not run those other checks at all.
102
- if @options[:external_only]
103
- @failed_tests = []
104
- validate_urls
105
- elsif !@options[:disable_external]
106
- validate_urls
107
- end
108
-
109
- count = files.length
110
- file_text = pluralize(count, 'file', 'files')
111
- logger.log :info, :blue, "Ran on #{file_text}!\n\n"
112
- end
113
-
114
- # Walks over each implemented check and runs them on the files, in parallel.
115
- def check_files_for_internal_woes
116
- if @parallel_opts.empty?
117
- files.map { |path| check_path(path) }
118
- else
119
- Parallel.map(files, @parallel_opts) { |path| check_path(path) }
120
- end
121
- end
122
-
123
- def check_path(path)
124
- html = create_nokogiri(path)
125
- result = { :external_urls => {}, :failed_tests => [] }
126
-
127
- checks.each do |klass|
128
- logger.log :debug, :yellow, "Checking #{klass.to_s.downcase} on #{path} ..."
129
- check = Object.const_get(klass).new(@src, path, html, @options, @typhoeus_opts, @hydra_opts, @parallel_opts, @validation_opts)
130
- check.run
131
- result[:external_urls].merge!(check.external_urls)
132
- result[:failed_tests].concat(check.issues) if check.issues.length > 0
133
- end
134
- result
135
- end
136
-
137
- def validate_urls
138
- url_validator = HTML::Proofer::UrlValidator.new(logger, @external_urls, @options, @typhoeus_opts, @hydra_opts)
139
- @failed_tests.concat(url_validator.run)
140
- @iterable_external_urls = url_validator.iterable_external_urls
141
- end
142
-
143
- def files
144
- if File.directory? @src
145
- pattern = File.join(@src, '**', "*#{@options[:ext]}")
146
- files = Dir.glob(pattern).select { |fn| File.file? fn }
147
- files.reject { |f| ignore_file?(f) }
148
- elsif File.extname(@src) == @options[:ext]
149
- [@src].reject { |f| ignore_file?(f) }
150
- else
151
- []
152
- end
153
- end
154
-
155
- def ignore_file?(file)
156
- options[:file_ignore].each do |pattern|
157
- return true if pattern.is_a?(String) && pattern == file
158
- return true if pattern.is_a?(Regexp) && pattern =~ file
159
- end
160
-
161
- false
162
- end
163
-
164
- def checks
165
- return @checks unless @checks.nil?
166
- @checks = HTML::Proofer::CheckRunner.checks.map(&:name)
167
- @checks.delete('FaviconCheck') unless @options[:check_favicon]
168
- @checks.delete('HtmlCheck') unless @options[:check_html]
169
- @options[:checks_to_ignore].each do |ignored|
170
- @checks.delete(ignored)
171
- end
172
- @checks
173
- end
174
-
175
- def failed_tests
176
- return [] if @failed_tests.empty?
177
- result = []
178
- @failed_tests.each { |f| result << f.to_s }
179
- result
180
- end
181
-
182
- def print_failed_tests
183
- sorted_failures = HTML::Proofer::CheckRunner::SortedIssues.new(@failed_tests, @options[:error_sort], logger)
184
-
185
- sorted_failures.sort_and_report
186
- count = @failed_tests.length
187
- failure_text = pluralize(count, 'failure', 'failures')
188
- fail logger.colorize :red, "HTML-Proofer found #{failure_text}!"
189
- end
190
- end
191
- end
@@ -1,141 +0,0 @@
1
- require_relative 'utils'
2
-
3
- require 'json'
4
- require 'active_support/core_ext/string'
5
- require 'active_support/core_ext/date'
6
- require 'active_support/core_ext/numeric/time'
7
-
8
- module HTML
9
- class Proofer
10
- class Cache
11
- include HTML::Proofer::Utils
12
-
13
- FILENAME = File.join(STORAGE_DIR, 'cache.log')
14
-
15
- attr_accessor :exists, :load, :cache_log, :cache_time
16
-
17
- def initialize(logger, options)
18
- @logger = logger
19
- @cache_log = {}
20
-
21
- if options.nil? || options.empty?
22
- @load = false
23
- else
24
- @load = true
25
- @parsed_timeframe = parsed_timeframe(options[:timeframe] || '30d')
26
- end
27
- @cache_time = Time.now
28
-
29
- if File.exist?(FILENAME)
30
- @exists = true
31
- contents = File.read(FILENAME)
32
- @cache_log = contents.empty? ? {} : JSON.parse(contents)
33
- else
34
- @exists = false
35
- end
36
- end
37
-
38
- def within_timeframe?(time)
39
- (@parsed_timeframe..@cache_time).cover?(time)
40
- end
41
-
42
- def urls
43
- @cache_log['urls'] || []
44
- end
45
-
46
- def parsed_timeframe(timeframe)
47
- time, date = timeframe.match(/(\d+)(\D)/).captures
48
- time = time.to_f
49
- case date
50
- when 'M'
51
- time.months.ago
52
- when 'w'
53
- time.weeks.ago
54
- when 'd'
55
- time.days.ago
56
- when 'h'
57
- time.hours.ago
58
- else
59
- fail ArgumentError, "#{date} is not a valid timeframe!"
60
- end
61
- end
62
-
63
- def add(url, filenames, status, msg = '')
64
- data = {
65
- :time => @cache_time,
66
- :filenames => filenames,
67
- :status => status,
68
- :message => msg
69
- }
70
-
71
- @cache_log[clean_url(url)] = data
72
- end
73
-
74
- def detect_url_changes(found)
75
- existing_urls = @cache_log.keys.map { |url| clean_url(url) }
76
- found_urls = found.keys.map { |url| clean_url(url) }
77
-
78
- # prepare to add new URLs detected
79
- additions = found.reject do |url, _|
80
- url = clean_url(url)
81
- if existing_urls.include?(url)
82
- true
83
- else
84
- @logger.log :debug, :yellow, "Adding #{url} to cache check"
85
- false
86
- end
87
- end
88
-
89
- new_link_count = additions.length
90
- new_link_text = pluralize(new_link_count, 'link', 'links')
91
- @logger.log :info, :blue, "Adding #{new_link_text} to the cache..."
92
-
93
- # remove from cache URLs that no longer exist
94
- del = 0
95
- @cache_log.delete_if do |url, _|
96
- url = clean_url(url)
97
- if !found_urls.include?(url)
98
- @logger.log :debug, :yellow, "Removing #{url} from cache check"
99
- del += 1
100
- true
101
- else
102
- false
103
- end
104
- end
105
-
106
- del_link_text = pluralize(del, 'link', 'links')
107
- @logger.log :info, :blue, "Removing #{del_link_text} from the cache..."
108
-
109
- additions
110
- end
111
-
112
- def write
113
- File.write(FILENAME, @cache_log.to_json)
114
- end
115
-
116
- def load?
117
- @load.nil?
118
- end
119
-
120
-
121
- # FIXME: there seems to be some discrepenacy where Typhoeus occasionally adds
122
- # a trailing slash to URL strings, which causes issues with the cache
123
- def slashless_url(url)
124
- url.chomp('/')
125
- end
126
-
127
- # FIXME: it seems that Typhoeus actually acts on escaped URLs,
128
- # but there's no way to get at that information, and the cache
129
- # stores unescaped URLs. Because of this, some links, such as
130
- # github.com/search/issues?q=is:open+is:issue+fig are not matched
131
- # as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig
132
- def unescape_url(url)
133
- Addressable::URI.unescape(url)
134
- end
135
-
136
- def clean_url(url)
137
- slashless_url(unescape_url(url))
138
- end
139
- end
140
- end
141
- end
@@ -1,70 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module HTML
4
- class Proofer
5
- # Mostly handles issue management and collecting of external URLs.
6
- class CheckRunner
7
-
8
- attr_reader :issues, :src, :path, :options, :typhoeus_opts, :hydra_opts, :parallel_opts, \
9
- :validation_opts, :external_urls, :href_ignores, :url_ignores, :alt_ignores, \
10
- :empty_alt_ignore, :allow_hash_href
11
-
12
- def initialize(src, path, html, options, typhoeus_opts, hydra_opts, parallel_opts, validation_opts)
13
- @src = src
14
- @path = path
15
- @html = remove_ignored(html)
16
- @options = options
17
- @typhoeus_opts = typhoeus_opts
18
- @hydra_opts = hydra_opts
19
- @parallel_opts = parallel_opts
20
- @validation_opts = validation_opts
21
- @issues = []
22
- @href_ignores = @options[:href_ignore]
23
- @url_ignores = @options[:url_ignore]
24
- @alt_ignores = @options[:alt_ignore]
25
- @empty_alt_ignore = @options[:empty_alt_ignore]
26
- @allow_hash_href = @options[:allow_hash_href]
27
- @external_urls = {}
28
- end
29
-
30
- def run
31
- fail NotImplementedError, 'HTML::Proofer::CheckRunner subclasses must implement #run'
32
- end
33
-
34
- def add_issue(desc, line_number = nil, status = -1)
35
- @issues << Issue.new(@path, desc, line_number, status)
36
- end
37
-
38
- def add_to_external_urls(url, line)
39
- return if @external_urls[url]
40
- add_path_for_url(url)
41
- end
42
-
43
- def add_path_for_url(url)
44
- if @external_urls[url]
45
- @external_urls[url] << @path
46
- else
47
- @external_urls[url] = [@path]
48
- end
49
- end
50
-
51
- def self.checks
52
- classes = []
53
-
54
- ObjectSpace.each_object(Class) do |c|
55
- next unless c.superclass == self
56
- classes << c
57
- end
58
-
59
- classes
60
- end
61
-
62
- private
63
-
64
- def remove_ignored(html)
65
- html.css('code, pre, tt').each(&:unlink)
66
- html
67
- end
68
- end
69
- end
70
- end