html-proofer 2.6.4 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 949542fac56daeafe93fbf17014e96cba9deba3c
4
- data.tar.gz: b42e8375b5886aa310fcb1920ca0c676e9dedcc3
3
+ metadata.gz: 1ce6c96e75683a8da4de60754d83aef138221d02
4
+ data.tar.gz: b341f1ee5b7237e58cba8e775dfaa152eaeeb0a5
5
5
  SHA512:
6
- metadata.gz: 6d2f2741cc3c6c7ca32220f1e338986148d5bf4c026e1bd5b5e7955652f6a0807d9037a4d71a0aaec7c81e3c9d58ae09ef649388c5be589c08991d27c2a88c33
7
- data.tar.gz: 5a478a174241a58cf29192bb62a964703ecb5e90060ac702183067c42a894858dbad4cf5a7591cb216d92785b9216f1eaca2232bcfa248da2af20d206ac1daf6
6
+ metadata.gz: 000ff0816076d1e8b738526f380e88309fdc04d93fa203a43d62960696064062b7942647a5f79be0f8e7239ca69e67f08f669866352cabf67152a6f95beb643b
7
+ data.tar.gz: 1863ee52843f2d5071170aaed79ce2ae9f6902ffb1a99d93021b746eb9d3d792d460d4caff2f703907291915b0bc8861af8967cd4f5a394e0a6604aa23e0dec0
@@ -3,19 +3,20 @@ STDOUT.sync = true
3
3
 
4
4
  $LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w( .. lib ))
5
5
 
6
- require 'html/proofer'
6
+ require 'html-proofer'
7
7
  require 'mercenary'
8
8
 
9
- Mercenary.program(:htmlproof) do |p|
10
- p.version HTML::Proofer::VERSION
9
+ Mercenary.program(:htmlproofer) do |p|
10
+ p.version HTMLProofer::VERSION
11
11
  p.description %(Test your rendered HTML files to make sure they're accurate.)
12
- p.syntax 'htmlproof PATH [options]'
12
+ p.syntax 'htmlproofer PATH [options]'
13
13
 
14
14
  p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.'
15
15
 
16
16
  p.option 'allow_hash_href', '--allow-hash-href', 'If `true`, ignores the `href` `#`'
17
17
  p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
18
18
  p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'A comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore'
19
+ p.option 'assume_extension', '--assume-extension', 'Automatically add extension (e.g. `.html`) to file paths, to allow extensionless URLs (as supported by Jekyll 3 and GitHub Pages) (default: `false`).'
19
20
  p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, ' An array of Strings indicating which checks you\'d like to not perform.'
20
21
  p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the website exists). This slows the checker down (default: `false`).'
21
22
  p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).'
@@ -25,16 +26,17 @@ Mercenary.program(:htmlproof) do |p|
25
26
  p.option 'empty_alt_ignore', '--empty-alt-ignore', 'If `true`, ignores images with empty alt tags'
26
27
  p.option 'error_sort', '--error-sort SORT', 'Defines the sort order for error output. Can be `:path`, `:desc`, or `:status` (default: `path`).'
27
28
  p.option 'enforce_https', '--enforce-https', 'Fails a link if it\'s not marked as `https` (default: `false`).'
28
- p.option 'ext', '--ext EXT', String, 'The extension of your HTML files including the dot. (default: `.html`)'
29
+ p.option 'extension', '--extension EXT', String, 'The extension of your HTML files including the dot. (default: `.html`)'
29
30
  p.option 'external_only', '--external_only', 'Only checks problems with external references'
30
31
  p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
31
- p.option 'href_ignore', '--href-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored. **Will be renamed in a future release.**'
32
- p.option 'href_swap', '--href-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms links that match `RegExp` into `String` via `gsub`. **Will be renamed in a future release.**'
33
- p.option 'ignore_script_embeds', '--ignore-script-embeds', 'Ignore `check_html` errors associated with `script`s (default: `false`)'
32
+ p.option 'http_status_ignore', '--http-status-ignore 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
33
+ p.option 'report_invalid_tags', '--report-invalid-tags', 'Ignore `check_html` errors associated with unknown markup (default: `false`)'
34
+ p.option 'report_script_embeds', '--report-script-embeds', 'Ignore `check_html` errors associated with `script`s (default: `false`)'
35
+ p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell'
34
36
  p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
37
+ p.option 'timeframe', '--timeframe <time>', String, 'A string representing the caching timeframe.'
35
38
  p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
36
- p.option 'verbose', '--verbose', 'If `true`, outputs extra information as the checking happens. Useful for debugging. **Will be deprecated in a future release.**'
37
- p.option 'verbosity', '--verbosity', String, 'Sets the logging level, as determined by Yell'
39
+ p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`.'
38
40
 
39
41
  p.action do |args, opts|
40
42
  args = ['.'] if args.empty?
@@ -45,34 +47,39 @@ Mercenary.program(:htmlproof) do |p|
45
47
  # prepare everything to go to proofer
46
48
  p.options.select { |o| !opts[o.config_key].nil? }.each do |option|
47
49
  if option.return_type.to_s == 'Array' # TODO: is_a? doesn't work here?
48
- opts[option.config_key] = opts[option.config_key].map { |i| HTML::Proofer::Configuration.to_regex?(i) }
50
+ opts[option.config_key] = opts[option.config_key].map { |i| HTMLProofer::Configuration.to_regex?(i) }
49
51
  end
50
52
  options[option.config_key.to_sym] = opts[option.config_key]
51
53
  end
52
54
 
53
55
  # some minor manipulation of a special option
54
- unless opts['href_swap'].nil?
55
- options[:href_swap] = {}
56
- opts['href_swap'].each do |s|
56
+ unless opts['url_swap'].nil?
57
+ options[:url_swap] = {}
58
+ opts['url_swap'].each do |s|
57
59
  pair = s.split(':', 2)
58
- options[:href_swap][Regexp.new(pair[0])] = pair[1]
60
+ options[:url_swap][Regexp.new(pair[0])] = pair[1]
59
61
  end
60
62
  end
61
63
 
62
- # check for ignore_scripts_embeds as it should be set in :validation
63
- unless opts['ignore_script_embeds'].nil?
64
- options[:validation] = { :ignore_script_embeds => true }
65
- end
66
-
67
64
  options[:error_sort] = opts['error-sort'].to_sym unless opts['error-sort'].nil?
68
- options[:verbosity] = opts['verbosity'].to_sym unless opts['verbosity'].nil?
65
+ options[:log_level] = opts['log_level'].to_sym unless opts['log_level'].nil?
69
66
 
70
67
  # FIXME: this is gross
71
68
  options[:validation] = {}
72
- options[:validation][:ignore_script_embeds] = opts['ignore_script_embeds']
69
+ options[:validation][:report_script_embeds] = opts['report_script_embeds']
70
+ options[:validation][:report_invalid_tags] = opts['report_invalid_tags']
73
71
 
74
- path = path.delete(' ').split(',') if opts['as_links']
72
+ options[:cache] = {}
73
+ options[:cache][:timeframe] = opts['timeframe'] unless opts['timeframe'].nil?
75
74
 
76
- HTML::Proofer.new(path, options).run
75
+ paths = path.split(',')
76
+ if opts['as_links']
77
+ links = path.delete(' ').split(',')
78
+ HTMLProofer.check_links(links, options).run
79
+ elsif File.directory?(paths.first)
80
+ HTMLProofer.check_directories(paths, options).run
81
+ else
82
+ HTMLProofer.check_file(path, options).run
83
+ end
77
84
  end
78
85
  end
@@ -0,0 +1,47 @@
1
+ def require_all(path)
2
+ glob = File.join(File.dirname(__FILE__), path, '*.rb')
3
+ Dir[glob].each do |f|
4
+ require f
5
+ end
6
+ end
7
+
8
+ require_all 'html-proofer'
9
+ require_all 'html-proofer/check'
10
+
11
+ require 'parallel'
12
+ require 'fileutils'
13
+
14
+ begin
15
+ require 'awesome_print'
16
+ rescue LoadError; end
17
+
18
+ module HTMLProofer
19
+
20
+ def check_file(file, options = {})
21
+ fail ArgumentError unless file.is_a?(String)
22
+ options[:type] = :file
23
+ HTMLProofer::Runner.new(file, options)
24
+ end
25
+ module_function :check_file
26
+
27
+ def check_directory(directory, options = {})
28
+ fail ArgumentError unless directory.is_a?(String)
29
+ options[:type] = :directory
30
+ HTMLProofer::Runner.new([directory], options)
31
+ end
32
+ module_function :check_directory
33
+
34
+ def check_directories(directories, options = {})
35
+ fail ArgumentError unless directories.is_a?(Array)
36
+ options[:type] = :directory
37
+ HTMLProofer::Runner.new(directories, options)
38
+ end
39
+ module_function :check_directories
40
+
41
+ def check_links(links, options = {})
42
+ fail ArgumentError unless links.is_a?(Array)
43
+ options[:type] = :links
44
+ HTMLProofer::Runner.new(links, options)
45
+ end
46
+ module_function :check_links
47
+ end
@@ -0,0 +1,153 @@
1
+ require_relative 'utils'
2
+
3
+ require 'json'
4
+ require 'active_support/core_ext/string'
5
+ require 'active_support/core_ext/date'
6
+ require 'active_support/core_ext/numeric/time'
7
+
8
+ module HTMLProofer
9
+ class Cache
10
+ include HTMLProofer::Utils
11
+
12
+ CACHE_LOG = File.join(STORAGE_DIR, 'cache.log')
13
+
14
+ attr_reader :exists, :cache_log
15
+
16
+ def initialize(logger, options)
17
+ @logger = logger
18
+ @cache_log = {}
19
+
20
+ if options.nil? || options.empty?
21
+ define_singleton_method('use_cache?') { false }
22
+ else
23
+ define_singleton_method('use_cache?') { true }
24
+ @parsed_timeframe = parsed_timeframe(options[:timeframe])
25
+ end
26
+
27
+ @cache_time = Time.now
28
+
29
+ if File.exist?(CACHE_LOG)
30
+ contents = File.read(CACHE_LOG)
31
+ @cache_log = contents.empty? ? {} : JSON.parse(contents)
32
+ end
33
+ end
34
+
35
+ def within_timeframe?(time)
36
+ (@parsed_timeframe..@cache_time).cover?(time)
37
+ end
38
+
39
+ def urls
40
+ @cache_log['urls'] || []
41
+ end
42
+
43
+ def size
44
+ @cache_log.length
45
+ end
46
+
47
+ def parsed_timeframe(timeframe)
48
+ time, date = timeframe.match(/(\d+)(\D)/).captures
49
+ time = time.to_f
50
+ case date
51
+ when 'M'
52
+ time.months.ago
53
+ when 'w'
54
+ time.weeks.ago
55
+ when 'd'
56
+ time.days.ago
57
+ when 'h'
58
+ time.hours.ago
59
+ else
60
+ fail ArgumentError, "#{date} is not a valid timeframe!"
61
+ end
62
+ end
63
+
64
+ def add(url, filenames, status, msg = '')
65
+ data = {
66
+ :time => @cache_time,
67
+ :filenames => filenames,
68
+ :status => status,
69
+ :message => msg
70
+ }
71
+
72
+ @cache_log[clean_url(url)] = data
73
+ end
74
+
75
+ def detect_url_changes(found)
76
+ existing_urls = @cache_log.keys.map { |url| clean_url(url) }
77
+ found_urls = found.keys.map { |url| clean_url(url) }
78
+
79
+ # prepare to add new URLs detected
80
+ additions = found.reject do |url, _|
81
+ url = clean_url(url)
82
+ if existing_urls.include?(url)
83
+ true
84
+ else
85
+ @logger.log :debug, "Adding #{url} to cache check"
86
+ false
87
+ end
88
+ end
89
+
90
+ new_link_count = additions.length
91
+ new_link_text = pluralize(new_link_count, 'link', 'links')
92
+ @logger.log :info, "Adding #{new_link_text} to the cache..."
93
+
94
+ # remove from cache URLs that no longer exist
95
+ del = 0
96
+ @cache_log.delete_if do |url, _|
97
+ url = clean_url(url)
98
+ if !found_urls.include?(url)
99
+ @logger.log :debug, "Removing #{url} from cache check"
100
+ del += 1
101
+ true
102
+ else
103
+ false
104
+ end
105
+ end
106
+
107
+ del_link_text = pluralize(del, 'link', 'links')
108
+ @logger.log :info, "Removing #{del_link_text} from the cache..."
109
+
110
+ additions
111
+ end
112
+
113
+ def write
114
+ File.write(CACHE_LOG, @cache_log.to_json)
115
+ end
116
+
117
+ def load?
118
+ @load.nil?
119
+ end
120
+
121
+ def retrieve_urls(external_urls)
122
+ urls_to_check = detect_url_changes(external_urls)
123
+ @cache_log.each_pair do |url, cache|
124
+ if within_timeframe?(cache['time'])
125
+ next if cache['message'].empty? # these were successes to skip
126
+ urls_to_check[url] = cache['filenames'] # these are failures to retry
127
+ else
128
+ urls_to_check[url] = cache['filenames'] # pass or fail, recheck expired links
129
+ end
130
+ end
131
+ urls_to_check
132
+ end
133
+
134
+ # FIXME: there seems to be some discrepancy where Typhoeus occasionally adds
135
+ # a trailing slash to URL strings, which causes issues with the cache
136
+ def slashless_url(url)
137
+ url.chomp('/')
138
+ end
139
+
140
+ # FIXME: it seems that Typhoeus actually acts on escaped URLs,
141
+ # but there's no way to get at that information, and the cache
142
+ # stores unescaped URLs. Because of this, some links, such as
143
+ # github.com/search/issues?q=is:open+is:issue+fig are not matched
144
+ # as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig
145
+ def unescape_url(url)
146
+ Addressable::URI.unescape(url)
147
+ end
148
+
149
+ def clean_url(url)
150
+ slashless_url(unescape_url(url))
151
+ end
152
+ end
153
+ end
@@ -0,0 +1,63 @@
1
+ module HTMLProofer
2
+ # Mostly handles issue management and collecting of external URLs.
3
+ class Check
4
+ attr_reader :node, :element, :src, :path, :options, :issues, :external_urls
5
+
6
+ def initialize(src, path, html, options)
7
+ @src = src
8
+ @path = path
9
+ @html = remove_ignored(html)
10
+ @options = options
11
+ @issues = []
12
+ @external_urls = {}
13
+ end
14
+
15
+ def create_element(node)
16
+ @node = node
17
+ Element.new(node, self)
18
+ end
19
+
20
+ def run
21
+ fail NotImplementedError, 'HTMLProofer::Check subclasses must implement #run'
22
+ end
23
+
24
+ def add_issue(desc, line: nil, status: -1)
25
+ @issues << Issue.new(@path, desc, line: line, status: status)
26
+ end
27
+
28
+ def add_to_external_urls(url, _)
29
+ return if @external_urls[url]
30
+ add_path_for_url(url)
31
+ end
32
+
33
+ def add_path_for_url(url)
34
+ if @external_urls[url]
35
+ @external_urls[url] << @path
36
+ else
37
+ @external_urls[url] = [@path]
38
+ end
39
+ end
40
+
41
+ def self.subchecks
42
+ classes = []
43
+
44
+ ObjectSpace.each_object(Class) do |c|
45
+ next unless c.superclass == self
46
+ classes << c
47
+ end
48
+
49
+ classes
50
+ end
51
+
52
+ def blank?(attr)
53
+ attr.nil? || attr.empty?
54
+ end
55
+
56
+ private
57
+
58
+ def remove_ignored(html)
59
+ html.css('code, pre, tt').each(&:unlink)
60
+ html
61
+ end
62
+ end
63
+ end
@@ -1,14 +1,8 @@
1
- # encoding: utf-8
2
-
3
- class FaviconCheckable < ::HTML::Proofer::Checkable
4
- attr_reader :rel
5
- end
6
-
7
- class FaviconCheck < ::HTML::Proofer::CheckRunner
1
+ class FaviconCheck < ::HTMLProofer::Check
8
2
  def run
9
3
  found = false
10
4
  @html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |node|
11
- favicon = FaviconCheckable.new(node, self)
5
+ favicon = create_element(node)
12
6
  next if favicon.ignore?
13
7
  found = true if favicon.rel.split(' ').last.eql? 'icon'
14
8
  break if found
@@ -0,0 +1,21 @@
1
+ class HtmlCheck < ::HTMLProofer::Check
2
+ SCRIPT_EMBEDS_MSG = /Element script embeds close tag/
3
+ INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/
4
+ INVALID_PREFIX = /Namespace prefix/
5
+
6
+ def run
7
+ @html.errors.each do |error|
8
+ message = error.message
9
+ line = error.line
10
+
11
+ if message =~ INVALID_TAG_MSG || message =~ INVALID_PREFIX
12
+ next unless options[:validation][:report_invalid_tags]
13
+ end
14
+
15
+ # tags embedded in scripts are used in templating languages: http://git.io/vOovv
16
+ next if !options[:validation][:report_script_embeds] && message =~ SCRIPT_EMBEDS_MSG
17
+
18
+ add_issue(message, line: line)
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,47 @@
1
+ class ImageCheck < ::HTMLProofer::Check
2
+ SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/
3
+
4
+ def empty_alt_tag?
5
+ @img.alt.strip.empty?
6
+ end
7
+
8
+ def terrible_filename?
9
+ @img.url =~ SCREEN_SHOT_REGEX
10
+ end
11
+
12
+ def missing_src?
13
+ blank?(@img.url)
14
+ end
15
+
16
+ def run
17
+ @html.css('img').each do |node|
18
+ @img = create_element(node)
19
+ line = node.line
20
+
21
+ next if @img.ignore?
22
+
23
+ # screenshot filenames are reported as issues because they are terrible names
24
+ if terrible_filename?
25
+ add_issue("image has a terrible filename (#{@img.url})", line: line)
26
+ next
27
+ end
28
+
29
+ # does the image exist?
30
+ if missing_src?
31
+ add_issue('image has no src or srcset attribute', line: line)
32
+ else
33
+ if @img.remote?
34
+ add_to_external_urls(@img.url, line)
35
+ elsif !@img.exists?
36
+ add_issue("internal image #{@img.url} does not exist", line: line)
37
+ end
38
+ end
39
+
40
+ if @img.alt.nil? || (empty_alt_tag? && !@img.ignore_empty_alt?)
41
+ add_issue("image #{@img.url} does not have an alt attribute", line: line)
42
+ end
43
+ end
44
+
45
+ external_urls
46
+ end
47
+ end