validate-website 1.8.1 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.md +37 -0
- data/LICENSE +1 -1
- data/README.md +38 -25
- data/Rakefile +5 -1
- data/bin/validate-website +1 -1
- data/bin/validate-website-static +1 -1
- data/lib/validate_website.rb +2 -0
- data/lib/validate_website/colorful_messages.rb +3 -0
- data/lib/validate_website/core.rb +14 -6
- data/lib/validate_website/crawl.rb +8 -1
- data/lib/validate_website/option_parser.rb +58 -54
- data/lib/validate_website/runner.rb +3 -1
- data/lib/validate_website/static.rb +19 -9
- data/lib/validate_website/static_link.rb +4 -2
- data/lib/validate_website/utils.rb +3 -0
- data/lib/validate_website/validator.rb +24 -27
- data/lib/validate_website/validator_class_methods.rb +3 -0
- data/lib/validate_website/version.rb +1 -1
- data/man/man1/validate-website-static.1 +17 -8
- data/man/man1/validate-website.1 +17 -8
- data/test/core_test.rb +4 -2
- data/test/crawler_test.rb +36 -17
- data/test/data/html5-fail.html +0 -337
- data/test/static_test.rb +26 -6
- data/test/test_helper.rb +9 -2
- data/test/validator_test.rb +26 -24
- data/test/webmock_helper.rb +4 -2
- metadata +34 -35
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4a5b0553089c9d66d3622781fe6c4ab5ca68ac6a198fe36c53c84c8d34e14adb
|
4
|
+
data.tar.gz: f3f0cc4aef203f85ebb9868e793b7cd0312db8c4b6f7fb0d15beefeaaef1cc83
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c9a8fae92bcb19d5f92466a7b77f3e40cca59643c946c15563885d9e65a1d2793271dee2b723a296fe487d57634fdd88f4440a60d2fac625a6a7208c8105159d
|
7
|
+
data.tar.gz: e7a78f899b42d5f27cb41e99472e37499389bf39c567dd9f16ab064fce38927a43de89c00411c25697c622ad76f59b8d4b9836d898249822cf9bc0d37591c3bf
|
data/History.md
CHANGED
@@ -1,4 +1,41 @@
|
|
1
1
|
|
2
|
+
1.10.0 / 2020-07-03
|
3
|
+
==================
|
4
|
+
|
5
|
+
* Fix build for Ruby 2.3 and 2.4
|
6
|
+
* Remove rbx-3 from build
|
7
|
+
* Remove minitest-focus and fix minitest 6 warnings
|
8
|
+
* Fix html5_validator option and change html5_validator_service_url
|
9
|
+
* Add Ruby 2.7 to CI and update jruby
|
10
|
+
* Update rubocop and fix offences
|
11
|
+
* Remove Ruby 2.2 support and update rubocop
|
12
|
+
|
13
|
+
1.9.3 / 2019-04-11
|
14
|
+
==================
|
15
|
+
|
16
|
+
* Update tidy_ffi to 1.0
|
17
|
+
* Avoid testing tidy bug with js
|
18
|
+
|
19
|
+
1.9.2 / 2019-03-09
|
20
|
+
==================
|
21
|
+
|
22
|
+
* Load schema when needed instead of boot
|
23
|
+
|
24
|
+
1.9.1 / 2019-03-05
|
25
|
+
==================
|
26
|
+
|
27
|
+
* Improve start message for static validator
|
28
|
+
* Update travis config
|
29
|
+
* Fix bundler to <2 on travis (dropped support for Ruby < 2.3)
|
30
|
+
|
31
|
+
1.9.0 / 2018-12-25
|
32
|
+
==================
|
33
|
+
|
34
|
+
* Update deps paint; slop; webmock
|
35
|
+
* Remove Ruby 2.1 support
|
36
|
+
* Force nonet and disable substitute entities on xhtml parse
|
37
|
+
* Use coveralls for code coverage
|
38
|
+
|
2
39
|
v1.8.1 / 2018-03-25
|
3
40
|
===================
|
4
41
|
|
data/LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License
|
2
2
|
|
3
|
-
Copyright (c) 2009-
|
3
|
+
Copyright (c) 2009-2020 Laurent Arnoud <laurent@spkdev.net>
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining
|
6
6
|
a copy of this software and associated documentation files (the
|
data/README.md
CHANGED
@@ -10,22 +10,34 @@ Web crawler for checking the validity of your documents
|
|
10
10
|
|
11
11
|
### Debian
|
12
12
|
|
13
|
-
|
14
|
-
apt install ruby-dev
|
15
|
-
|
13
|
+
```
|
14
|
+
apt install ruby-dev libxslt1-dev libxml2-dev
|
15
|
+
```
|
16
|
+
|
17
|
+
If you want complete local validation, look at the [tidy
|
18
|
+
packages](https://binaries.html-tidy.org/)
|
16
19
|
|
17
20
|
### RubyGems
|
18
21
|
|
19
|
-
|
22
|
+
```
|
20
23
|
gem install validate-website
|
21
|
-
|
24
|
+
```
|
22
25
|
|
23
26
|
## Synopsis
|
24
27
|
|
25
|
-
|
28
|
+
```
|
26
29
|
validate-website [OPTIONS]
|
27
30
|
validate-website-static [OPTIONS]
|
28
|
-
|
31
|
+
```
|
32
|
+
|
33
|
+
## Examples
|
34
|
+
|
35
|
+
```
|
36
|
+
validate-website -v -s https://www.ruby-lang.org/
|
37
|
+
validate-website -v -x tidy -s https://www.ruby-lang.org/
|
38
|
+
validate-website -v -x nu -s https://www.ruby-lang.org/
|
39
|
+
validate-website -h
|
40
|
+
```
|
29
41
|
|
30
42
|
## Description
|
31
43
|
|
@@ -47,30 +59,31 @@ Service](https://checker.html5.org/).
|
|
47
59
|
|
48
60
|
## On your application
|
49
61
|
|
50
|
-
|
62
|
+
``` ruby
|
51
63
|
require 'validate_website/validator'
|
52
64
|
body = '<!DOCTYPE html><html></html>'
|
53
65
|
v = ValidateWebsite::Validator.new(Nokogiri::HTML(body), body)
|
54
66
|
v.valid? # => false
|
55
|
-
|
67
|
+
```
|
56
68
|
|
57
69
|
## Jekyll static site validation
|
58
70
|
|
59
71
|
You can add this Rake task to validate a
|
60
72
|
[jekyll](https://github.com/jekyll/jekyll) site:
|
61
73
|
|
62
|
-
|
74
|
+
``` ruby
|
63
75
|
desc 'validate _site with validate website'
|
64
76
|
task validate: :build do
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
77
|
+
Dir.chdir("_site") do
|
78
|
+
system("validate-website-static",
|
79
|
+
"--verbose",
|
80
|
+
"--exclude", "examples",
|
81
|
+
"--site", HTTP_URL)
|
82
|
+
exit($?.exitstatus)
|
70
83
|
end
|
71
84
|
end
|
72
85
|
end
|
73
|
-
|
86
|
+
```
|
74
87
|
|
75
88
|
## More info
|
76
89
|
|
@@ -97,17 +110,17 @@ validation service.
|
|
97
110
|
You can download [validator](https://github.com/validator/validator) jar and
|
98
111
|
start it with:
|
99
112
|
|
100
|
-
|
113
|
+
```
|
101
114
|
java -cp PATH_TO/vnu.jar nu.validator.servlet.Main 8888
|
102
|
-
|
115
|
+
```
|
103
116
|
|
104
117
|
Then you can use validate-website option:
|
105
118
|
|
106
|
-
|
119
|
+
```
|
107
120
|
--html5-validator-service-url http://localhost:8888/
|
108
121
|
# or
|
109
122
|
export VALIDATOR_NU_URL="http://localhost:8888/"
|
110
|
-
|
123
|
+
```
|
111
124
|
|
112
125
|
This will prevent you from being blacklisted by the validator web service.
|
113
126
|
|
@@ -115,9 +128,9 @@ This will prevent you to be blacklisted from validator webservice.
|
|
115
128
|
|
116
129
|
With standard environment:
|
117
130
|
|
118
|
-
|
131
|
+
```
|
119
132
|
bundle exec rake
|
120
|
-
|
133
|
+
```
|
121
134
|
|
122
135
|
## Credits
|
123
136
|
|
@@ -132,12 +145,12 @@ See [GitHub](https://github.com/spk/validate-website/graphs/contributors).
|
|
132
145
|
|
133
146
|
The MIT License
|
134
147
|
|
135
|
-
Copyright (c) 2009-
|
148
|
+
Copyright (c) 2009-2020 Laurent Arnoud <laurent@spkdev.net>
|
136
149
|
|
137
150
|
---
|
138
|
-
[](https://travis-ci.org/spk/validate-website)
|
139
152
|
[](https://rubygems.org/gems/validate-website)
|
140
153
|
[](http://www.rubydoc.info/gems/validate-website)
|
141
154
|
[](http://opensource.org/licenses/MIT "MIT")
|
142
|
-
[](https://coveralls.io/github/spk/validate-website?branch=master)
|
143
156
|
[](http://inch-ci.org/github/spk/validate-website)
|
data/Rakefile
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rake/testtask'
|
2
4
|
require 'rubocop/rake_task'
|
3
5
|
require 'asciidoctor'
|
4
6
|
|
5
|
-
|
7
|
+
default = %i[test]
|
8
|
+
default << :rubocop unless RUBY_ENGINE == 'rbx'
|
9
|
+
task default: default
|
6
10
|
|
7
11
|
desc 'Update manpage from asciidoc file'
|
8
12
|
task :manpage do
|
data/bin/validate-website
CHANGED
data/bin/validate-website-static
CHANGED
data/lib/validate_website.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'set'
|
2
4
|
require 'open-uri'
|
3
5
|
require 'webrick/cookie'
|
@@ -26,6 +28,7 @@ module ValidateWebsite
|
|
26
28
|
EXIT_FAILURE_MARKUP = 64
|
27
29
|
EXIT_FAILURE_NOT_FOUND = 65
|
28
30
|
EXIT_FAILURE_MARKUP_NOT_FOUND = 66
|
31
|
+
START_MESSAGE = 'Validating'
|
29
32
|
|
30
33
|
# Initialize core ValidateWebsite class
|
31
34
|
# @example
|
@@ -39,15 +42,14 @@ module ValidateWebsite
|
|
39
42
|
@site = @options[:site]
|
40
43
|
@service_url = @options[:html5_validator_service_url]
|
41
44
|
Validator.html5_validator_service_url = @service_url if @service_url
|
42
|
-
puts color(:note, "validating #{@site}\n", @options[:color])
|
43
45
|
end
|
44
46
|
|
45
47
|
def errors?
|
46
|
-
@errors_count
|
48
|
+
@errors_count.positive?
|
47
49
|
end
|
48
50
|
|
49
51
|
def not_founds?
|
50
|
-
@not_founds_count
|
52
|
+
@not_founds_count.positive?
|
51
53
|
end
|
52
54
|
|
53
55
|
def exit_status
|
@@ -71,9 +73,14 @@ module ValidateWebsite
|
|
71
73
|
|
72
74
|
private
|
73
75
|
|
76
|
+
def start_message(type)
|
77
|
+
puts color(:note, "#{START_MESSAGE} #{type}\n", @options[:color])
|
78
|
+
end
|
79
|
+
|
74
80
|
def check_css_syntax(page)
|
75
81
|
nodes = Crass::Parser.parse_stylesheet(page.body)
|
76
82
|
return unless any_css_errors?(nodes)
|
83
|
+
|
77
84
|
handle_validation_error(page.url)
|
78
85
|
end
|
79
86
|
|
@@ -107,10 +114,10 @@ module ValidateWebsite
|
|
107
114
|
# @param [Nokogiri::HTML::Document] original_doc
|
108
115
|
# @param [String] The raw HTTP response body of the page
|
109
116
|
# @param [String] url
|
110
|
-
# @param [
|
117
|
+
# @param [Hash] Validator options
|
111
118
|
#
|
112
|
-
def validate(doc, body, url,
|
113
|
-
validator = Validator.new(doc, body,
|
119
|
+
def validate(doc, body, url, options)
|
120
|
+
validator = Validator.new(doc, body, options)
|
114
121
|
if validator.valid?
|
115
122
|
print color(:success, '.', options[:color]) # rspec style
|
116
123
|
else
|
@@ -121,6 +128,7 @@ module ValidateWebsite
|
|
121
128
|
def handle_html_validation_error(validator, url)
|
122
129
|
handle_validation_error(url)
|
123
130
|
return unless options[:verbose]
|
131
|
+
|
124
132
|
puts color(:error, validator.errors.join(', '), options[:color])
|
125
133
|
end
|
126
134
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'validate_website/core'
|
2
4
|
require 'validate_website/utils'
|
3
5
|
|
@@ -8,6 +10,7 @@ module ValidateWebsite
|
|
8
10
|
|
9
11
|
def initialize(options = {}, validation_type = :crawl)
|
10
12
|
super
|
13
|
+
start_message(@site)
|
11
14
|
end
|
12
15
|
|
13
16
|
def history_count
|
@@ -40,6 +43,7 @@ module ValidateWebsite
|
|
40
43
|
#
|
41
44
|
def extract_imgs_from_page(page)
|
42
45
|
return Set[] if page.is_redirect?
|
46
|
+
|
43
47
|
page.doc.search('//img[@src]').reduce(Set[]) do |result, elem|
|
44
48
|
u = elem.attributes['src'].content
|
45
49
|
result << page.to_absolute(URI.parse(URI.encode(u)))
|
@@ -76,7 +80,10 @@ module ValidateWebsite
|
|
76
80
|
end
|
77
81
|
|
78
82
|
if validate?(page)
|
79
|
-
|
83
|
+
keys = %i[ignore html5_validator]
|
84
|
+
# Hash#slice does not exist on Ruby <= 2.4
|
85
|
+
slice = Hash[[keys, options.values_at(*keys)].transpose]
|
86
|
+
validate(page.doc, page.body, page.url, slice)
|
80
87
|
end
|
81
88
|
end
|
82
89
|
end
|
@@ -1,4 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'slop'
|
4
|
+
require File.expand_path('version', __dir__)
|
2
5
|
|
3
6
|
module ValidateWebsite
|
4
7
|
# Internal class for parse command line args
|
@@ -28,6 +31,7 @@ module ValidateWebsite
|
|
28
31
|
# Generic parse method for crawl or static options
|
29
32
|
def self.parse(options, type)
|
30
33
|
raise ArgumentError unless VALID_TYPES.include?(type)
|
34
|
+
|
31
35
|
# We are in command line (ARGV)
|
32
36
|
if options.is_a?(Array)
|
33
37
|
send("command_line_parse_#{type}", options)
|
@@ -38,57 +42,57 @@ module ValidateWebsite
|
|
38
42
|
end
|
39
43
|
|
40
44
|
def self.default_args
|
41
|
-
Slop.parse do |
|
42
|
-
yield
|
43
|
-
markup_syntax(
|
44
|
-
boolean_options(
|
45
|
-
ignore_html5_options(
|
46
|
-
verbose_option(
|
47
|
-
version_help(
|
45
|
+
Slop.parse do |opt|
|
46
|
+
yield opt if block_given?
|
47
|
+
markup_syntax(opt)
|
48
|
+
boolean_options(opt)
|
49
|
+
ignore_html5_options(opt)
|
50
|
+
verbose_option(opt)
|
51
|
+
version_help(opt)
|
48
52
|
end
|
49
53
|
end
|
50
54
|
|
51
|
-
def self.ignore_html5_options(
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
55
|
+
def self.ignore_html5_options(opt)
|
56
|
+
opt.regexp('-i', '--ignore',
|
57
|
+
'Validation errors to ignore (ex: "valign|autocorrect")')
|
58
|
+
opt.string('-x', '--html5-validator',
|
59
|
+
'Change default html5 validator engine (ex: tidy or nu)',
|
60
|
+
default: DEFAULT_OPTIONS[:html5_validator])
|
61
|
+
opt.string('-5', '--html5-validator-service-url',
|
62
|
+
'Change default html5 validator service URL for "nu" engine')
|
59
63
|
end
|
60
64
|
|
61
|
-
def self.markup_syntax(
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
65
|
+
def self.markup_syntax(opt)
|
66
|
+
opt.bool('-m', '--markup',
|
67
|
+
"Markup validation (default: #{DEFAULT_OPTIONS[:markup]})",
|
68
|
+
default: DEFAULT_OPTIONS[:markup])
|
69
|
+
opt.bool('--css-syntax',
|
70
|
+
"Css validation (default: #{DEFAULT_OPTIONS[:css_syntax]})",
|
71
|
+
default: DEFAULT_OPTIONS[:css_syntax])
|
68
72
|
end
|
69
73
|
|
70
|
-
def self.boolean_options(
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
74
|
+
def self.boolean_options(opt)
|
75
|
+
opt.bool('-n', '--not-found',
|
76
|
+
"Log not found url (default: #{DEFAULT_OPTIONS[:not_found]})",
|
77
|
+
default: DEFAULT_OPTIONS[:not_found])
|
78
|
+
opt.bool('--color',
|
79
|
+
"Show colored output (default: #{DEFAULT_OPTIONS[:color]})",
|
80
|
+
default: DEFAULT_OPTIONS[:color])
|
77
81
|
end
|
78
82
|
|
79
|
-
def self.verbose_option(
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
+
def self.verbose_option(opt)
|
84
|
+
opt.bool('-v', '--verbose',
|
85
|
+
"Show validator errors (default: #{DEFAULT_OPTIONS[:verbose]})",
|
86
|
+
default: DEFAULT_OPTIONS[:verbose])
|
83
87
|
end
|
84
88
|
|
85
|
-
def self.version_help(
|
86
|
-
|
89
|
+
def self.version_help(opt)
|
90
|
+
opt.on('--version', 'Display version.') do
|
87
91
|
puts ValidateWebsite::VERSION
|
88
92
|
exit
|
89
93
|
end
|
90
|
-
|
91
|
-
puts
|
94
|
+
opt.on('-h', '--help', 'Display this help message.') do
|
95
|
+
puts opt
|
92
96
|
exit
|
93
97
|
end
|
94
98
|
end
|
@@ -97,15 +101,15 @@ module ValidateWebsite
|
|
97
101
|
# @params [ARGV]
|
98
102
|
# @return [Hash]
|
99
103
|
def self.command_line_parse_crawl(_args)
|
100
|
-
default_args do |
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
104
|
+
default_args do |opt|
|
105
|
+
opt.string('-s', '--site',
|
106
|
+
"Website to crawl (default: #{DEFAULT_OPTIONS[:site]})",
|
107
|
+
default: DEFAULT_OPTIONS[:site])
|
108
|
+
opt.string('-u', '--user-agent',
|
109
|
+
'Change user agent',
|
110
|
+
default: DEFAULT_OPTIONS[:user_agent])
|
111
|
+
opt.regexp('-e', '--exclude', 'Url to exclude (ex: "redirect|news")')
|
112
|
+
opt.string('-c', '--cookies', 'Set defaults cookies')
|
109
113
|
end
|
110
114
|
end
|
111
115
|
|
@@ -113,14 +117,14 @@ module ValidateWebsite
|
|
113
117
|
# @params [ARGV]
|
114
118
|
# @return [Hash]
|
115
119
|
def self.command_line_parse_static(_args)
|
116
|
-
default_args do |
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
120
|
+
default_args do |opt|
|
121
|
+
opt.string('-s', '--site',
|
122
|
+
"Website to crawl (default: #{DEFAULT_OPTIONS[:site]})",
|
123
|
+
default: DEFAULT_OPTIONS[:site])
|
124
|
+
opt.string('-p', '--pattern',
|
125
|
+
"Filename pattern (default: #{DEFAULT_OPTIONS[:pattern]})",
|
126
|
+
default: DEFAULT_OPTIONS[:pattern])
|
127
|
+
opt.regexp('-e', '--exclude', 'Url to exclude (ex: "redirect|news")')
|
124
128
|
end
|
125
129
|
end
|
126
130
|
end
|