validate-website 1.8.1 → 1.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.md +37 -0
- data/LICENSE +1 -1
- data/README.md +38 -25
- data/Rakefile +5 -1
- data/bin/validate-website +1 -1
- data/bin/validate-website-static +1 -1
- data/lib/validate_website.rb +2 -0
- data/lib/validate_website/colorful_messages.rb +3 -0
- data/lib/validate_website/core.rb +14 -6
- data/lib/validate_website/crawl.rb +8 -1
- data/lib/validate_website/option_parser.rb +58 -54
- data/lib/validate_website/runner.rb +3 -1
- data/lib/validate_website/static.rb +19 -9
- data/lib/validate_website/static_link.rb +4 -2
- data/lib/validate_website/utils.rb +3 -0
- data/lib/validate_website/validator.rb +24 -27
- data/lib/validate_website/validator_class_methods.rb +3 -0
- data/lib/validate_website/version.rb +1 -1
- data/man/man1/validate-website-static.1 +17 -8
- data/man/man1/validate-website.1 +17 -8
- data/test/core_test.rb +4 -2
- data/test/crawler_test.rb +36 -17
- data/test/data/html5-fail.html +0 -337
- data/test/static_test.rb +26 -6
- data/test/test_helper.rb +9 -2
- data/test/validator_test.rb +26 -24
- data/test/webmock_helper.rb +4 -2
- metadata +34 -35
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4a5b0553089c9d66d3622781fe6c4ab5ca68ac6a198fe36c53c84c8d34e14adb
|
4
|
+
data.tar.gz: f3f0cc4aef203f85ebb9868e793b7cd0312db8c4b6f7fb0d15beefeaaef1cc83
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c9a8fae92bcb19d5f92466a7b77f3e40cca59643c946c15563885d9e65a1d2793271dee2b723a296fe487d57634fdd88f4440a60d2fac625a6a7208c8105159d
|
7
|
+
data.tar.gz: e7a78f899b42d5f27cb41e99472e37499389bf39c567dd9f16ab064fce38927a43de89c00411c25697c622ad76f59b8d4b9836d898249822cf9bc0d37591c3bf
|
data/History.md
CHANGED
@@ -1,4 +1,41 @@
|
|
1
1
|
|
2
|
+
1.10.0 / 2020-07-03
|
3
|
+
==================
|
4
|
+
|
5
|
+
* Fix build for Ruby 2.3 and 2.4
|
6
|
+
* Remove rbx-3 from build
|
7
|
+
* Remove minitest-focus and fix minitest 6 warnings
|
8
|
+
* Fix html5_validator option and change html5_validator_service_url
|
9
|
+
* Add Ruby 2.7 to CI and update jruby
|
10
|
+
* Update rubocop and fix offences
|
11
|
+
* Remove Ruby 2.2 support and update rubocop
|
12
|
+
|
13
|
+
1.9.3 / 2019-04-11
|
14
|
+
==================
|
15
|
+
|
16
|
+
* Update tidy_ffi to 1.0
|
17
|
+
* Avoid testing tidy bug with js
|
18
|
+
|
19
|
+
1.9.2 / 2019-03-09
|
20
|
+
==================
|
21
|
+
|
22
|
+
* Load schema when needed instead of boot
|
23
|
+
|
24
|
+
1.9.1 / 2019-03-05
|
25
|
+
==================
|
26
|
+
|
27
|
+
* Improve start message for static validator
|
28
|
+
* Update travis config
|
29
|
+
* Fix bundler to <2 on travis (dropped support Ruby < 2.3)
|
30
|
+
|
31
|
+
1.9.0 / 2018-12-25
|
32
|
+
==================
|
33
|
+
|
34
|
+
* Update deps paint; slop; webmock
|
35
|
+
* Remove Ruby 2.1 support
|
36
|
+
* Force nonet and disable substitute entities on xhtml parse
|
37
|
+
* Use coveralls for code coverage
|
38
|
+
|
2
39
|
v1.8.1 / 2018-03-25
|
3
40
|
===================
|
4
41
|
|
data/LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License
|
2
2
|
|
3
|
-
Copyright (c) 2009-
|
3
|
+
Copyright (c) 2009-2020 Laurent Arnoud <laurent@spkdev.net>
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining
|
6
6
|
a copy of this software and associated documentation files (the
|
data/README.md
CHANGED
@@ -10,22 +10,34 @@ Web crawler for checking the validity of your documents
|
|
10
10
|
|
11
11
|
### Debian
|
12
12
|
|
13
|
-
|
14
|
-
apt install ruby-dev
|
15
|
-
|
13
|
+
```
|
14
|
+
apt install ruby-dev libxslt1-dev libxml2-dev
|
15
|
+
```
|
16
|
+
|
17
|
+
If you want complete local validation look [tidy
|
18
|
+
packages](https://binaries.html-tidy.org/)
|
16
19
|
|
17
20
|
### RubyGems
|
18
21
|
|
19
|
-
|
22
|
+
```
|
20
23
|
gem install validate-website
|
21
|
-
|
24
|
+
```
|
22
25
|
|
23
26
|
## Synopsis
|
24
27
|
|
25
|
-
|
28
|
+
```
|
26
29
|
validate-website [OPTIONS]
|
27
30
|
validate-website-static [OPTIONS]
|
28
|
-
|
31
|
+
```
|
32
|
+
|
33
|
+
## Examples
|
34
|
+
|
35
|
+
```
|
36
|
+
validate-website -v -s https://www.ruby-lang.org/
|
37
|
+
validate-website -v -x tidy -s https://www.ruby-lang.org/
|
38
|
+
validate-website -v -x nu -s https://www.ruby-lang.org/
|
39
|
+
validate-website -h
|
40
|
+
```
|
29
41
|
|
30
42
|
## Description
|
31
43
|
|
@@ -47,30 +59,31 @@ Service](https://checker.html5.org/).
|
|
47
59
|
|
48
60
|
## On your application
|
49
61
|
|
50
|
-
|
62
|
+
``` ruby
|
51
63
|
require 'validate_website/validator'
|
52
64
|
body = '<!DOCTYPE html><html></html>'
|
53
65
|
v = ValidateWebsite::Validator.new(Nokogiri::HTML(body), body)
|
54
66
|
v.valid? # => false
|
55
|
-
|
67
|
+
```
|
56
68
|
|
57
69
|
## Jekyll static site validation
|
58
70
|
|
59
71
|
You can add this Rake task to validate a
|
60
72
|
[jekyll](https://github.com/jekyll/jekyll) site:
|
61
73
|
|
62
|
-
|
74
|
+
``` ruby
|
63
75
|
desc 'validate _site with validate website'
|
64
76
|
task validate: :build do
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
77
|
+
Dir.chdir("_site") do
|
78
|
+
system("validate-website-static",
|
79
|
+
"--verbose",
|
80
|
+
"--exclude", "examples",
|
81
|
+
"--site", HTTP_URL)
|
82
|
+
exit($?.exitstatus)
|
70
83
|
end
|
71
84
|
end
|
72
85
|
end
|
73
|
-
|
86
|
+
```
|
74
87
|
|
75
88
|
## More info
|
76
89
|
|
@@ -97,17 +110,17 @@ validation service.
|
|
97
110
|
You can download [validator](https://github.com/validator/validator) jar and
|
98
111
|
start it with:
|
99
112
|
|
100
|
-
|
113
|
+
```
|
101
114
|
java -cp PATH_TO/vnu.jar nu.validator.servlet.Main 8888
|
102
|
-
|
115
|
+
```
|
103
116
|
|
104
117
|
Then you can use validate-website option:
|
105
118
|
|
106
|
-
|
119
|
+
```
|
107
120
|
--html5-validator-service-url http://localhost:8888/
|
108
121
|
# or
|
109
122
|
export VALIDATOR_NU_URL="http://localhost:8888/"
|
110
|
-
|
123
|
+
```
|
111
124
|
|
112
125
|
This will prevent you to be blacklisted from validator webservice.
|
113
126
|
|
@@ -115,9 +128,9 @@ This will prevent you to be blacklisted from validator webservice.
|
|
115
128
|
|
116
129
|
With standard environment:
|
117
130
|
|
118
|
-
|
131
|
+
```
|
119
132
|
bundle exec rake
|
120
|
-
|
133
|
+
```
|
121
134
|
|
122
135
|
## Credits
|
123
136
|
|
@@ -132,12 +145,12 @@ See [GitHub](https://github.com/spk/validate-website/graphs/contributors).
|
|
132
145
|
|
133
146
|
The MIT License
|
134
147
|
|
135
|
-
Copyright (c) 2009-
|
148
|
+
Copyright (c) 2009-2020 Laurent Arnoud <laurent@spkdev.net>
|
136
149
|
|
137
150
|
---
|
138
|
-
[![Build](https://img.shields.io/travis
|
151
|
+
[![Build](https://img.shields.io/travis/spk/validate-website.svg)](https://travis-ci.org/spk/validate-website)
|
139
152
|
[![Version](https://img.shields.io/gem/v/validate-website.svg)](https://rubygems.org/gems/validate-website)
|
140
153
|
[![Documentation](https://img.shields.io/badge/doc-rubydoc-blue.svg)](http://www.rubydoc.info/gems/validate-website)
|
141
154
|
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](http://opensource.org/licenses/MIT "MIT")
|
142
|
-
[![
|
155
|
+
[![Coverage Status](https://img.shields.io/coveralls/github/spk/validate-website.svg)](https://coveralls.io/github/spk/validate-website?branch=master)
|
143
156
|
[![Inline docs](https://inch-ci.org/github/spk/validate-website.svg?branch=master)](http://inch-ci.org/github/spk/validate-website)
|
data/Rakefile
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rake/testtask'
|
2
4
|
require 'rubocop/rake_task'
|
3
5
|
require 'asciidoctor'
|
4
6
|
|
5
|
-
|
7
|
+
default = %i[test]
|
8
|
+
default << :rubocop unless RUBY_ENGINE == 'rbx'
|
9
|
+
task default: default
|
6
10
|
|
7
11
|
desc 'Update manpage from asciidoc file'
|
8
12
|
task :manpage do
|
data/bin/validate-website
CHANGED
data/bin/validate-website-static
CHANGED
data/lib/validate_website.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'set'
|
2
4
|
require 'open-uri'
|
3
5
|
require 'webrick/cookie'
|
@@ -26,6 +28,7 @@ module ValidateWebsite
|
|
26
28
|
EXIT_FAILURE_MARKUP = 64
|
27
29
|
EXIT_FAILURE_NOT_FOUND = 65
|
28
30
|
EXIT_FAILURE_MARKUP_NOT_FOUND = 66
|
31
|
+
START_MESSAGE = 'Validating'
|
29
32
|
|
30
33
|
# Initialize core ValidateWebsite class
|
31
34
|
# @example
|
@@ -39,15 +42,14 @@ module ValidateWebsite
|
|
39
42
|
@site = @options[:site]
|
40
43
|
@service_url = @options[:html5_validator_service_url]
|
41
44
|
Validator.html5_validator_service_url = @service_url if @service_url
|
42
|
-
puts color(:note, "validating #{@site}\n", @options[:color])
|
43
45
|
end
|
44
46
|
|
45
47
|
def errors?
|
46
|
-
@errors_count
|
48
|
+
@errors_count.positive?
|
47
49
|
end
|
48
50
|
|
49
51
|
def not_founds?
|
50
|
-
@not_founds_count
|
52
|
+
@not_founds_count.positive?
|
51
53
|
end
|
52
54
|
|
53
55
|
def exit_status
|
@@ -71,9 +73,14 @@ module ValidateWebsite
|
|
71
73
|
|
72
74
|
private
|
73
75
|
|
76
|
+
def start_message(type)
|
77
|
+
puts color(:note, "#{START_MESSAGE} #{type}\n", @options[:color])
|
78
|
+
end
|
79
|
+
|
74
80
|
def check_css_syntax(page)
|
75
81
|
nodes = Crass::Parser.parse_stylesheet(page.body)
|
76
82
|
return unless any_css_errors?(nodes)
|
83
|
+
|
77
84
|
handle_validation_error(page.url)
|
78
85
|
end
|
79
86
|
|
@@ -107,10 +114,10 @@ module ValidateWebsite
|
|
107
114
|
# @param [Nokogiri::HTML::Document] original_doc
|
108
115
|
# @param [String] The raw HTTP response body of the page
|
109
116
|
# @param [String] url
|
110
|
-
# @param [
|
117
|
+
# @param [Hash] Validator options
|
111
118
|
#
|
112
|
-
def validate(doc, body, url,
|
113
|
-
validator = Validator.new(doc, body,
|
119
|
+
def validate(doc, body, url, options)
|
120
|
+
validator = Validator.new(doc, body, options)
|
114
121
|
if validator.valid?
|
115
122
|
print color(:success, '.', options[:color]) # rspec style
|
116
123
|
else
|
@@ -121,6 +128,7 @@ module ValidateWebsite
|
|
121
128
|
def handle_html_validation_error(validator, url)
|
122
129
|
handle_validation_error(url)
|
123
130
|
return unless options[:verbose]
|
131
|
+
|
124
132
|
puts color(:error, validator.errors.join(', '), options[:color])
|
125
133
|
end
|
126
134
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'validate_website/core'
|
2
4
|
require 'validate_website/utils'
|
3
5
|
|
@@ -8,6 +10,7 @@ module ValidateWebsite
|
|
8
10
|
|
9
11
|
def initialize(options = {}, validation_type = :crawl)
|
10
12
|
super
|
13
|
+
start_message(@site)
|
11
14
|
end
|
12
15
|
|
13
16
|
def history_count
|
@@ -40,6 +43,7 @@ module ValidateWebsite
|
|
40
43
|
#
|
41
44
|
def extract_imgs_from_page(page)
|
42
45
|
return Set[] if page.is_redirect?
|
46
|
+
|
43
47
|
page.doc.search('//img[@src]').reduce(Set[]) do |result, elem|
|
44
48
|
u = elem.attributes['src'].content
|
45
49
|
result << page.to_absolute(URI.parse(URI.encode(u)))
|
@@ -76,7 +80,10 @@ module ValidateWebsite
|
|
76
80
|
end
|
77
81
|
|
78
82
|
if validate?(page)
|
79
|
-
|
83
|
+
keys = %i[ignore html5_validator]
|
84
|
+
# slice does not exists on Ruby <= 2.4
|
85
|
+
slice = Hash[[keys, options.values_at(*keys)].transpose]
|
86
|
+
validate(page.doc, page.body, page.url, slice)
|
80
87
|
end
|
81
88
|
end
|
82
89
|
end
|
@@ -1,4 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'slop'
|
4
|
+
require File.expand_path('version', __dir__)
|
2
5
|
|
3
6
|
module ValidateWebsite
|
4
7
|
# Internal class for parse command line args
|
@@ -28,6 +31,7 @@ module ValidateWebsite
|
|
28
31
|
# Generic parse method for crawl or static options
|
29
32
|
def self.parse(options, type)
|
30
33
|
raise ArgumentError unless VALID_TYPES.include?(type)
|
34
|
+
|
31
35
|
# We are in command line (ARGV)
|
32
36
|
if options.is_a?(Array)
|
33
37
|
send("command_line_parse_#{type}", options)
|
@@ -38,57 +42,57 @@ module ValidateWebsite
|
|
38
42
|
end
|
39
43
|
|
40
44
|
def self.default_args
|
41
|
-
Slop.parse do |
|
42
|
-
yield
|
43
|
-
markup_syntax(
|
44
|
-
boolean_options(
|
45
|
-
ignore_html5_options(
|
46
|
-
verbose_option(
|
47
|
-
version_help(
|
45
|
+
Slop.parse do |opt|
|
46
|
+
yield opt if block_given?
|
47
|
+
markup_syntax(opt)
|
48
|
+
boolean_options(opt)
|
49
|
+
ignore_html5_options(opt)
|
50
|
+
verbose_option(opt)
|
51
|
+
version_help(opt)
|
48
52
|
end
|
49
53
|
end
|
50
54
|
|
51
|
-
def self.ignore_html5_options(
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
55
|
+
def self.ignore_html5_options(opt)
|
56
|
+
opt.regexp('-i', '--ignore',
|
57
|
+
'Validation errors to ignore (ex: "valign|autocorrect")')
|
58
|
+
opt.string('-x', '--html5-validator',
|
59
|
+
'Change default html5 validator engine (ex: tidy or nu)',
|
60
|
+
default: DEFAULT_OPTIONS[:html5_validator])
|
61
|
+
opt.string('-5', '--html5-validator-service-url',
|
62
|
+
'Change default html5 validator service URL for "nu" engine')
|
59
63
|
end
|
60
64
|
|
61
|
-
def self.markup_syntax(
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
65
|
+
def self.markup_syntax(opt)
|
66
|
+
opt.bool('-m', '--markup',
|
67
|
+
"Markup validation (default: #{DEFAULT_OPTIONS[:markup]})",
|
68
|
+
default: DEFAULT_OPTIONS[:markup])
|
69
|
+
opt.bool('--css-syntax',
|
70
|
+
"Css validation (default: #{DEFAULT_OPTIONS[:css_syntax]})",
|
71
|
+
default: DEFAULT_OPTIONS[:css_syntax])
|
68
72
|
end
|
69
73
|
|
70
|
-
def self.boolean_options(
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
74
|
+
def self.boolean_options(opt)
|
75
|
+
opt.bool('-n', '--not-found',
|
76
|
+
"Log not found url (default: #{DEFAULT_OPTIONS[:not_found]})",
|
77
|
+
default: DEFAULT_OPTIONS[:not_found])
|
78
|
+
opt.bool('--color',
|
79
|
+
"Show colored output (default: #{DEFAULT_OPTIONS[:color]})",
|
80
|
+
default: DEFAULT_OPTIONS[:color])
|
77
81
|
end
|
78
82
|
|
79
|
-
def self.verbose_option(
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
+
def self.verbose_option(opt)
|
84
|
+
opt.bool('-v', '--verbose',
|
85
|
+
"Show validator errors (default: #{DEFAULT_OPTIONS[:verbose]})",
|
86
|
+
default: DEFAULT_OPTIONS[:verbose])
|
83
87
|
end
|
84
88
|
|
85
|
-
def self.version_help(
|
86
|
-
|
89
|
+
def self.version_help(opt)
|
90
|
+
opt.on('--version', 'Display version.') do
|
87
91
|
puts ValidateWebsite::VERSION
|
88
92
|
exit
|
89
93
|
end
|
90
|
-
|
91
|
-
puts
|
94
|
+
opt.on('-h', '--help', 'Display this help message.') do
|
95
|
+
puts opt
|
92
96
|
exit
|
93
97
|
end
|
94
98
|
end
|
@@ -97,15 +101,15 @@ module ValidateWebsite
|
|
97
101
|
# @params [ARGV]
|
98
102
|
# @return [Hash]
|
99
103
|
def self.command_line_parse_crawl(_args)
|
100
|
-
default_args do |
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
104
|
+
default_args do |opt|
|
105
|
+
opt.string('-s', '--site',
|
106
|
+
"Website to crawl (default: #{DEFAULT_OPTIONS[:site]})",
|
107
|
+
default: DEFAULT_OPTIONS[:site])
|
108
|
+
opt.string('-u', '--user-agent',
|
109
|
+
'Change user agent',
|
110
|
+
default: DEFAULT_OPTIONS[:user_agent])
|
111
|
+
opt.regexp('-e', '--exclude', 'Url to exclude (ex: "redirect|news")')
|
112
|
+
opt.string('-c', '--cookies', 'Set defaults cookies')
|
109
113
|
end
|
110
114
|
end
|
111
115
|
|
@@ -113,14 +117,14 @@ module ValidateWebsite
|
|
113
117
|
# @params [ARGV]
|
114
118
|
# @return [Hash]
|
115
119
|
def self.command_line_parse_static(_args)
|
116
|
-
default_args do |
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
120
|
+
default_args do |opt|
|
121
|
+
opt.string('-s', '--site',
|
122
|
+
"Website to crawl (default: #{DEFAULT_OPTIONS[:site]})",
|
123
|
+
default: DEFAULT_OPTIONS[:site])
|
124
|
+
opt.string('-p', '--pattern',
|
125
|
+
"Filename pattern (default: #{DEFAULT_OPTIONS[:pattern]})",
|
126
|
+
default: DEFAULT_OPTIONS[:pattern])
|
127
|
+
opt.regexp('-e', '--exclude', 'Url to exclude (ex: "redirect|news")')
|
124
128
|
end
|
125
129
|
end
|
126
130
|
end
|