validate-website 1.9.0 → 1.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.md +39 -0
- data/LICENSE +1 -1
- data/README.md +27 -26
- data/Rakefile +3 -2
- data/bin/validate-website +1 -0
- data/bin/validate-website-static +1 -0
- data/lib/validate_website.rb +2 -0
- data/lib/validate_website/colorful_messages.rb +3 -0
- data/lib/validate_website/core.rb +15 -6
- data/lib/validate_website/crawl.rb +9 -2
- data/lib/validate_website/option_parser.rb +4 -0
- data/lib/validate_website/runner.rb +3 -1
- data/lib/validate_website/static.rb +12 -2
- data/lib/validate_website/static_link.rb +5 -3
- data/lib/validate_website/utils.rb +3 -0
- data/lib/validate_website/validator.rb +23 -26
- data/lib/validate_website/validator_class_methods.rb +3 -0
- data/lib/validate_website/version.rb +1 -1
- data/man/man1/validate-website-static.1 +17 -8
- data/man/man1/validate-website.1 +17 -8
- data/test/core_test.rb +3 -1
- data/test/crawler_test.rb +35 -16
- data/test/data/html5-fail.html +0 -337
- data/test/static_test.rb +25 -5
- data/test/test_helper.rb +4 -5
- data/test/validator_test.rb +25 -23
- data/test/webmock_helper.rb +4 -2
- metadata +18 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f63d4c84f653035be69262c33ccad0025c48013492b653882a803f7d110e717f
|
4
|
+
data.tar.gz: fdf8e994af98a8dbff7411dd480b88f759009a70b5b5c1fcde15120b5db1ad02
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b0774aaab6ca32e10d43cfc0dd6c57219cc9e3862af5febd892a30884920195512301f702e5af11459a55c8dde4eaa4238a09833b102924a8ca36c9deb8480f
|
7
|
+
data.tar.gz: c0cdf68b1486c2d15257d7fb07e8561f52054b5a234e46162a468b1ffdc6bbc26905563150ccab3053e78a15a70c59142a857afa1aaff7563a22279840752332
|
data/History.md
CHANGED
@@ -1,4 +1,43 @@
|
|
1
1
|
|
2
|
+
1.11.0 / 2021-01-08
|
3
|
+
===================
|
4
|
+
|
5
|
+
* Merge pull request #23 from @marocchino / ruby-3-support
|
6
|
+
* Use webrick's escape instead of encode_www_form_component
|
7
|
+
* Support ruby 3
|
8
|
+
* Fix doc for ValidateWebsite::Core initialize
|
9
|
+
* Switch to gitlab ci and remove 2.{3,4} support
|
10
|
+
* Update rubocop to 0.76.0
|
11
|
+
|
12
|
+
1.10.0 / 2020-07-03
|
13
|
+
==================
|
14
|
+
|
15
|
+
* Fix build for Ruby 2.3 and 2.4
|
16
|
+
* Remove rbx-3 from build
|
17
|
+
* Remove minitest-focus and fix minitest 6 warnings
|
18
|
+
* Fix html5_validator option and change html5_validator_service_url
|
19
|
+
* Add Ruby 2.7 to CI and update jruby
|
20
|
+
* Update rubocop and fix offences
|
21
|
+
* Remove Ruby 2.2 support and update rubocop
|
22
|
+
|
23
|
+
1.9.3 / 2019-04-11
|
24
|
+
==================
|
25
|
+
|
26
|
+
* Update tidy_ffi to 1.0
|
27
|
+
* Avoid testing tidy bug with js
|
28
|
+
|
29
|
+
1.9.2 / 2019-03-09
|
30
|
+
==================
|
31
|
+
|
32
|
+
* Load schema when needed instead of boot
|
33
|
+
|
34
|
+
1.9.1 / 2019-03-05
|
35
|
+
==================
|
36
|
+
|
37
|
+
* Improve start message for static validator
|
38
|
+
* Update travis config
|
39
|
+
* Fix bundler to <2 on travis (dropped support Ruby < 2.3)
|
40
|
+
|
2
41
|
1.9.0 / 2018-12-25
|
3
42
|
==================
|
4
43
|
|
data/LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License
|
2
2
|
|
3
|
-
Copyright (c) 2009-
|
3
|
+
Copyright (c) 2009-2021 Laurent Arnoud <laurent@spkdev.net>
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining
|
6
6
|
a copy of this software and associated documentation files (the
|
data/README.md
CHANGED
@@ -10,34 +10,34 @@ Web crawler for checking the validity of your documents
|
|
10
10
|
|
11
11
|
### Debian
|
12
12
|
|
13
|
-
|
13
|
+
```
|
14
14
|
apt install ruby-dev libxslt1-dev libxml2-dev
|
15
|
-
|
15
|
+
```
|
16
16
|
|
17
17
|
If you want complete local validation, see the [tidy
|
18
18
|
packages](https://binaries.html-tidy.org/)
|
19
19
|
|
20
20
|
### RubyGems
|
21
21
|
|
22
|
-
|
22
|
+
```
|
23
23
|
gem install validate-website
|
24
|
-
|
24
|
+
```
|
25
25
|
|
26
26
|
## Synopsis
|
27
27
|
|
28
|
-
|
28
|
+
```
|
29
29
|
validate-website [OPTIONS]
|
30
30
|
validate-website-static [OPTIONS]
|
31
|
-
|
31
|
+
```
|
32
32
|
|
33
33
|
## Examples
|
34
34
|
|
35
|
-
|
35
|
+
```
|
36
36
|
validate-website -v -s https://www.ruby-lang.org/
|
37
37
|
validate-website -v -x tidy -s https://www.ruby-lang.org/
|
38
38
|
validate-website -v -x nu -s https://www.ruby-lang.org/
|
39
39
|
validate-website -h
|
40
|
-
|
40
|
+
```
|
41
41
|
|
42
42
|
## Description
|
43
43
|
|
@@ -59,30 +59,31 @@ Service](https://checker.html5.org/).
|
|
59
59
|
|
60
60
|
## On your application
|
61
61
|
|
62
|
-
|
62
|
+
``` ruby
|
63
63
|
require 'validate_website/validator'
|
64
64
|
body = '<!DOCTYPE html><html></html>'
|
65
65
|
v = ValidateWebsite::Validator.new(Nokogiri::HTML(body), body)
|
66
66
|
v.valid? # => false
|
67
|
-
|
67
|
+
```
|
68
68
|
|
69
69
|
## Jekyll static site validation
|
70
70
|
|
71
71
|
You can add this Rake task to validate a
|
72
72
|
[jekyll](https://github.com/jekyll/jekyll) site:
|
73
73
|
|
74
|
-
|
74
|
+
``` ruby
|
75
75
|
desc 'validate _site with validate website'
|
76
76
|
task validate: :build do
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
77
|
+
Dir.chdir("_site") do
|
78
|
+
system("validate-website-static",
|
79
|
+
"--verbose",
|
80
|
+
"--exclude", "examples",
|
81
|
+
"--site", HTTP_URL)
|
82
|
+
exit($?.exitstatus)
|
82
83
|
end
|
83
84
|
end
|
84
85
|
end
|
85
|
-
|
86
|
+
```
|
86
87
|
|
87
88
|
## More info
|
88
89
|
|
@@ -109,17 +110,17 @@ validation service.
|
|
109
110
|
You can download [validator](https://github.com/validator/validator) jar and
|
110
111
|
start it with:
|
111
112
|
|
112
|
-
|
113
|
+
```
|
113
114
|
java -cp PATH_TO/vnu.jar nu.validator.servlet.Main 8888
|
114
|
-
|
115
|
+
```
|
115
116
|
|
116
117
|
Then you can use validate-website option:
|
117
118
|
|
118
|
-
|
119
|
+
```
|
119
120
|
--html5-validator-service-url http://localhost:8888/
|
120
121
|
# or
|
121
122
|
export VALIDATOR_NU_URL="http://localhost:8888/"
|
122
|
-
|
123
|
+
```
|
123
124
|
|
124
125
|
This will prevent you from being blacklisted by the validator web service.
|
125
126
|
|
@@ -127,9 +128,9 @@ This will prevent you to be blacklisted from validator webservice.
|
|
127
128
|
|
128
129
|
With standard environment:
|
129
130
|
|
130
|
-
|
131
|
+
```
|
131
132
|
bundle exec rake
|
132
|
-
|
133
|
+
```
|
133
134
|
|
134
135
|
## Credits
|
135
136
|
|
@@ -144,12 +145,12 @@ See [GitHub](https://github.com/spk/validate-website/graphs/contributors).
|
|
144
145
|
|
145
146
|
The MIT License
|
146
147
|
|
147
|
-
Copyright (c) 2009-
|
148
|
+
Copyright (c) 2009-2021 Laurent Arnoud <laurent@spkdev.net>
|
148
149
|
|
149
150
|
---
|
150
|
-
[![Build](https://img.shields.io/
|
151
|
+
[![Build](https://img.shields.io/gitlab/pipeline/spkdev/validate-website/master)](https://gitlab.com/spkdev/validate-website/-/commits/master)
|
152
|
+
[![Coverage](https://gitlab.com/spkdev/validate-website/badges/master/coverage.svg)](https://gitlab.com/spkdev/validate-website/-/commits/master)
|
151
153
|
[![Version](https://img.shields.io/gem/v/validate-website.svg)](https://rubygems.org/gems/validate-website)
|
152
154
|
[![Documentation](https://img.shields.io/badge/doc-rubydoc-blue.svg)](http://www.rubydoc.info/gems/validate-website)
|
153
155
|
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](http://opensource.org/licenses/MIT "MIT")
|
154
|
-
[![Coverage Status](https://img.shields.io/coveralls/github/spk/validate-website.svg)](https://coveralls.io/github/spk/validate-website?branch=master)
|
155
156
|
[![Inline docs](https://inch-ci.org/github/spk/validate-website.svg?branch=master)](http://inch-ci.org/github/spk/validate-website)
|
data/Rakefile
CHANGED
@@ -1,9 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rake/testtask'
|
2
4
|
require 'rubocop/rake_task'
|
3
5
|
require 'asciidoctor'
|
4
6
|
|
5
|
-
default = %i[test]
|
6
|
-
default << :rubocop unless RUBY_ENGINE == 'rbx'
|
7
|
+
default = %i[test rubocop]
|
7
8
|
task default: default
|
8
9
|
|
9
10
|
desc 'Update manpage from asciidoc file'
|
data/bin/validate-website
CHANGED
data/bin/validate-website-static
CHANGED
data/lib/validate_website.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'set'
|
2
4
|
require 'open-uri'
|
3
5
|
require 'webrick/cookie'
|
@@ -26,11 +28,13 @@ module ValidateWebsite
|
|
26
28
|
EXIT_FAILURE_MARKUP = 64
|
27
29
|
EXIT_FAILURE_NOT_FOUND = 65
|
28
30
|
EXIT_FAILURE_MARKUP_NOT_FOUND = 66
|
31
|
+
START_MESSAGE = 'Validating'
|
29
32
|
|
30
33
|
# Initialize core ValidateWebsite class
|
31
34
|
# @example
|
32
35
|
# new({ site: "https://example.com/" }, :crawl)
|
33
36
|
# @param [Hash] options
|
37
|
+
# @param [Symbol] validation_type `crawl` for web or `static` for local
|
34
38
|
# @return [NilClass]
|
35
39
|
def initialize(options, validation_type)
|
36
40
|
@not_founds_count = 0
|
@@ -39,15 +43,14 @@ module ValidateWebsite
|
|
39
43
|
@site = @options[:site]
|
40
44
|
@service_url = @options[:html5_validator_service_url]
|
41
45
|
Validator.html5_validator_service_url = @service_url if @service_url
|
42
|
-
puts color(:note, "validating #{@site}\n", @options[:color])
|
43
46
|
end
|
44
47
|
|
45
48
|
def errors?
|
46
|
-
@errors_count
|
49
|
+
@errors_count.positive?
|
47
50
|
end
|
48
51
|
|
49
52
|
def not_founds?
|
50
|
-
@not_founds_count
|
53
|
+
@not_founds_count.positive?
|
51
54
|
end
|
52
55
|
|
53
56
|
def exit_status
|
@@ -71,9 +74,14 @@ module ValidateWebsite
|
|
71
74
|
|
72
75
|
private
|
73
76
|
|
77
|
+
def start_message(type)
|
78
|
+
puts color(:note, "#{START_MESSAGE} #{type}\n", @options[:color])
|
79
|
+
end
|
80
|
+
|
74
81
|
def check_css_syntax(page)
|
75
82
|
nodes = Crass::Parser.parse_stylesheet(page.body)
|
76
83
|
return unless any_css_errors?(nodes)
|
84
|
+
|
77
85
|
handle_validation_error(page.url)
|
78
86
|
end
|
79
87
|
|
@@ -107,10 +115,10 @@ module ValidateWebsite
|
|
107
115
|
# @param [Nokogiri::HTML::Document] original_doc
|
108
116
|
# @param [String] The raw HTTP response body of the page
|
109
117
|
# @param [String] url
|
110
|
-
# @param [
|
118
|
+
# @param [Hash] Validator options
|
111
119
|
#
|
112
|
-
def validate(doc, body, url,
|
113
|
-
validator = Validator.new(doc, body,
|
120
|
+
def validate(doc, body, url, options)
|
121
|
+
validator = Validator.new(doc, body, **options)
|
114
122
|
if validator.valid?
|
115
123
|
print color(:success, '.', options[:color]) # rspec style
|
116
124
|
else
|
@@ -121,6 +129,7 @@ module ValidateWebsite
|
|
121
129
|
def handle_html_validation_error(validator, url)
|
122
130
|
handle_validation_error(url)
|
123
131
|
return unless options[:verbose]
|
132
|
+
|
124
133
|
puts color(:error, validator.errors.join(', '), options[:color])
|
125
134
|
end
|
126
135
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'validate_website/core'
|
2
4
|
require 'validate_website/utils'
|
3
5
|
|
@@ -8,6 +10,7 @@ module ValidateWebsite
|
|
8
10
|
|
9
11
|
def initialize(options = {}, validation_type = :crawl)
|
10
12
|
super
|
13
|
+
start_message(@site)
|
11
14
|
end
|
12
15
|
|
13
16
|
def history_count
|
@@ -40,9 +43,10 @@ module ValidateWebsite
|
|
40
43
|
#
|
41
44
|
def extract_imgs_from_page(page)
|
42
45
|
return Set[] if page.is_redirect?
|
46
|
+
|
43
47
|
page.doc.search('//img[@src]').reduce(Set[]) do |result, elem|
|
44
48
|
u = elem.attributes['src'].content
|
45
|
-
result << page.to_absolute(URI.parse(
|
49
|
+
result << page.to_absolute(URI.parse(WEBrick::HTTPUtils.escape(u)))
|
46
50
|
end
|
47
51
|
end
|
48
52
|
|
@@ -76,7 +80,10 @@ module ValidateWebsite
|
|
76
80
|
end
|
77
81
|
|
78
82
|
if validate?(page)
|
79
|
-
|
83
|
+
keys = %i[ignore html5_validator]
|
84
|
+
# slice does not exist on Ruby <= 2.4
|
85
|
+
slice = Hash[[keys, options.values_at(*keys)].transpose]
|
86
|
+
validate(page.doc, page.body, page.url, slice)
|
80
87
|
end
|
81
88
|
end
|
82
89
|
end
|
@@ -1,4 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'slop'
|
4
|
+
require File.expand_path('version', __dir__)
|
2
5
|
|
3
6
|
module ValidateWebsite
|
4
7
|
# Internal class for parsing command line args
|
@@ -28,6 +31,7 @@ module ValidateWebsite
|
|
28
31
|
# Generic parse method for crawl or static options
|
29
32
|
def self.parse(options, type)
|
30
33
|
raise ArgumentError unless VALID_TYPES.include?(type)
|
34
|
+
|
31
35
|
# We are in command line (ARGV)
|
32
36
|
if options.is_a?(Array)
|
33
37
|
send("command_line_parse_#{type}", options)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'validate_website/core'
|
2
4
|
|
3
5
|
module ValidateWebsite
|
@@ -5,7 +7,7 @@ module ValidateWebsite
|
|
5
7
|
class Runner
|
6
8
|
def self.trap_interrupt
|
7
9
|
trap('INT') do
|
8
|
-
|
10
|
+
warn "\nExiting..."
|
9
11
|
exit!(1)
|
10
12
|
end
|
11
13
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'validate_website/core'
|
2
4
|
require 'validate_website/utils'
|
3
5
|
|
@@ -5,12 +7,14 @@ module ValidateWebsite
|
|
5
7
|
# Class for validating a static website
|
6
8
|
class Static < Core
|
7
9
|
CONTENT_TYPES = ['text/html', 'text/xhtml+xml'].freeze
|
10
|
+
START_MESSAGE_TYPE = 'files'
|
8
11
|
|
9
12
|
attr_reader :history_count
|
10
13
|
|
11
14
|
def initialize(options = {}, validation_type = :static)
|
12
15
|
@history_count = 0
|
13
16
|
super
|
17
|
+
start_message("#{START_MESSAGE_TYPE} in #{Dir.pwd} (#{@site} as site)")
|
14
18
|
end
|
15
19
|
|
16
20
|
# @param [Hash] options
|
@@ -22,7 +26,8 @@ module ValidateWebsite
|
|
22
26
|
files = Dir.glob(@options[:pattern])
|
23
27
|
files.each do |file|
|
24
28
|
next unless File.file?(file)
|
25
|
-
next if @options[:exclude]
|
29
|
+
next if @options[:exclude]&.match(file)
|
30
|
+
|
26
31
|
@history_count += 1
|
27
32
|
check_static_file(file)
|
28
33
|
end
|
@@ -55,7 +60,10 @@ module ValidateWebsite
|
|
55
60
|
|
56
61
|
def check_page(file, page)
|
57
62
|
if page.html? && options[:markup]
|
58
|
-
|
63
|
+
keys = %i[ignore html5_validator]
|
64
|
+
# slice does not exist on Ruby <= 2.4
|
65
|
+
slice = Hash[[keys, options.values_at(*keys)].transpose]
|
66
|
+
validate(page.doc, page.body, file, slice)
|
59
67
|
end
|
60
68
|
check_static_not_found(page.links) if options[:not_found]
|
61
69
|
end
|
@@ -66,11 +74,13 @@ module ValidateWebsite
|
|
66
74
|
static_links = links.map { |l| StaticLink.new(l, @site) }
|
67
75
|
static_links.each do |static_link|
|
68
76
|
next unless static_link.check?
|
77
|
+
|
69
78
|
unless File.exist?(static_link.file_path)
|
70
79
|
not_found_error(static_link.file_path)
|
71
80
|
next
|
72
81
|
end
|
73
82
|
next unless static_link.css?
|
83
|
+
|
74
84
|
check_static_not_found static_link.extract_urls_from_fake_css_response
|
75
85
|
end
|
76
86
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'uri'
|
2
4
|
require 'validate_website/utils'
|
3
5
|
require 'validate_website/static'
|
@@ -6,7 +8,7 @@ require 'spidr'
|
|
6
8
|
# rubocop:disable Metrics/BlockLength
|
7
9
|
StaticLink = Struct.new(:link, :site) do
|
8
10
|
def link_uri
|
9
|
-
@link_uri = URI.parse(
|
11
|
+
@link_uri = URI.parse(WEBrick::HTTPUtils.escape(link))
|
10
12
|
@link_uri = URI.join(site, @link_uri) if @link_uri.host.nil?
|
11
13
|
@link_uri
|
12
14
|
end
|
@@ -25,9 +27,9 @@ StaticLink = Struct.new(:link, :site) do
|
|
25
27
|
|
26
28
|
def body
|
27
29
|
if File.exist?(link)
|
28
|
-
open(link).read
|
30
|
+
File.open(link).read
|
29
31
|
else
|
30
|
-
open(file_path).read
|
32
|
+
File.open(file_path).read
|
31
33
|
end
|
32
34
|
end
|
33
35
|
|