validate-website 1.9.1 → 1.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.md +44 -0
- data/LICENSE +1 -1
- data/README.md +8 -3
- data/Rakefile +3 -2
- data/bin/validate-website +1 -0
- data/bin/validate-website-static +1 -0
- data/lib/validate_website.rb +2 -0
- data/lib/validate_website/colorful_messages.rb +3 -0
- data/lib/validate_website/core.rb +11 -6
- data/lib/validate_website/crawl.rb +8 -2
- data/lib/validate_website/option_parser.rb +5 -1
- data/lib/validate_website/runner.rb +3 -1
- data/lib/validate_website/static.rb +11 -3
- data/lib/validate_website/static_link.rb +5 -3
- data/lib/validate_website/utils.rb +3 -0
- data/lib/validate_website/validator.rb +27 -28
- data/lib/validate_website/validator_class_methods.rb +3 -0
- data/lib/validate_website/version.rb +5 -1
- data/man/man1/validate-website-static.1 +3 -3
- data/man/man1/validate-website.1 +3 -3
- data/test/core_test.rb +3 -1
- data/test/crawler_test.rb +35 -16
- data/test/data/html5-fail.html +0 -337
- data/test/static_test.rb +25 -5
- data/test/test_helper.rb +4 -5
- data/test/validator_test.rb +50 -24
- data/test/webmock_helper.rb +4 -2
- metadata +32 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7208a9c728f7798aecc7a10bec8a3537940a6857e3eb4f6612efa551f2871a94
|
4
|
+
data.tar.gz: 25e5112b42c14a6ed86f0271fba1bb9ada63c1d92cc75414a227716289e241f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 95962fe71e144cc5c8f244e1960556e56c9956bdcd5852e194532ee16b40dfbda5ba39db75169651ddd4048afc79311fdee2aa1087dddc3388aaa894c4dc27fe
|
7
|
+
data.tar.gz: 6a6e8185f8a359e0482c0ef5893b28d307a31be560a2e83bfd19f88f042cfbd3ec37e244f602e5ea4a6876afce9b8d2a38f98f645452168b752c03ee62fdc70d
|
data/History.md
CHANGED
@@ -1,4 +1,48 @@
|
|
1
1
|
|
2
|
+
1.11.1 / 2021-01-10
|
3
|
+
===================
|
4
|
+
|
5
|
+
* Add nokogumbo doc
|
6
|
+
* nokogumbo support for ruby engine
|
7
|
+
* Add support for nokogumbo
|
8
|
+
* Add jruby to github actions
|
9
|
+
* Merge pull request #24 from @marocchino / patch-1
|
10
|
+
* Some minor improvements
|
11
|
+
* Remove travis ci
|
12
|
+
* Init github actions ci
|
13
|
+
|
14
|
+
1.11.0 / 2021-01-08
|
15
|
+
===================
|
16
|
+
|
17
|
+
* Merge pull request #23 from @marocchino / ruby-3-support
|
18
|
+
* Use webrick's escape instead of encode_www_form_component
|
19
|
+
* Support ruby 3
|
20
|
+
* Fix doc for ValidateWebsite::Core initialize
|
21
|
+
* Switch to gitlab ci and remove 2.{3,4} support
|
22
|
+
* Update rubocop to 0.76.0
|
23
|
+
|
24
|
+
1.10.0 / 2020-07-03
|
25
|
+
==================
|
26
|
+
|
27
|
+
* Fix build for Ruby 2.3 and 2.4
|
28
|
+
* Remove rbx-3 from build
|
29
|
+
* Remove minitest-focus and fix minitest 6 warnings
|
30
|
+
* Fix html5_validator option and change html5_validator_service_url
|
31
|
+
* Add Ruby 2.7 to CI and update jruby
|
32
|
+
* Update rubocop and fix offences
|
33
|
+
* Remove Ruby 2.2 support and update rubocop
|
34
|
+
|
35
|
+
1.9.3 / 2019-04-11
|
36
|
+
==================
|
37
|
+
|
38
|
+
* Update tidy_ffi to 1.0
|
39
|
+
* Avoid testing tidy bug with js
|
40
|
+
|
41
|
+
1.9.2 / 2019-03-09
|
42
|
+
==================
|
43
|
+
|
44
|
+
* Load schema when needed instead of boot
|
45
|
+
|
2
46
|
1.9.1 / 2019-03-05
|
3
47
|
==================
|
4
48
|
|
data/LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License
|
2
2
|
|
3
|
-
Copyright (c) 2009-
|
3
|
+
Copyright (c) 2009-2021 Laurent Arnoud <laurent@spkdev.net>
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining
|
6
6
|
a copy of this software and associated documentation files (the
|
data/README.md
CHANGED
@@ -95,6 +95,11 @@ If the libtidy5 is found on your system this will be the default to validate
|
|
95
95
|
your html5 document. This does not depend on a third-party service; everything is done
|
96
96
|
locally.
|
97
97
|
|
98
|
+
#### Nokogumbo
|
99
|
+
|
100
|
+
Nokogumbo can validate HTML5 documents without a third-party service but reports fewer
|
101
|
+
errors than tidy.
|
102
|
+
|
98
103
|
#### Validator.nu web service
|
99
104
|
|
100
105
|
When `--html5-validator nu` option is used HTML5 support is done by using the
|
@@ -145,12 +150,12 @@ See [GitHub](https://github.com/spk/validate-website/graphs/contributors).
|
|
145
150
|
|
146
151
|
The MIT License
|
147
152
|
|
148
|
-
Copyright (c) 2009-
|
153
|
+
Copyright (c) 2009-2021 Laurent Arnoud <laurent@spkdev.net>
|
149
154
|
|
150
155
|
---
|
151
|
-
[](https://gitlab.com/spkdev/validate-website/-/commits/master)
|
157
|
+
[](https://gitlab.com/spkdev/validate-website/-/commits/master)
|
152
158
|
[](https://rubygems.org/gems/validate-website)
|
153
159
|
[](http://www.rubydoc.info/gems/validate-website)
|
154
160
|
[](http://opensource.org/licenses/MIT "MIT")
|
155
|
-
[](https://coveralls.io/github/spk/validate-website?branch=master)
|
156
161
|
[](http://inch-ci.org/github/spk/validate-website)
|
data/Rakefile
CHANGED
@@ -1,9 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rake/testtask'
|
2
4
|
require 'rubocop/rake_task'
|
3
5
|
require 'asciidoctor'
|
4
6
|
|
5
|
-
default = %i[test]
|
6
|
-
default << :rubocop unless RUBY_ENGINE == 'rbx'
|
7
|
+
default = %i[test rubocop]
|
7
8
|
task default: default
|
8
9
|
|
9
10
|
desc 'Update manpage from asciidoc file'
|
data/bin/validate-website
CHANGED
data/bin/validate-website-static
CHANGED
data/lib/validate_website.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'set'
|
2
4
|
require 'open-uri'
|
3
5
|
require 'webrick/cookie'
|
@@ -26,12 +28,13 @@ module ValidateWebsite
|
|
26
28
|
EXIT_FAILURE_MARKUP = 64
|
27
29
|
EXIT_FAILURE_NOT_FOUND = 65
|
28
30
|
EXIT_FAILURE_MARKUP_NOT_FOUND = 66
|
29
|
-
START_MESSAGE = 'Validating'
|
31
|
+
START_MESSAGE = 'Validating'
|
30
32
|
|
31
33
|
# Initialize core ValidateWebsite class
|
32
34
|
# @example
|
33
35
|
# new({ site: "https://example.com/" }, :crawl)
|
34
36
|
# @param [Hash] options
|
37
|
+
# @param [Symbol] validation_type `crawl` for web or `static` for local
|
35
38
|
# @return [NilClass]
|
36
39
|
def initialize(options, validation_type)
|
37
40
|
@not_founds_count = 0
|
@@ -43,11 +46,11 @@ module ValidateWebsite
|
|
43
46
|
end
|
44
47
|
|
45
48
|
def errors?
|
46
|
-
@errors_count
|
49
|
+
@errors_count.positive?
|
47
50
|
end
|
48
51
|
|
49
52
|
def not_founds?
|
50
|
-
@not_founds_count
|
53
|
+
@not_founds_count.positive?
|
51
54
|
end
|
52
55
|
|
53
56
|
def exit_status
|
@@ -78,6 +81,7 @@ module ValidateWebsite
|
|
78
81
|
def check_css_syntax(page)
|
79
82
|
nodes = Crass::Parser.parse_stylesheet(page.body)
|
80
83
|
return unless any_css_errors?(nodes)
|
84
|
+
|
81
85
|
handle_validation_error(page.url)
|
82
86
|
end
|
83
87
|
|
@@ -111,10 +115,10 @@ module ValidateWebsite
|
|
111
115
|
# @param [Nokogiri::HTML::Document] original_doc
|
112
116
|
# @param [String] body The raw HTTP response body of the page
|
113
117
|
# @param [String] url
|
114
|
-
# @param [
|
118
|
+
# @param [Hash] options Validator options
|
115
119
|
#
|
116
|
-
def validate(doc, body, url,
|
117
|
-
validator = Validator.new(doc, body,
|
120
|
+
def validate(doc, body, url, options)
|
121
|
+
validator = Validator.new(doc, body, **options)
|
118
122
|
if validator.valid?
|
119
123
|
print color(:success, '.', options[:color]) # rspec style
|
120
124
|
else
|
@@ -125,6 +129,7 @@ module ValidateWebsite
|
|
125
129
|
def handle_html_validation_error(validator, url)
|
126
130
|
handle_validation_error(url)
|
127
131
|
return unless options[:verbose]
|
132
|
+
|
128
133
|
puts color(:error, validator.errors.join(', '), options[:color])
|
129
134
|
end
|
130
135
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'validate_website/core'
|
2
4
|
require 'validate_website/utils'
|
3
5
|
|
@@ -41,9 +43,10 @@ module ValidateWebsite
|
|
41
43
|
#
|
42
44
|
def extract_imgs_from_page(page)
|
43
45
|
return Set[] if page.is_redirect?
|
46
|
+
|
44
47
|
page.doc.search('//img[@src]').reduce(Set[]) do |result, elem|
|
45
48
|
u = elem.attributes['src'].content
|
46
|
-
result << page.to_absolute(URI.parse(
|
49
|
+
result << page.to_absolute(URI.parse(WEBrick::HTTPUtils.escape(u)))
|
47
50
|
end
|
48
51
|
end
|
49
52
|
|
@@ -77,7 +80,10 @@ module ValidateWebsite
|
|
77
80
|
end
|
78
81
|
|
79
82
|
if validate?(page)
|
80
|
-
|
83
|
+
keys = %i[ignore html5_validator]
|
84
|
+
# Hash#slice does not exist on Ruby <= 2.4
|
85
|
+
slice = Hash[[keys, options.values_at(*keys)].transpose]
|
86
|
+
validate(page.doc, page.body, page.url, slice)
|
81
87
|
end
|
82
88
|
end
|
83
89
|
end
|
@@ -1,4 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'slop'
|
4
|
+
require File.expand_path('version', __dir__)
|
2
5
|
|
3
6
|
module ValidateWebsite
|
4
7
|
# Internal class for parsing command line args
|
@@ -28,6 +31,7 @@ module ValidateWebsite
|
|
28
31
|
# Generic parse method for crawl or static options
|
29
32
|
def self.parse(options, type)
|
30
33
|
raise ArgumentError unless VALID_TYPES.include?(type)
|
34
|
+
|
31
35
|
# We are in command line (ARGV)
|
32
36
|
if options.is_a?(Array)
|
33
37
|
send("command_line_parse_#{type}", options)
|
@@ -52,7 +56,7 @@ module ValidateWebsite
|
|
52
56
|
opt.regexp('-i', '--ignore',
|
53
57
|
'Validation errors to ignore (ex: "valign|autocorrect")')
|
54
58
|
opt.string('-x', '--html5-validator',
|
55
|
-
'Change default html5 validator engine (
|
59
|
+
'Change default html5 validator engine (tidy/nu/nokogumbo)',
|
56
60
|
default: DEFAULT_OPTIONS[:html5_validator])
|
57
61
|
opt.string('-5', '--html5-validator-service-url',
|
58
62
|
'Change default html5 validator service URL for "nu" engine')
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'validate_website/core'
|
2
4
|
|
3
5
|
module ValidateWebsite
|
@@ -5,7 +7,7 @@ module ValidateWebsite
|
|
5
7
|
class Runner
|
6
8
|
def self.trap_interrupt
|
7
9
|
trap('INT') do
|
8
|
-
|
10
|
+
warn "\nExiting..."
|
9
11
|
exit!(1)
|
10
12
|
end
|
11
13
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'validate_website/core'
|
2
4
|
require 'validate_website/utils'
|
3
5
|
|
@@ -5,7 +7,7 @@ module ValidateWebsite
|
|
5
7
|
# Class for validating a static website
|
6
8
|
class Static < Core
|
7
9
|
CONTENT_TYPES = ['text/html', 'text/xhtml+xml'].freeze
|
8
|
-
START_MESSAGE_TYPE = 'files'
|
10
|
+
START_MESSAGE_TYPE = 'files'
|
9
11
|
|
10
12
|
attr_reader :history_count
|
11
13
|
|
@@ -24,7 +26,8 @@ module ValidateWebsite
|
|
24
26
|
files = Dir.glob(@options[:pattern])
|
25
27
|
files.each do |file|
|
26
28
|
next unless File.file?(file)
|
27
|
-
next if @options[:exclude]
|
29
|
+
next if @options[:exclude]&.match(file)
|
30
|
+
|
28
31
|
@history_count += 1
|
29
32
|
check_static_file(file)
|
30
33
|
end
|
@@ -57,7 +60,10 @@ module ValidateWebsite
|
|
57
60
|
|
58
61
|
def check_page(file, page)
|
59
62
|
if page.html? && options[:markup]
|
60
|
-
|
63
|
+
keys = %i[ignore html5_validator]
|
64
|
+
# Hash#slice does not exist on Ruby <= 2.4
|
65
|
+
slice = Hash[[keys, options.values_at(*keys)].transpose]
|
66
|
+
validate(page.doc, page.body, file, slice)
|
61
67
|
end
|
62
68
|
check_static_not_found(page.links) if options[:not_found]
|
63
69
|
end
|
@@ -68,11 +74,13 @@ module ValidateWebsite
|
|
68
74
|
static_links = links.map { |l| StaticLink.new(l, @site) }
|
69
75
|
static_links.each do |static_link|
|
70
76
|
next unless static_link.check?
|
77
|
+
|
71
78
|
unless File.exist?(static_link.file_path)
|
72
79
|
not_found_error(static_link.file_path)
|
73
80
|
next
|
74
81
|
end
|
75
82
|
next unless static_link.css?
|
83
|
+
|
76
84
|
check_static_not_found static_link.extract_urls_from_fake_css_response
|
77
85
|
end
|
78
86
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'uri'
|
2
4
|
require 'validate_website/utils'
|
3
5
|
require 'validate_website/static'
|
@@ -6,7 +8,7 @@ require 'spidr'
|
|
6
8
|
# rubocop:disable Metrics/BlockLength
|
7
9
|
StaticLink = Struct.new(:link, :site) do
|
8
10
|
def link_uri
|
9
|
-
@link_uri = URI.parse(
|
11
|
+
@link_uri = URI.parse(WEBrick::HTTPUtils.escape(link))
|
10
12
|
@link_uri = URI.join(site, @link_uri) if @link_uri.host.nil?
|
11
13
|
@link_uri
|
12
14
|
end
|
@@ -25,9 +27,9 @@ StaticLink = Struct.new(:link, :site) do
|
|
25
27
|
|
26
28
|
def body
|
27
29
|
if File.exist?(link)
|
28
|
-
open(link).read
|
30
|
+
File.open(link).read
|
29
31
|
else
|
30
|
-
open(file_path).read
|
32
|
+
File.open(file_path).read
|
31
33
|
end
|
32
34
|
end
|
33
35
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Base module ValidateWebsite
|
2
4
|
module ValidateWebsite
|
3
5
|
# Utils class for CSS helpers
|
@@ -10,6 +12,7 @@ module ValidateWebsite
|
|
10
12
|
def self.extract_urls_from_css(page)
|
11
13
|
return Set[] unless page
|
12
14
|
return Set[] if page.body.nil?
|
15
|
+
|
13
16
|
nodes = Crass::Parser.parse_stylesheet(page.body)
|
14
17
|
extract_urls_from_nodes nodes, page
|
15
18
|
end
|
@@ -1,6 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'uri'
|
2
4
|
|
3
|
-
require '
|
5
|
+
require 'nokogumbo' unless ValidateWebsite.jruby?
|
4
6
|
require 'w3c_validators'
|
5
7
|
|
6
8
|
require 'validate_website/validator_class_methods'
|
@@ -10,27 +12,25 @@ module ValidateWebsite
|
|
10
12
|
class Validator
|
11
13
|
extend ValidatorClassMethods
|
12
14
|
|
13
|
-
@html5_validator_service_url = 'https://
|
15
|
+
@html5_validator_service_url = 'https://validator.nu/'
|
16
|
+
XHTML_SCHEMA_PATH = File.expand_path('../../data/schemas', __dir__)
|
17
|
+
@mutex = Mutex.new
|
18
|
+
|
14
19
|
class << self
|
15
20
|
attr_accessor :html5_validator_service_url
|
16
|
-
end
|
17
|
-
|
18
|
-
XHTML_PATH = File.expand_path('../../data/schemas', __dir__)
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
begin
|
29
|
-
@xsd_schemas[schema_name] = Nokogiri::XML::Schema(schema_content)
|
30
|
-
rescue Nokogiri::XML::SyntaxError
|
31
|
-
STDERR.puts "XSD SCHEMA: #{schema} cannot be loaded"
|
22
|
+
# http://www.w3.org/TR/xhtml1-schema/
|
23
|
+
def schema(namespace)
|
24
|
+
@mutex.synchronize do
|
25
|
+
Dir.chdir(XHTML_SCHEMA_PATH) do
|
26
|
+
if File.exist?("#{namespace}.xsd")
|
27
|
+
Nokogiri::XML::Schema(File.read("#{namespace}.xsd"))
|
28
|
+
end
|
29
|
+
end
|
32
30
|
end
|
33
31
|
end
|
32
|
+
|
33
|
+
alias xsd schema
|
34
34
|
end
|
35
35
|
|
36
36
|
attr_reader :original_doc, :body, :dtd, :doc, :namespace, :html5_validator
|
@@ -65,25 +65,22 @@ module ValidateWebsite
|
|
65
65
|
@ignore ? @errors.reject { |e| @ignore =~ e } : @errors
|
66
66
|
end
|
67
67
|
|
68
|
-
# http://www.w3.org/TR/xhtml1-schema/
|
69
|
-
def self.xsd(namespace)
|
70
|
-
return unless namespace
|
71
|
-
@xsd_schemas[namespace] if @xsd_schemas.key? namespace
|
72
|
-
end
|
73
|
-
|
74
68
|
private
|
75
69
|
|
76
70
|
# http://www.w3.org/TR/xhtml1/#dtds
|
77
71
|
def find_namespace(dtd)
|
78
72
|
return unless dtd.system_id
|
73
|
+
|
79
74
|
dtd_uri = URI.parse(dtd.system_id)
|
80
75
|
return unless dtd_uri.path
|
76
|
+
|
81
77
|
@dtd_uri = dtd_uri
|
82
78
|
File.basename(@dtd_uri.path, '.dtd')
|
83
79
|
end
|
84
80
|
|
85
81
|
def document
|
86
82
|
return @document if @document
|
83
|
+
|
87
84
|
@document = if @dtd_uri && @body.match(@dtd_uri.to_s)
|
88
85
|
@body.sub(@dtd_uri.to_s, @namespace + '.dtd')
|
89
86
|
else
|
@@ -95,11 +92,11 @@ module ValidateWebsite
|
|
95
92
|
def validate
|
96
93
|
if document =~ /^\<!DOCTYPE html\>/i
|
97
94
|
html5_validate
|
98
|
-
elsif self.class.
|
99
|
-
self.class.
|
95
|
+
elsif self.class.schema(@namespace)
|
96
|
+
self.class.schema(@namespace).validate(xhtml_doc)
|
100
97
|
else
|
101
98
|
# don't have an XSD; fall back to the DTD
|
102
|
-
Dir.chdir(
|
99
|
+
Dir.chdir(XHTML_SCHEMA_PATH) do
|
103
100
|
Nokogiri::HTML.parse(document)
|
104
101
|
end.errors
|
105
102
|
end
|
@@ -115,8 +112,10 @@ module ValidateWebsite
|
|
115
112
|
def html5_validate
|
116
113
|
if html5_validator.to_sym == :tidy && self.class.tidy
|
117
114
|
tidy_validate
|
118
|
-
|
115
|
+
elsif html5_validator.to_sym == :nu
|
119
116
|
nu_validate
|
117
|
+
else
|
118
|
+
Nokogiri::HTML5(document, max_errors: -1).errors
|
120
119
|
end
|
121
120
|
end
|
122
121
|
|
@@ -138,7 +137,7 @@ module ValidateWebsite
|
|
138
137
|
end
|
139
138
|
|
140
139
|
def xhtml_doc
|
141
|
-
Dir.chdir(
|
140
|
+
Dir.chdir(XHTML_SCHEMA_PATH) do
|
142
141
|
Nokogiri::XML(document) { |cfg| cfg.nonoent.dtdload.dtdvalid.nonet }
|
143
142
|
end
|
144
143
|
end
|