validate-website 1.9.1 → 1.11.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.md +44 -0
- data/LICENSE +1 -1
- data/README.md +8 -3
- data/Rakefile +3 -2
- data/bin/validate-website +1 -0
- data/bin/validate-website-static +1 -0
- data/lib/validate_website.rb +2 -0
- data/lib/validate_website/colorful_messages.rb +3 -0
- data/lib/validate_website/core.rb +11 -6
- data/lib/validate_website/crawl.rb +8 -2
- data/lib/validate_website/option_parser.rb +5 -1
- data/lib/validate_website/runner.rb +3 -1
- data/lib/validate_website/static.rb +11 -3
- data/lib/validate_website/static_link.rb +5 -3
- data/lib/validate_website/utils.rb +3 -0
- data/lib/validate_website/validator.rb +27 -28
- data/lib/validate_website/validator_class_methods.rb +3 -0
- data/lib/validate_website/version.rb +5 -1
- data/man/man1/validate-website-static.1 +3 -3
- data/man/man1/validate-website.1 +3 -3
- data/test/core_test.rb +3 -1
- data/test/crawler_test.rb +35 -16
- data/test/data/html5-fail.html +0 -337
- data/test/static_test.rb +25 -5
- data/test/test_helper.rb +4 -5
- data/test/validator_test.rb +50 -24
- data/test/webmock_helper.rb +4 -2
- metadata +32 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7208a9c728f7798aecc7a10bec8a3537940a6857e3eb4f6612efa551f2871a94
|
4
|
+
data.tar.gz: 25e5112b42c14a6ed86f0271fba1bb9ada63c1d92cc75414a227716289e241f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 95962fe71e144cc5c8f244e1960556e56c9956bdcd5852e194532ee16b40dfbda5ba39db75169651ddd4048afc79311fdee2aa1087dddc3388aaa894c4dc27fe
|
7
|
+
data.tar.gz: 6a6e8185f8a359e0482c0ef5893b28d307a31be560a2e83bfd19f88f042cfbd3ec37e244f602e5ea4a6876afce9b8d2a38f98f645452168b752c03ee62fdc70d
|
data/History.md
CHANGED
@@ -1,4 +1,48 @@
|
|
1
1
|
|
2
|
+
1.11.1 / 2021-01-10
|
3
|
+
===================
|
4
|
+
|
5
|
+
* Add nokogumbo doc
|
6
|
+
* nokogumbo support for ruby engine
|
7
|
+
* Add support for nokogumbo
|
8
|
+
* Add jruby to github actions
|
9
|
+
* Merge pull request #24 from @marocchino / patch-1
|
10
|
+
* Some minor improvements
|
11
|
+
* Remove travis ci
|
12
|
+
* Init github actions ci
|
13
|
+
|
14
|
+
1.11.0 / 2021-01-08
|
15
|
+
===================
|
16
|
+
|
17
|
+
* Merge pull request #23 from @marocchino / ruby-3-support
|
18
|
+
* Use webrick's escape instead of encode_www_form_component
|
19
|
+
* Support ruby 3
|
20
|
+
* Fix doc for ValidateWebsite::Core initialize
|
21
|
+
* Switch to gitlab ci and remove 2.{3,4} support
|
22
|
+
* Update rubocop to 0.76.0
|
23
|
+
|
24
|
+
1.10.0 / 2020-07-03
|
25
|
+
==================
|
26
|
+
|
27
|
+
* Fix build for Ruby 2.3 and 2.4
|
28
|
+
* Remove rbx-3 from build
|
29
|
+
* Remove minitest-focus and fix minitest 6 warnings
|
30
|
+
* Fix html5_validator option and change html5_validator_service_url
|
31
|
+
* Add Ruby 2.7 to CI and update jruby
|
32
|
+
* Update rubocop and fix offences
|
33
|
+
* Remove Ruby 2.2 support and update rubocop
|
34
|
+
|
35
|
+
1.9.3 / 2019-04-11
|
36
|
+
==================
|
37
|
+
|
38
|
+
* Update tidy_ffi to 1.0
|
39
|
+
* Avoid testing tidy bug with js
|
40
|
+
|
41
|
+
1.9.2 / 2019-03-09
|
42
|
+
==================
|
43
|
+
|
44
|
+
* Load schema when needed instead of boot
|
45
|
+
|
2
46
|
1.9.1 / 2019-03-05
|
3
47
|
==================
|
4
48
|
|
data/LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License
|
2
2
|
|
3
|
-
Copyright (c) 2009-
|
3
|
+
Copyright (c) 2009-2021 Laurent Arnoud <laurent@spkdev.net>
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining
|
6
6
|
a copy of this software and associated documentation files (the
|
data/README.md
CHANGED
@@ -95,6 +95,11 @@ If the libtidy5 is found on your system this will be the default to validate
|
|
95
95
|
your html5 document. This does not depend on a third-party service; everything is done
|
96
96
|
locally.
|
97
97
|
|
98
|
+
#### Nokogumbo
|
99
|
+
|
100
|
+
Nokogumbo can validate html5 documents without a third-party service but reports fewer
|
101
|
+
errors than tidy.
|
102
|
+
|
98
103
|
#### Validator.nu web service
|
99
104
|
|
100
105
|
When `--html5-validator nu` option is used HTML5 support is done by using the
|
@@ -145,12 +150,12 @@ See [GitHub](https://github.com/spk/validate-website/graphs/contributors).
|
|
145
150
|
|
146
151
|
The MIT License
|
147
152
|
|
148
|
-
Copyright (c) 2009-
|
153
|
+
Copyright (c) 2009-2021 Laurent Arnoud <laurent@spkdev.net>
|
149
154
|
|
150
155
|
---
|
151
|
-
[![Build](https://img.shields.io/
|
156
|
+
[![Build](https://img.shields.io/gitlab/pipeline/spkdev/validate-website/master)](https://gitlab.com/spkdev/validate-website/-/commits/master)
|
157
|
+
[![Coverage](https://gitlab.com/spkdev/validate-website/badges/master/coverage.svg)](https://gitlab.com/spkdev/validate-website/-/commits/master)
|
152
158
|
[![Version](https://img.shields.io/gem/v/validate-website.svg)](https://rubygems.org/gems/validate-website)
|
153
159
|
[![Documentation](https://img.shields.io/badge/doc-rubydoc-blue.svg)](http://www.rubydoc.info/gems/validate-website)
|
154
160
|
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](http://opensource.org/licenses/MIT "MIT")
|
155
|
-
[![Coverage Status](https://img.shields.io/coveralls/github/spk/validate-website.svg)](https://coveralls.io/github/spk/validate-website?branch=master)
|
156
161
|
[![Inline docs](https://inch-ci.org/github/spk/validate-website.svg?branch=master)](http://inch-ci.org/github/spk/validate-website)
|
data/Rakefile
CHANGED
@@ -1,9 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rake/testtask'
|
2
4
|
require 'rubocop/rake_task'
|
3
5
|
require 'asciidoctor'
|
4
6
|
|
5
|
-
default = %i[test]
|
6
|
-
default << :rubocop unless RUBY_ENGINE == 'rbx'
|
7
|
+
default = %i[test rubocop]
|
7
8
|
task default: default
|
8
9
|
|
9
10
|
desc 'Update manpage from asciidoc file'
|
data/bin/validate-website
CHANGED
data/bin/validate-website-static
CHANGED
data/lib/validate_website.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'set'
|
2
4
|
require 'open-uri'
|
3
5
|
require 'webrick/cookie'
|
@@ -26,12 +28,13 @@ module ValidateWebsite
|
|
26
28
|
EXIT_FAILURE_MARKUP = 64
|
27
29
|
EXIT_FAILURE_NOT_FOUND = 65
|
28
30
|
EXIT_FAILURE_MARKUP_NOT_FOUND = 66
|
29
|
-
START_MESSAGE = 'Validating'
|
31
|
+
START_MESSAGE = 'Validating'
|
30
32
|
|
31
33
|
# Initialize core ValidateWebsite class
|
32
34
|
# @example
|
33
35
|
# new({ site: "https://example.com/" }, :crawl)
|
34
36
|
# @param [Hash] options
|
37
|
+
# @param [Symbol] validation_type `crawl` for web or `static` for local
|
35
38
|
# @return [NilClass]
|
36
39
|
def initialize(options, validation_type)
|
37
40
|
@not_founds_count = 0
|
@@ -43,11 +46,11 @@ module ValidateWebsite
|
|
43
46
|
end
|
44
47
|
|
45
48
|
def errors?
|
46
|
-
@errors_count
|
49
|
+
@errors_count.positive?
|
47
50
|
end
|
48
51
|
|
49
52
|
def not_founds?
|
50
|
-
@not_founds_count
|
53
|
+
@not_founds_count.positive?
|
51
54
|
end
|
52
55
|
|
53
56
|
def exit_status
|
@@ -78,6 +81,7 @@ module ValidateWebsite
|
|
78
81
|
def check_css_syntax(page)
|
79
82
|
nodes = Crass::Parser.parse_stylesheet(page.body)
|
80
83
|
return unless any_css_errors?(nodes)
|
84
|
+
|
81
85
|
handle_validation_error(page.url)
|
82
86
|
end
|
83
87
|
|
@@ -111,10 +115,10 @@ module ValidateWebsite
|
|
111
115
|
# @param [Nokogiri::HTML::Document] original_doc
|
112
116
|
# @param [String] body The raw HTTP response body of the page
|
113
117
|
# @param [String] url
|
114
|
-
# @param [
|
118
|
+
# @param [Hash] options Validator options
|
115
119
|
#
|
116
|
-
def validate(doc, body, url,
|
117
|
-
validator = Validator.new(doc, body,
|
120
|
+
def validate(doc, body, url, options)
|
121
|
+
validator = Validator.new(doc, body, **options)
|
118
122
|
if validator.valid?
|
119
123
|
print color(:success, '.', options[:color]) # rspec style
|
120
124
|
else
|
@@ -125,6 +129,7 @@ module ValidateWebsite
|
|
125
129
|
def handle_html_validation_error(validator, url)
|
126
130
|
handle_validation_error(url)
|
127
131
|
return unless options[:verbose]
|
132
|
+
|
128
133
|
puts color(:error, validator.errors.join(', '), options[:color])
|
129
134
|
end
|
130
135
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'validate_website/core'
|
2
4
|
require 'validate_website/utils'
|
3
5
|
|
@@ -41,9 +43,10 @@ module ValidateWebsite
|
|
41
43
|
#
|
42
44
|
def extract_imgs_from_page(page)
|
43
45
|
return Set[] if page.is_redirect?
|
46
|
+
|
44
47
|
page.doc.search('//img[@src]').reduce(Set[]) do |result, elem|
|
45
48
|
u = elem.attributes['src'].content
|
46
|
-
result << page.to_absolute(URI.parse(
|
49
|
+
result << page.to_absolute(URI.parse(WEBrick::HTTPUtils.escape(u)))
|
47
50
|
end
|
48
51
|
end
|
49
52
|
|
@@ -77,7 +80,10 @@ module ValidateWebsite
|
|
77
80
|
end
|
78
81
|
|
79
82
|
if validate?(page)
|
80
|
-
|
83
|
+
keys = %i[ignore html5_validator]
|
84
|
+
# slice does not exist on Ruby <= 2.4
|
85
|
+
slice = Hash[[keys, options.values_at(*keys)].transpose]
|
86
|
+
validate(page.doc, page.body, page.url, slice)
|
81
87
|
end
|
82
88
|
end
|
83
89
|
end
|
@@ -1,4 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'slop'
|
4
|
+
require File.expand_path('version', __dir__)
|
2
5
|
|
3
6
|
module ValidateWebsite
|
4
7
|
# Internal class for parsing command line args
|
@@ -28,6 +31,7 @@ module ValidateWebsite
|
|
28
31
|
# Generic parse method for crawl or static options
|
29
32
|
def self.parse(options, type)
|
30
33
|
raise ArgumentError unless VALID_TYPES.include?(type)
|
34
|
+
|
31
35
|
# We are in command line (ARGV)
|
32
36
|
if options.is_a?(Array)
|
33
37
|
send("command_line_parse_#{type}", options)
|
@@ -52,7 +56,7 @@ module ValidateWebsite
|
|
52
56
|
opt.regexp('-i', '--ignore',
|
53
57
|
'Validation errors to ignore (ex: "valign|autocorrect")')
|
54
58
|
opt.string('-x', '--html5-validator',
|
55
|
-
'Change default html5 validator engine (
|
59
|
+
'Change default html5 validator engine (tidy/nu/nokogumbo)',
|
56
60
|
default: DEFAULT_OPTIONS[:html5_validator])
|
57
61
|
opt.string('-5', '--html5-validator-service-url',
|
58
62
|
'Change default html5 validator service URL for "nu" engine')
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'validate_website/core'
|
2
4
|
|
3
5
|
module ValidateWebsite
|
@@ -5,7 +7,7 @@ module ValidateWebsite
|
|
5
7
|
class Runner
|
6
8
|
def self.trap_interrupt
|
7
9
|
trap('INT') do
|
8
|
-
|
10
|
+
warn "\nExiting..."
|
9
11
|
exit!(1)
|
10
12
|
end
|
11
13
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'validate_website/core'
|
2
4
|
require 'validate_website/utils'
|
3
5
|
|
@@ -5,7 +7,7 @@ module ValidateWebsite
|
|
5
7
|
# Class for validating a static website
|
6
8
|
class Static < Core
|
7
9
|
CONTENT_TYPES = ['text/html', 'text/xhtml+xml'].freeze
|
8
|
-
START_MESSAGE_TYPE = 'files'
|
10
|
+
START_MESSAGE_TYPE = 'files'
|
9
11
|
|
10
12
|
attr_reader :history_count
|
11
13
|
|
@@ -24,7 +26,8 @@ module ValidateWebsite
|
|
24
26
|
files = Dir.glob(@options[:pattern])
|
25
27
|
files.each do |file|
|
26
28
|
next unless File.file?(file)
|
27
|
-
next if @options[:exclude]
|
29
|
+
next if @options[:exclude]&.match(file)
|
30
|
+
|
28
31
|
@history_count += 1
|
29
32
|
check_static_file(file)
|
30
33
|
end
|
@@ -57,7 +60,10 @@ module ValidateWebsite
|
|
57
60
|
|
58
61
|
def check_page(file, page)
|
59
62
|
if page.html? && options[:markup]
|
60
|
-
|
63
|
+
keys = %i[ignore html5_validator]
|
64
|
+
# slice does not exist on Ruby <= 2.4
|
65
|
+
slice = Hash[[keys, options.values_at(*keys)].transpose]
|
66
|
+
validate(page.doc, page.body, file, slice)
|
61
67
|
end
|
62
68
|
check_static_not_found(page.links) if options[:not_found]
|
63
69
|
end
|
@@ -68,11 +74,13 @@ module ValidateWebsite
|
|
68
74
|
static_links = links.map { |l| StaticLink.new(l, @site) }
|
69
75
|
static_links.each do |static_link|
|
70
76
|
next unless static_link.check?
|
77
|
+
|
71
78
|
unless File.exist?(static_link.file_path)
|
72
79
|
not_found_error(static_link.file_path)
|
73
80
|
next
|
74
81
|
end
|
75
82
|
next unless static_link.css?
|
83
|
+
|
76
84
|
check_static_not_found static_link.extract_urls_from_fake_css_response
|
77
85
|
end
|
78
86
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'uri'
|
2
4
|
require 'validate_website/utils'
|
3
5
|
require 'validate_website/static'
|
@@ -6,7 +8,7 @@ require 'spidr'
|
|
6
8
|
# rubocop:disable Metrics/BlockLength
|
7
9
|
StaticLink = Struct.new(:link, :site) do
|
8
10
|
def link_uri
|
9
|
-
@link_uri = URI.parse(
|
11
|
+
@link_uri = URI.parse(WEBrick::HTTPUtils.escape(link))
|
10
12
|
@link_uri = URI.join(site, @link_uri) if @link_uri.host.nil?
|
11
13
|
@link_uri
|
12
14
|
end
|
@@ -25,9 +27,9 @@ StaticLink = Struct.new(:link, :site) do
|
|
25
27
|
|
26
28
|
def body
|
27
29
|
if File.exist?(link)
|
28
|
-
open(link).read
|
30
|
+
File.open(link).read
|
29
31
|
else
|
30
|
-
open(file_path).read
|
32
|
+
File.open(file_path).read
|
31
33
|
end
|
32
34
|
end
|
33
35
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Base module ValidateWebsite
|
2
4
|
module ValidateWebsite
|
3
5
|
# Utils class for CSS helpers
|
@@ -10,6 +12,7 @@ module ValidateWebsite
|
|
10
12
|
def self.extract_urls_from_css(page)
|
11
13
|
return Set[] unless page
|
12
14
|
return Set[] if page.body.nil?
|
15
|
+
|
13
16
|
nodes = Crass::Parser.parse_stylesheet(page.body)
|
14
17
|
extract_urls_from_nodes nodes, page
|
15
18
|
end
|
@@ -1,6 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'uri'
|
2
4
|
|
3
|
-
require '
|
5
|
+
require 'nokogumbo' unless ValidateWebsite.jruby?
|
4
6
|
require 'w3c_validators'
|
5
7
|
|
6
8
|
require 'validate_website/validator_class_methods'
|
@@ -10,27 +12,25 @@ module ValidateWebsite
|
|
10
12
|
class Validator
|
11
13
|
extend ValidatorClassMethods
|
12
14
|
|
13
|
-
@html5_validator_service_url = 'https://
|
15
|
+
@html5_validator_service_url = 'https://validator.nu/'
|
16
|
+
XHTML_SCHEMA_PATH = File.expand_path('../../data/schemas', __dir__)
|
17
|
+
@mutex = Mutex.new
|
18
|
+
|
14
19
|
class << self
|
15
20
|
attr_accessor :html5_validator_service_url
|
16
|
-
end
|
17
|
-
|
18
|
-
XHTML_PATH = File.expand_path('../../data/schemas', __dir__)
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
begin
|
29
|
-
@xsd_schemas[schema_name] = Nokogiri::XML::Schema(schema_content)
|
30
|
-
rescue Nokogiri::XML::SyntaxError
|
31
|
-
STDERR.puts "XSD SCHEMA: #{schema} cannot be loaded"
|
22
|
+
# http://www.w3.org/TR/xhtml1-schema/
|
23
|
+
def schema(namespace)
|
24
|
+
@mutex.synchronize do
|
25
|
+
Dir.chdir(XHTML_SCHEMA_PATH) do
|
26
|
+
if File.exist?("#{namespace}.xsd")
|
27
|
+
Nokogiri::XML::Schema(File.read("#{namespace}.xsd"))
|
28
|
+
end
|
29
|
+
end
|
32
30
|
end
|
33
31
|
end
|
32
|
+
|
33
|
+
alias xsd schema
|
34
34
|
end
|
35
35
|
|
36
36
|
attr_reader :original_doc, :body, :dtd, :doc, :namespace, :html5_validator
|
@@ -65,25 +65,22 @@ module ValidateWebsite
|
|
65
65
|
@ignore ? @errors.reject { |e| @ignore =~ e } : @errors
|
66
66
|
end
|
67
67
|
|
68
|
-
# http://www.w3.org/TR/xhtml1-schema/
|
69
|
-
def self.xsd(namespace)
|
70
|
-
return unless namespace
|
71
|
-
@xsd_schemas[namespace] if @xsd_schemas.key? namespace
|
72
|
-
end
|
73
|
-
|
74
68
|
private
|
75
69
|
|
76
70
|
# http://www.w3.org/TR/xhtml1/#dtds
|
77
71
|
def find_namespace(dtd)
|
78
72
|
return unless dtd.system_id
|
73
|
+
|
79
74
|
dtd_uri = URI.parse(dtd.system_id)
|
80
75
|
return unless dtd_uri.path
|
76
|
+
|
81
77
|
@dtd_uri = dtd_uri
|
82
78
|
File.basename(@dtd_uri.path, '.dtd')
|
83
79
|
end
|
84
80
|
|
85
81
|
def document
|
86
82
|
return @document if @document
|
83
|
+
|
87
84
|
@document = if @dtd_uri && @body.match(@dtd_uri.to_s)
|
88
85
|
@body.sub(@dtd_uri.to_s, @namespace + '.dtd')
|
89
86
|
else
|
@@ -95,11 +92,11 @@ module ValidateWebsite
|
|
95
92
|
def validate
|
96
93
|
if document =~ /^\<!DOCTYPE html\>/i
|
97
94
|
html5_validate
|
98
|
-
elsif self.class.
|
99
|
-
self.class.
|
95
|
+
elsif self.class.schema(@namespace)
|
96
|
+
self.class.schema(@namespace).validate(xhtml_doc)
|
100
97
|
else
|
101
98
|
# don't have an xsd; fall back to dtd
|
102
|
-
Dir.chdir(
|
99
|
+
Dir.chdir(XHTML_SCHEMA_PATH) do
|
103
100
|
Nokogiri::HTML.parse(document)
|
104
101
|
end.errors
|
105
102
|
end
|
@@ -115,8 +112,10 @@ module ValidateWebsite
|
|
115
112
|
def html5_validate
|
116
113
|
if html5_validator.to_sym == :tidy && self.class.tidy
|
117
114
|
tidy_validate
|
118
|
-
|
115
|
+
elsif html5_validator.to_sym == :nu
|
119
116
|
nu_validate
|
117
|
+
else
|
118
|
+
Nokogiri::HTML5(document, max_errors: -1).errors
|
120
119
|
end
|
121
120
|
end
|
122
121
|
|
@@ -138,7 +137,7 @@ module ValidateWebsite
|
|
138
137
|
end
|
139
138
|
|
140
139
|
def xhtml_doc
|
141
|
-
Dir.chdir(
|
140
|
+
Dir.chdir(XHTML_SCHEMA_PATH) do
|
142
141
|
Nokogiri::XML(document) { |cfg| cfg.nonoent.dtdload.dtdvalid.nonet }
|
143
142
|
end
|
144
143
|
end
|