validate-website 1.1.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +7 -7
- data/lib/validate_website/core.rb +26 -16
- data/lib/validate_website/crawl.rb +4 -3
- data/lib/validate_website/option_parser.rb +10 -2
- data/lib/validate_website/static.rb +42 -13
- data/lib/validate_website/utils.rb +48 -0
- data/lib/validate_website/version.rb +2 -1
- data/lib/validate_website.rb +0 -1
- data/man/man1/validate-website-static.1 +7 -2
- data/man/man1/validate-website.1 +7 -2
- metadata +18 -24
- data/spec/core_spec.rb +0 -10
- data/spec/crawler_spec.rb +0 -91
- data/spec/data/assets/application-92f19110a9d47a56d2ebe744e15af301.css +0 -1
- data/spec/data/html4-strict.html +0 -239
- data/spec/data/html5-linuxfr.html +0 -1286
- data/spec/data/html5.html +0 -46
- data/spec/data/news/ryzom-naissance-du-projet-libre-ryzom-forge.md +0 -0
- data/spec/data/validator.nu-excessive.html +0 -118
- data/spec/data/validator.nu-failure.html +0 -10
- data/spec/data/validator.nu-success.html +0 -2
- data/spec/data/w3.org-xhtml1-strict-errors.html +0 -544
- data/spec/data/xhtml1-strict.html +0 -22
- data/spec/example/ruby smalltalk/blockcamp-paris-le-28-novembre.html +0 -22
- data/spec/spec_helper.rb +0 -10
- data/spec/static_spec.rb +0 -38
- data/spec/validator_spec.rb +0 -137
- data/spec/webmock_helper.rb +0 -40
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fd93ea8bbe188ac4cb33752c93a971f75106971e
|
4
|
+
data.tar.gz: 9322f70f9cd7433c35d201f0d042117ff2974147
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e1d587ebf07f0f3f93ee4f7ebfd96112919c435eeddda17874c76423ee0e1500865a0afe0f5112c459cd900023445e818d347380800c306c0d398ed9369de710
|
7
|
+
data.tar.gz: 818c8b7f437a4afb0482914fd799e0e0701013ee9b772db56abe3e3aa2259f08cba9444fff65df278b10a8e4144aa08021e38515419ccfebe832c717d22fa63b
|
data/Rakefile
CHANGED
@@ -1,13 +1,8 @@
|
|
1
|
-
require 'rdoc/task'
|
2
1
|
require 'rake/testtask'
|
2
|
+
require 'rubocop/rake_task'
|
3
3
|
|
4
4
|
task default: [:test]
|
5
5
|
|
6
|
-
RDoc::Task.new do |rd|
|
7
|
-
rd.main = 'README.rdoc'
|
8
|
-
rd.rdoc_files.include('README.rdoc', 'lib/**/*.rb')
|
9
|
-
end
|
10
|
-
|
11
6
|
# install asciidoc libxml2-utils xmlto docbook-xsl docbook-xml
|
12
7
|
desc 'Update manpage from asciidoc file'
|
13
8
|
task :manpage do
|
@@ -15,6 +10,11 @@ task :manpage do
|
|
15
10
|
end
|
16
11
|
|
17
12
|
Rake::TestTask.new do |t|
|
18
|
-
t.pattern = '
|
13
|
+
t.pattern = 'test/**/*_test.rb'
|
19
14
|
end
|
20
15
|
task spec: :test
|
16
|
+
|
17
|
+
desc 'Execute rubocop'
|
18
|
+
RuboCop::RakeTask.new(:rubocop) do |t|
|
19
|
+
t.options = ['-D'] # display cop name
|
20
|
+
end
|
@@ -7,6 +7,7 @@ require 'validate_website/validator'
|
|
7
7
|
require 'validate_website/colorful_messages'
|
8
8
|
|
9
9
|
require 'spidr'
|
10
|
+
require 'crass'
|
10
11
|
|
11
12
|
# Base module ValidateWebsite
|
12
13
|
module ValidateWebsite
|
@@ -62,20 +63,25 @@ module ValidateWebsite
|
|
62
63
|
end
|
63
64
|
end
|
64
65
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
page.body.scan(%r{url\((['".\/\w-]+)\)}).reduce(Set[]) do |result, url|
|
72
|
-
url = url.first.gsub("'", '').gsub('"', '')
|
73
|
-
abs = page.to_absolute(url)
|
74
|
-
result << abs.to_s
|
75
|
-
end
|
66
|
+
private
|
67
|
+
|
68
|
+
def check_css_syntax(page)
|
69
|
+
nodes = Crass::Parser.parse_stylesheet(page.body)
|
70
|
+
return unless any_css_errors?(nodes)
|
71
|
+
handle_validation_error(page.url)
|
76
72
|
end
|
77
73
|
|
78
|
-
|
74
|
+
def any_css_errors?(nodes)
|
75
|
+
nodes.any? do |node|
|
76
|
+
if node[:children]
|
77
|
+
any_css_errors? node.delete(:children)
|
78
|
+
elsif node[:tokens]
|
79
|
+
any_css_errors? node.delete(:tokens)
|
80
|
+
else
|
81
|
+
node[:node] == :error || node[:error] == true
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
79
85
|
|
80
86
|
def print_status_line(total, failures, not_founds, errors)
|
81
87
|
puts "\n\n"
|
@@ -102,16 +108,20 @@ module ValidateWebsite
|
|
102
108
|
if validator.valid?
|
103
109
|
print color(:success, '.', options[:color]) # rspec style
|
104
110
|
else
|
105
|
-
|
111
|
+
handle_html_validation_error(validator, url)
|
106
112
|
end
|
107
113
|
end
|
108
114
|
|
109
|
-
def
|
115
|
+
def handle_html_validation_error(validator, url)
|
116
|
+
handle_validation_error(url)
|
117
|
+
return unless options[:verbose]
|
118
|
+
puts color(:error, validator.errors.join(', '), options[:color])
|
119
|
+
end
|
120
|
+
|
121
|
+
def handle_validation_error(url)
|
110
122
|
@errors_count += 1
|
111
123
|
puts "\n"
|
112
124
|
puts color(:error, "* #{url}", options[:color])
|
113
|
-
return unless options[:verbose]
|
114
|
-
puts color(:error, validator.errors.join(', '), options[:color])
|
115
125
|
end
|
116
126
|
end
|
117
127
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'validate_website/core'
|
2
|
+
require 'validate_website/utils'
|
2
3
|
|
3
4
|
module ValidateWebsite
|
4
5
|
# Class for http website validation
|
@@ -44,13 +45,14 @@ module ValidateWebsite
|
|
44
45
|
crawler.cookies[@host] = default_cookies if options[:cookies]
|
45
46
|
on_every_css_page(crawler)
|
46
47
|
on_every_html_page(crawler)
|
47
|
-
on_every_failed_url(crawler)
|
48
|
+
on_every_failed_url(crawler) if options[:not_found]
|
48
49
|
end
|
49
50
|
end
|
50
51
|
|
51
52
|
def on_every_css_page(crawler)
|
52
53
|
crawler.every_css_page do |page|
|
53
|
-
|
54
|
+
check_css_syntax(page) if options[:css_syntax]
|
55
|
+
ValidateWebsite::Utils.extract_urls_from_css(page).each do |u|
|
54
56
|
crawler.enqueue(u)
|
55
57
|
end
|
56
58
|
end
|
@@ -69,7 +71,6 @@ module ValidateWebsite
|
|
69
71
|
end
|
70
72
|
|
71
73
|
def on_every_failed_url(crawler)
|
72
|
-
return unless options[:not_found]
|
73
74
|
crawler.every_failed_url do |url|
|
74
75
|
not_found_error(url)
|
75
76
|
end
|
@@ -12,6 +12,7 @@ module ValidateWebsite
|
|
12
12
|
exclude: nil,
|
13
13
|
user_agent: nil,
|
14
14
|
markup: true,
|
15
|
+
css_syntax: false,
|
15
16
|
# crawler: log not found url (404 status code)
|
16
17
|
# static: log not found url (not on filesystem, `pwd` considered
|
17
18
|
# as root " / ")
|
@@ -39,6 +40,7 @@ module ValidateWebsite
|
|
39
40
|
def self.default_args
|
40
41
|
Slop.parse do |o|
|
41
42
|
yield o if block_given?
|
43
|
+
markup_syntax(o)
|
42
44
|
boolean_options(o)
|
43
45
|
o.regexp('-i', '--ignore',
|
44
46
|
'Validation errors to ignore (ex: "valign|autocorrect")')
|
@@ -48,10 +50,16 @@ module ValidateWebsite
|
|
48
50
|
end
|
49
51
|
end
|
50
52
|
|
51
|
-
def self.
|
53
|
+
def self.markup_syntax(o)
|
52
54
|
o.bool('-m', '--markup',
|
53
55
|
"Markup validation (default: #{DEFAULT_OPTIONS[:markup]})",
|
54
56
|
default: DEFAULT_OPTIONS[:markup])
|
57
|
+
o.bool('--css-syntax',
|
58
|
+
"Css validation (default: #{DEFAULT_OPTIONS[:css_syntax]})",
|
59
|
+
default: DEFAULT_OPTIONS[:css_syntax])
|
60
|
+
end
|
61
|
+
|
62
|
+
def self.boolean_options(o)
|
55
63
|
o.bool('-n', '--not-found',
|
56
64
|
"Log not found url (default: #{DEFAULT_OPTIONS[:not_found]})",
|
57
65
|
default: DEFAULT_OPTIONS[:not_found])
|
@@ -94,7 +102,7 @@ module ValidateWebsite
|
|
94
102
|
o.string('-s', '--site',
|
95
103
|
"Website to crawl (default: #{DEFAULT_OPTIONS[:site]})",
|
96
104
|
default: DEFAULT_OPTIONS[:site])
|
97
|
-
o.
|
105
|
+
o.string('-p', '--pattern',
|
98
106
|
"Filename pattern (default: #{DEFAULT_OPTIONS[:pattern]})",
|
99
107
|
default: DEFAULT_OPTIONS[:pattern])
|
100
108
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'validate_website/core'
|
2
|
+
require 'validate_website/utils'
|
2
3
|
|
3
4
|
module ValidateWebsite
|
4
5
|
# Class for validation Static website
|
@@ -25,15 +26,17 @@ module ValidateWebsite
|
|
25
26
|
|
26
27
|
private
|
27
28
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
29
|
+
def check_static_file(f)
|
30
|
+
page = StaticLink.new(f, @site).page
|
31
|
+
check_page(f, page)
|
32
|
+
check_css_syntax(page) if page.css? && options[:css_syntax]
|
31
33
|
end
|
32
34
|
|
33
|
-
def
|
34
|
-
page
|
35
|
-
|
36
|
-
|
35
|
+
def check_page(f, page)
|
36
|
+
if page.html? && options[:markup]
|
37
|
+
validate(page.doc, page.body, f, options[:ignore])
|
38
|
+
end
|
39
|
+
check_static_not_found(page.links) if options[:not_found]
|
37
40
|
end
|
38
41
|
|
39
42
|
StaticLink = Struct.new(:link, :site) do
|
@@ -47,12 +50,34 @@ module ValidateWebsite
|
|
47
50
|
URI.parse(site).host == link_uri.host
|
48
51
|
end
|
49
52
|
|
53
|
+
def content_types
|
54
|
+
if css?
|
55
|
+
['text/css']
|
56
|
+
else
|
57
|
+
CONTENT_TYPES
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
def body
|
62
|
+
if File.exist?(link)
|
63
|
+
open(link).read
|
64
|
+
else
|
65
|
+
open(file_path).read
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def response
|
70
|
+
@response ||= ValidateWebsite::Static.fake_httpresponse(
|
71
|
+
body,
|
72
|
+
content_types)
|
73
|
+
end
|
74
|
+
|
75
|
+
def page
|
76
|
+
@page ||= Spidr::Page.new(link_uri, response)
|
77
|
+
end
|
78
|
+
|
50
79
|
def extract_urls_from_fake_css_response
|
51
|
-
|
52
|
-
open(file_path).read,
|
53
|
-
['text/css'])
|
54
|
-
css_page = Spidr::Page.new(link_uri, response)
|
55
|
-
ValidateWebsite::Core.extract_urls_from_css(css_page)
|
80
|
+
ValidateWebsite::Utils.extract_urls_from_css(page)
|
56
81
|
end
|
57
82
|
|
58
83
|
def file_path
|
@@ -65,6 +90,10 @@ module ValidateWebsite
|
|
65
90
|
@extname ||= File.extname(file_path)
|
66
91
|
end
|
67
92
|
|
93
|
+
def css?
|
94
|
+
extname == '.css'
|
95
|
+
end
|
96
|
+
|
68
97
|
def check?
|
69
98
|
!link.include?('#') && in_static_domain?
|
70
99
|
end
|
@@ -78,7 +107,7 @@ module ValidateWebsite
|
|
78
107
|
next unless static_link.check?
|
79
108
|
not_found_error(static_link.file_path) &&
|
80
109
|
next unless File.exist?(static_link.file_path)
|
81
|
-
next unless static_link.
|
110
|
+
next unless static_link.css?
|
82
111
|
check_static_not_found static_link.extract_urls_from_fake_css_response
|
83
112
|
end
|
84
113
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Base module ValidateWebsite
|
2
|
+
module ValidateWebsite
|
3
|
+
# Utils class for CSS helpers
|
4
|
+
class Utils
|
5
|
+
# Extract urls from CSS page
|
6
|
+
#
|
7
|
+
# @param [Spidr::Page] a Spidr::Page object
|
8
|
+
# @return [Set] Lists of urls
|
9
|
+
#
|
10
|
+
def self.extract_urls_from_css(page)
|
11
|
+
return Set[] unless page
|
12
|
+
return Set[] if page.body.nil?
|
13
|
+
nodes = Crass::Parser.parse_stylesheet(page.body)
|
14
|
+
extract_urls_from_nodes nodes, page
|
15
|
+
end
|
16
|
+
|
17
|
+
# Return urls as absolute from Crass nodes
|
18
|
+
#
|
19
|
+
# @param [Hash] node from Crass
|
20
|
+
# @param [Spidr::Page] a Spidr::Page object
|
21
|
+
# @return [Set] list of obsolute urls
|
22
|
+
def self.urls_to_absolute(node, page)
|
23
|
+
if node[:node] == :function && node[:name] == 'url' || node[:node] == :url
|
24
|
+
Array(node[:value]).map do |v|
|
25
|
+
url = v.is_a?(String) ? v : v[:value]
|
26
|
+
page.to_absolute(url).to_s
|
27
|
+
end
|
28
|
+
else
|
29
|
+
Set.new
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Extract urls from Crass nodes
|
34
|
+
# @param [Array] Array of nodes from Crass
|
35
|
+
# @param [Spidr::Page] a Spidr::Page object
|
36
|
+
# @param [Set] memo for recursivity
|
37
|
+
# @return [Set] list of urls
|
38
|
+
def self.extract_urls_from_nodes(nodes, page, memo = Set[])
|
39
|
+
nodes.each_with_object(memo) do |node, result|
|
40
|
+
result.merge urls_to_absolute(node, page)
|
41
|
+
if node[:children]
|
42
|
+
extract_urls_from_nodes node.delete(:children), page, result
|
43
|
+
end
|
44
|
+
result
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
data/lib/validate_website.rb
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
.\" Title: validate-website-static
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
4
|
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
5
|
-
.\" Date:
|
5
|
+
.\" Date: 07/21/2015
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE\-S" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE\-S" "1" "07/21/2015" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
@@ -59,6 +59,11 @@ Ignore certain validation errors (ex:
|
|
59
59
|
Markup validation (Default: true)
|
60
60
|
.RE
|
61
61
|
.PP
|
62
|
+
\fB\-\-css\-syntax\fR
|
63
|
+
.RS 4
|
64
|
+
Css validation (Default: false)
|
65
|
+
.RE
|
66
|
+
.PP
|
62
67
|
\fB\-n\fR, \fB\-\-not\-found\fR
|
63
68
|
.RS 4
|
64
69
|
Log files not on filesystem, pwd considered as root \(Fo / \(Fc (Default: false)
|
data/man/man1/validate-website.1
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
.\" Title: validate-website
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
4
|
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
5
|
-
.\" Date:
|
5
|
+
.\" Date: 07/21/2015
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE" "1" "07/21/2015" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
@@ -70,6 +70,11 @@ Set defaults cookies
|
|
70
70
|
Markup validation (Default: true)
|
71
71
|
.RE
|
72
72
|
.PP
|
73
|
+
\fB\-\-css\-syntax\fR
|
74
|
+
.RS 4
|
75
|
+
Css validation (Default: false)
|
76
|
+
.RE
|
77
|
+
.PP
|
73
78
|
\fB\-n\fR, \fB\-\-not\-found\fR
|
74
79
|
.RS 4
|
75
80
|
Log not found url (Default: false)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: validate-website
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Laurent Arnoud
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-07-
|
11
|
+
date: 2015-07-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spidr
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0.4'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: crass
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: paint
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -246,27 +260,11 @@ files:
|
|
246
260
|
- lib/validate_website/option_parser.rb
|
247
261
|
- lib/validate_website/runner.rb
|
248
262
|
- lib/validate_website/static.rb
|
263
|
+
- lib/validate_website/utils.rb
|
249
264
|
- lib/validate_website/validator.rb
|
250
265
|
- lib/validate_website/version.rb
|
251
266
|
- man/man1/validate-website-static.1
|
252
267
|
- man/man1/validate-website.1
|
253
|
-
- spec/core_spec.rb
|
254
|
-
- spec/crawler_spec.rb
|
255
|
-
- spec/data/assets/application-92f19110a9d47a56d2ebe744e15af301.css
|
256
|
-
- spec/data/html4-strict.html
|
257
|
-
- spec/data/html5-linuxfr.html
|
258
|
-
- spec/data/html5.html
|
259
|
-
- spec/data/news/ryzom-naissance-du-projet-libre-ryzom-forge.md
|
260
|
-
- spec/data/validator.nu-excessive.html
|
261
|
-
- spec/data/validator.nu-failure.html
|
262
|
-
- spec/data/validator.nu-success.html
|
263
|
-
- spec/data/w3.org-xhtml1-strict-errors.html
|
264
|
-
- spec/data/xhtml1-strict.html
|
265
|
-
- spec/example/ruby smalltalk/blockcamp-paris-le-28-novembre.html
|
266
|
-
- spec/spec_helper.rb
|
267
|
-
- spec/static_spec.rb
|
268
|
-
- spec/validator_spec.rb
|
269
|
-
- spec/webmock_helper.rb
|
270
268
|
homepage: http://github.com/spk/validate-website
|
271
269
|
licenses:
|
272
270
|
- MIT
|
@@ -294,9 +292,5 @@ rubygems_version: 2.4.5
|
|
294
292
|
signing_key:
|
295
293
|
specification_version: 4
|
296
294
|
summary: Web crawler for checking the validity of your documents
|
297
|
-
test_files:
|
298
|
-
- spec/static_spec.rb
|
299
|
-
- spec/core_spec.rb
|
300
|
-
- spec/crawler_spec.rb
|
301
|
-
- spec/validator_spec.rb
|
295
|
+
test_files: []
|
302
296
|
has_rdoc:
|