validate-website 1.1.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +7 -7
- data/lib/validate_website/core.rb +26 -16
- data/lib/validate_website/crawl.rb +4 -3
- data/lib/validate_website/option_parser.rb +10 -2
- data/lib/validate_website/static.rb +42 -13
- data/lib/validate_website/utils.rb +48 -0
- data/lib/validate_website/version.rb +2 -1
- data/lib/validate_website.rb +0 -1
- data/man/man1/validate-website-static.1 +7 -2
- data/man/man1/validate-website.1 +7 -2
- metadata +18 -24
- data/spec/core_spec.rb +0 -10
- data/spec/crawler_spec.rb +0 -91
- data/spec/data/assets/application-92f19110a9d47a56d2ebe744e15af301.css +0 -1
- data/spec/data/html4-strict.html +0 -239
- data/spec/data/html5-linuxfr.html +0 -1286
- data/spec/data/html5.html +0 -46
- data/spec/data/news/ryzom-naissance-du-projet-libre-ryzom-forge.md +0 -0
- data/spec/data/validator.nu-excessive.html +0 -118
- data/spec/data/validator.nu-failure.html +0 -10
- data/spec/data/validator.nu-success.html +0 -2
- data/spec/data/w3.org-xhtml1-strict-errors.html +0 -544
- data/spec/data/xhtml1-strict.html +0 -22
- data/spec/example/ruby smalltalk/blockcamp-paris-le-28-novembre.html +0 -22
- data/spec/spec_helper.rb +0 -10
- data/spec/static_spec.rb +0 -38
- data/spec/validator_spec.rb +0 -137
- data/spec/webmock_helper.rb +0 -40
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fd93ea8bbe188ac4cb33752c93a971f75106971e
|
4
|
+
data.tar.gz: 9322f70f9cd7433c35d201f0d042117ff2974147
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e1d587ebf07f0f3f93ee4f7ebfd96112919c435eeddda17874c76423ee0e1500865a0afe0f5112c459cd900023445e818d347380800c306c0d398ed9369de710
|
7
|
+
data.tar.gz: 818c8b7f437a4afb0482914fd799e0e0701013ee9b772db56abe3e3aa2259f08cba9444fff65df278b10a8e4144aa08021e38515419ccfebe832c717d22fa63b
|
data/Rakefile
CHANGED
@@ -1,13 +1,8 @@
|
|
1
|
-
require 'rdoc/task'
|
2
1
|
require 'rake/testtask'
|
2
|
+
require 'rubocop/rake_task'
|
3
3
|
|
4
4
|
task default: [:test]
|
5
5
|
|
6
|
-
RDoc::Task.new do |rd|
|
7
|
-
rd.main = 'README.rdoc'
|
8
|
-
rd.rdoc_files.include('README.rdoc', 'lib/**/*.rb')
|
9
|
-
end
|
10
|
-
|
11
6
|
# install asciidoc libxml2-utils xmlto docbook-xsl docbook-xml
|
12
7
|
desc 'Update manpage from asciidoc file'
|
13
8
|
task :manpage do
|
@@ -15,6 +10,11 @@ task :manpage do
|
|
15
10
|
end
|
16
11
|
|
17
12
|
Rake::TestTask.new do |t|
|
18
|
-
t.pattern = '
|
13
|
+
t.pattern = 'test/**/*_test.rb'
|
19
14
|
end
|
20
15
|
task spec: :test
|
16
|
+
|
17
|
+
desc 'Execute rubocop'
|
18
|
+
RuboCop::RakeTask.new(:rubocop) do |t|
|
19
|
+
t.options = ['-D'] # display cop name
|
20
|
+
end
|
@@ -7,6 +7,7 @@ require 'validate_website/validator'
|
|
7
7
|
require 'validate_website/colorful_messages'
|
8
8
|
|
9
9
|
require 'spidr'
|
10
|
+
require 'crass'
|
10
11
|
|
11
12
|
# Base module ValidateWebsite
|
12
13
|
module ValidateWebsite
|
@@ -62,20 +63,25 @@ module ValidateWebsite
|
|
62
63
|
end
|
63
64
|
end
|
64
65
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
page.body.scan(%r{url\((['".\/\w-]+)\)}).reduce(Set[]) do |result, url|
|
72
|
-
url = url.first.gsub("'", '').gsub('"', '')
|
73
|
-
abs = page.to_absolute(url)
|
74
|
-
result << abs.to_s
|
75
|
-
end
|
66
|
+
private
|
67
|
+
|
68
|
+
def check_css_syntax(page)
|
69
|
+
nodes = Crass::Parser.parse_stylesheet(page.body)
|
70
|
+
return unless any_css_errors?(nodes)
|
71
|
+
handle_validation_error(page.url)
|
76
72
|
end
|
77
73
|
|
78
|
-
|
74
|
+
def any_css_errors?(nodes)
|
75
|
+
nodes.any? do |node|
|
76
|
+
if node[:children]
|
77
|
+
any_css_errors? node.delete(:children)
|
78
|
+
elsif node[:tokens]
|
79
|
+
any_css_errors? node.delete(:tokens)
|
80
|
+
else
|
81
|
+
node[:node] == :error || node[:error] == true
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
79
85
|
|
80
86
|
def print_status_line(total, failures, not_founds, errors)
|
81
87
|
puts "\n\n"
|
@@ -102,16 +108,20 @@ module ValidateWebsite
|
|
102
108
|
if validator.valid?
|
103
109
|
print color(:success, '.', options[:color]) # rspec style
|
104
110
|
else
|
105
|
-
|
111
|
+
handle_html_validation_error(validator, url)
|
106
112
|
end
|
107
113
|
end
|
108
114
|
|
109
|
-
def
|
115
|
+
def handle_html_validation_error(validator, url)
|
116
|
+
handle_validation_error(url)
|
117
|
+
return unless options[:verbose]
|
118
|
+
puts color(:error, validator.errors.join(', '), options[:color])
|
119
|
+
end
|
120
|
+
|
121
|
+
def handle_validation_error(url)
|
110
122
|
@errors_count += 1
|
111
123
|
puts "\n"
|
112
124
|
puts color(:error, "* #{url}", options[:color])
|
113
|
-
return unless options[:verbose]
|
114
|
-
puts color(:error, validator.errors.join(', '), options[:color])
|
115
125
|
end
|
116
126
|
end
|
117
127
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'validate_website/core'
|
2
|
+
require 'validate_website/utils'
|
2
3
|
|
3
4
|
module ValidateWebsite
|
4
5
|
# Class for http website validation
|
@@ -44,13 +45,14 @@ module ValidateWebsite
|
|
44
45
|
crawler.cookies[@host] = default_cookies if options[:cookies]
|
45
46
|
on_every_css_page(crawler)
|
46
47
|
on_every_html_page(crawler)
|
47
|
-
on_every_failed_url(crawler)
|
48
|
+
on_every_failed_url(crawler) if options[:not_found]
|
48
49
|
end
|
49
50
|
end
|
50
51
|
|
51
52
|
def on_every_css_page(crawler)
|
52
53
|
crawler.every_css_page do |page|
|
53
|
-
|
54
|
+
check_css_syntax(page) if options[:css_syntax]
|
55
|
+
ValidateWebsite::Utils.extract_urls_from_css(page).each do |u|
|
54
56
|
crawler.enqueue(u)
|
55
57
|
end
|
56
58
|
end
|
@@ -69,7 +71,6 @@ module ValidateWebsite
|
|
69
71
|
end
|
70
72
|
|
71
73
|
def on_every_failed_url(crawler)
|
72
|
-
return unless options[:not_found]
|
73
74
|
crawler.every_failed_url do |url|
|
74
75
|
not_found_error(url)
|
75
76
|
end
|
@@ -12,6 +12,7 @@ module ValidateWebsite
|
|
12
12
|
exclude: nil,
|
13
13
|
user_agent: nil,
|
14
14
|
markup: true,
|
15
|
+
css_syntax: false,
|
15
16
|
# crawler: log not found url (404 status code)
|
16
17
|
# static: log not found url (not on filesystem, `pwd` considered
|
17
18
|
# as root " / ")
|
@@ -39,6 +40,7 @@ module ValidateWebsite
|
|
39
40
|
def self.default_args
|
40
41
|
Slop.parse do |o|
|
41
42
|
yield o if block_given?
|
43
|
+
markup_syntax(o)
|
42
44
|
boolean_options(o)
|
43
45
|
o.regexp('-i', '--ignore',
|
44
46
|
'Validation errors to ignore (ex: "valign|autocorrect")')
|
@@ -48,10 +50,16 @@ module ValidateWebsite
|
|
48
50
|
end
|
49
51
|
end
|
50
52
|
|
51
|
-
def self.
|
53
|
+
def self.markup_syntax(o)
|
52
54
|
o.bool('-m', '--markup',
|
53
55
|
"Markup validation (default: #{DEFAULT_OPTIONS[:markup]})",
|
54
56
|
default: DEFAULT_OPTIONS[:markup])
|
57
|
+
o.bool('--css-syntax',
|
58
|
+
"Css validation (default: #{DEFAULT_OPTIONS[:css_syntax]})",
|
59
|
+
default: DEFAULT_OPTIONS[:css_syntax])
|
60
|
+
end
|
61
|
+
|
62
|
+
def self.boolean_options(o)
|
55
63
|
o.bool('-n', '--not-found',
|
56
64
|
"Log not found url (default: #{DEFAULT_OPTIONS[:not_found]})",
|
57
65
|
default: DEFAULT_OPTIONS[:not_found])
|
@@ -94,7 +102,7 @@ module ValidateWebsite
|
|
94
102
|
o.string('-s', '--site',
|
95
103
|
"Website to crawl (default: #{DEFAULT_OPTIONS[:site]})",
|
96
104
|
default: DEFAULT_OPTIONS[:site])
|
97
|
-
o.
|
105
|
+
o.string('-p', '--pattern',
|
98
106
|
"Filename pattern (default: #{DEFAULT_OPTIONS[:pattern]})",
|
99
107
|
default: DEFAULT_OPTIONS[:pattern])
|
100
108
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'validate_website/core'
|
2
|
+
require 'validate_website/utils'
|
2
3
|
|
3
4
|
module ValidateWebsite
|
4
5
|
# Class for validating a static website
|
@@ -25,15 +26,17 @@ module ValidateWebsite
|
|
25
26
|
|
26
27
|
private
|
27
28
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
29
|
+
def check_static_file(f)
|
30
|
+
page = StaticLink.new(f, @site).page
|
31
|
+
check_page(f, page)
|
32
|
+
check_css_syntax(page) if page.css? && options[:css_syntax]
|
31
33
|
end
|
32
34
|
|
33
|
-
def
|
34
|
-
page
|
35
|
-
|
36
|
-
|
35
|
+
def check_page(f, page)
|
36
|
+
if page.html? && options[:markup]
|
37
|
+
validate(page.doc, page.body, f, options[:ignore])
|
38
|
+
end
|
39
|
+
check_static_not_found(page.links) if options[:not_found]
|
37
40
|
end
|
38
41
|
|
39
42
|
StaticLink = Struct.new(:link, :site) do
|
@@ -47,12 +50,34 @@ module ValidateWebsite
|
|
47
50
|
URI.parse(site).host == link_uri.host
|
48
51
|
end
|
49
52
|
|
53
|
+
def content_types
|
54
|
+
if css?
|
55
|
+
['text/css']
|
56
|
+
else
|
57
|
+
CONTENT_TYPES
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
def body
|
62
|
+
if File.exist?(link)
|
63
|
+
open(link).read
|
64
|
+
else
|
65
|
+
open(file_path).read
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def response
|
70
|
+
@response ||= ValidateWebsite::Static.fake_httpresponse(
|
71
|
+
body,
|
72
|
+
content_types)
|
73
|
+
end
|
74
|
+
|
75
|
+
def page
|
76
|
+
@page ||= Spidr::Page.new(link_uri, response)
|
77
|
+
end
|
78
|
+
|
50
79
|
def extract_urls_from_fake_css_response
|
51
|
-
|
52
|
-
open(file_path).read,
|
53
|
-
['text/css'])
|
54
|
-
css_page = Spidr::Page.new(link_uri, response)
|
55
|
-
ValidateWebsite::Core.extract_urls_from_css(css_page)
|
80
|
+
ValidateWebsite::Utils.extract_urls_from_css(page)
|
56
81
|
end
|
57
82
|
|
58
83
|
def file_path
|
@@ -65,6 +90,10 @@ module ValidateWebsite
|
|
65
90
|
@extname ||= File.extname(file_path)
|
66
91
|
end
|
67
92
|
|
93
|
+
def css?
|
94
|
+
extname == '.css'
|
95
|
+
end
|
96
|
+
|
68
97
|
def check?
|
69
98
|
!link.include?('#') && in_static_domain?
|
70
99
|
end
|
@@ -78,7 +107,7 @@ module ValidateWebsite
|
|
78
107
|
next unless static_link.check?
|
79
108
|
not_found_error(static_link.file_path) &&
|
80
109
|
next unless File.exist?(static_link.file_path)
|
81
|
-
next unless static_link.
|
110
|
+
next unless static_link.css?
|
82
111
|
check_static_not_found static_link.extract_urls_from_fake_css_response
|
83
112
|
end
|
84
113
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Base module ValidateWebsite
|
2
|
+
module ValidateWebsite
|
3
|
+
# Utils class for CSS helpers
|
4
|
+
class Utils
|
5
|
+
# Extract urls from CSS page
|
6
|
+
#
|
7
|
+
# @param [Spidr::Page] a Spidr::Page object
|
8
|
+
# @return [Set] Lists of urls
|
9
|
+
#
|
10
|
+
def self.extract_urls_from_css(page)
|
11
|
+
return Set[] unless page
|
12
|
+
return Set[] if page.body.nil?
|
13
|
+
nodes = Crass::Parser.parse_stylesheet(page.body)
|
14
|
+
extract_urls_from_nodes nodes, page
|
15
|
+
end
|
16
|
+
|
17
|
+
# Return urls as absolute from Crass nodes
|
18
|
+
#
|
19
|
+
# @param [Hash] node from Crass
|
20
|
+
# @param [Spidr::Page] a Spidr::Page object
|
21
|
+
# @return [Set] list of absolute urls
|
22
|
+
def self.urls_to_absolute(node, page)
|
23
|
+
if node[:node] == :function && node[:name] == 'url' || node[:node] == :url
|
24
|
+
Array(node[:value]).map do |v|
|
25
|
+
url = v.is_a?(String) ? v : v[:value]
|
26
|
+
page.to_absolute(url).to_s
|
27
|
+
end
|
28
|
+
else
|
29
|
+
Set.new
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Extract urls from Crass nodes
|
34
|
+
# @param [Array] Array of nodes from Crass
|
35
|
+
# @param [Spidr::Page] a Spidr::Page object
|
36
|
+
# @param [Set] memo for recursivity
|
37
|
+
# @return [Set] list of urls
|
38
|
+
def self.extract_urls_from_nodes(nodes, page, memo = Set[])
|
39
|
+
nodes.each_with_object(memo) do |node, result|
|
40
|
+
result.merge urls_to_absolute(node, page)
|
41
|
+
if node[:children]
|
42
|
+
extract_urls_from_nodes node.delete(:children), page, result
|
43
|
+
end
|
44
|
+
result
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
data/lib/validate_website.rb
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
.\" Title: validate-website-static
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
4
|
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
5
|
-
.\" Date:
|
5
|
+
.\" Date: 07/21/2015
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE\-S" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE\-S" "1" "07/21/2015" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
@@ -59,6 +59,11 @@ Ignore certain validation errors (ex:
|
|
59
59
|
Markup validation (Default: true)
|
60
60
|
.RE
|
61
61
|
.PP
|
62
|
+
\fB\-\-css\-syntax\fR
|
63
|
+
.RS 4
|
64
|
+
Css validation (Default: false)
|
65
|
+
.RE
|
66
|
+
.PP
|
62
67
|
\fB\-n\fR, \fB\-\-not\-found\fR
|
63
68
|
.RS 4
|
64
69
|
Log files not on filesystem, pwd considered as root \(Fo / \(Fc (Default: false)
|
data/man/man1/validate-website.1
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
.\" Title: validate-website
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
4
|
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
5
|
-
.\" Date:
|
5
|
+
.\" Date: 07/21/2015
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE" "1" "07/21/2015" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
@@ -70,6 +70,11 @@ Set defaults cookies
|
|
70
70
|
Markup validation (Default: true)
|
71
71
|
.RE
|
72
72
|
.PP
|
73
|
+
\fB\-\-css\-syntax\fR
|
74
|
+
.RS 4
|
75
|
+
Css validation (Default: false)
|
76
|
+
.RE
|
77
|
+
.PP
|
73
78
|
\fB\-n\fR, \fB\-\-not\-found\fR
|
74
79
|
.RS 4
|
75
80
|
Log not found url (Default: false)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: validate-website
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Laurent Arnoud
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-07-
|
11
|
+
date: 2015-07-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spidr
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0.4'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: crass
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: paint
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -246,27 +260,11 @@ files:
|
|
246
260
|
- lib/validate_website/option_parser.rb
|
247
261
|
- lib/validate_website/runner.rb
|
248
262
|
- lib/validate_website/static.rb
|
263
|
+
- lib/validate_website/utils.rb
|
249
264
|
- lib/validate_website/validator.rb
|
250
265
|
- lib/validate_website/version.rb
|
251
266
|
- man/man1/validate-website-static.1
|
252
267
|
- man/man1/validate-website.1
|
253
|
-
- spec/core_spec.rb
|
254
|
-
- spec/crawler_spec.rb
|
255
|
-
- spec/data/assets/application-92f19110a9d47a56d2ebe744e15af301.css
|
256
|
-
- spec/data/html4-strict.html
|
257
|
-
- spec/data/html5-linuxfr.html
|
258
|
-
- spec/data/html5.html
|
259
|
-
- spec/data/news/ryzom-naissance-du-projet-libre-ryzom-forge.md
|
260
|
-
- spec/data/validator.nu-excessive.html
|
261
|
-
- spec/data/validator.nu-failure.html
|
262
|
-
- spec/data/validator.nu-success.html
|
263
|
-
- spec/data/w3.org-xhtml1-strict-errors.html
|
264
|
-
- spec/data/xhtml1-strict.html
|
265
|
-
- spec/example/ruby smalltalk/blockcamp-paris-le-28-novembre.html
|
266
|
-
- spec/spec_helper.rb
|
267
|
-
- spec/static_spec.rb
|
268
|
-
- spec/validator_spec.rb
|
269
|
-
- spec/webmock_helper.rb
|
270
268
|
homepage: http://github.com/spk/validate-website
|
271
269
|
licenses:
|
272
270
|
- MIT
|
@@ -294,9 +292,5 @@ rubygems_version: 2.4.5
|
|
294
292
|
signing_key:
|
295
293
|
specification_version: 4
|
296
294
|
summary: Web crawler for checking the validity of your documents
|
297
|
-
test_files:
|
298
|
-
- spec/static_spec.rb
|
299
|
-
- spec/core_spec.rb
|
300
|
-
- spec/crawler_spec.rb
|
301
|
-
- spec/validator_spec.rb
|
295
|
+
test_files: []
|
302
296
|
has_rdoc:
|