validate-website 1.5.7 → 1.5.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.md +11 -4
- data/lib/validate_website/core.rb +1 -1
- data/lib/validate_website/crawl.rb +6 -0
- data/lib/validate_website/option_parser.rb +1 -0
- data/lib/validate_website/static.rb +5 -0
- data/lib/validate_website/validator.rb +4 -3
- data/lib/validate_website/version.rb +1 -1
- data/man/man1/validate-website-static.1 +9 -3
- data/man/man1/validate-website.1 +3 -3
- data/test/crawler_test.rb +7 -7
- data/test/static_test.rb +12 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b113a6d49dafd4312e90c91b90ef823a7783b2a6
|
4
|
+
data.tar.gz: 81f4d59065bd48c9a2e1deb8fc13454c351269fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0653c9131b9f42af196335d230f80edd08429f5e7405034404e7632d8df71523491c9445e28da1a5aecd5462af23390db3346bbb39b3748042a18ebd060d1e99
|
7
|
+
data.tar.gz: 557e7164a448d99ebda6256f7fb73157a785f509844fc9b02878fafcc103334fbb80737676765d00da27ed3ea47cca3eeccbfc88a99aed5b45cd845465e1fe81
|
data/History.md
CHANGED
@@ -1,6 +1,13 @@
|
|
1
1
|
|
2
|
+
v1.5.8 / 2016-06-27
|
3
|
+
===================
|
4
|
+
|
5
|
+
* Update manpages
|
6
|
+
* Exclude urls on static crawl
|
7
|
+
* Fix warning: instance variable not initialized
|
8
|
+
|
2
9
|
v1.5.7 / 2016-06-13
|
3
|
-
|
10
|
+
===================
|
4
11
|
|
5
12
|
* Fix JRuby CI build
|
6
13
|
* Update JRuby and other rubies on Travis
|
@@ -8,7 +15,7 @@ v1.5.7 / 2016-06-13
|
|
8
15
|
* Add env variable VALIDATOR_NU_URL support
|
9
16
|
|
10
17
|
v1.5.6 / 2016-03-02
|
11
|
-
|
18
|
+
===================
|
12
19
|
|
13
20
|
* Fix error with redirect and extract images (see #16)
|
14
21
|
* Display version number
|
@@ -16,12 +23,12 @@ v1.5.6 / 2016-03-02
|
|
16
23
|
* Updates deps
|
17
24
|
|
18
25
|
v1.5.4 / 2015-12-08
|
19
|
-
|
26
|
+
===================
|
20
27
|
|
21
28
|
* Fix URI must be ascii only error
|
22
29
|
|
23
30
|
v1.5.3 / 2015-11-08
|
24
|
-
|
31
|
+
===================
|
25
32
|
|
26
33
|
* Fix test_files on gemspec
|
27
34
|
* Use File.expand_path for jruby
|
data/lib/validate_website/core.rb
CHANGED
@@ -17,7 +17,7 @@ module ValidateWebsite
|
|
17
17
|
# Core class for static or website validation
|
18
18
|
class Core
|
19
19
|
attr_accessor :site
|
20
|
-
attr_reader :options, :
|
20
|
+
attr_reader :options, :host, :errors_count, :not_founds_count
|
21
21
|
|
22
22
|
include ColorfulMessages
|
23
23
|
|
data/lib/validate_website/crawl.rb
CHANGED
@@ -4,10 +4,16 @@ require 'validate_website/utils'
|
|
4
4
|
module ValidateWebsite
|
5
5
|
# Class for http website validation
|
6
6
|
class Crawl < Core
|
7
|
+
attr_reader :crawler
|
8
|
+
|
7
9
|
def initialize(options = {}, validation_type = :crawl)
|
8
10
|
super
|
9
11
|
end
|
10
12
|
|
13
|
+
def history_count
|
14
|
+
crawler.history.size
|
15
|
+
end
|
16
|
+
|
11
17
|
# @param [Hash] options
|
12
18
|
# :color [Boolean] color output (true, false)
|
13
19
|
# :exclude [String] a String used by Regexp.new
|
data/lib/validate_website/option_parser.rb
CHANGED
@@ -117,6 +117,7 @@ module ValidateWebsite
|
|
117
117
|
o.string('-p', '--pattern',
|
118
118
|
"Filename pattern (default: #{DEFAULT_OPTIONS[:pattern]})",
|
119
119
|
default: DEFAULT_OPTIONS[:pattern])
|
120
|
+
o.regexp('-e', '--exclude', 'Url to exclude (ex: "redirect|news")')
|
120
121
|
end
|
121
122
|
end
|
122
123
|
end
|
data/lib/validate_website/static.rb
CHANGED
@@ -6,7 +6,10 @@ module ValidateWebsite
|
|
6
6
|
class Static < Core
|
7
7
|
CONTENT_TYPES = ['text/html', 'text/xhtml+xml'].freeze
|
8
8
|
|
9
|
+
attr_reader :history_count
|
10
|
+
|
9
11
|
def initialize(options = {}, validation_type = :static)
|
12
|
+
@history_count = 0
|
10
13
|
super
|
11
14
|
end
|
12
15
|
|
@@ -19,6 +22,8 @@ module ValidateWebsite
|
|
19
22
|
files = Dir.glob(@options[:pattern])
|
20
23
|
files.each do |f|
|
21
24
|
next unless File.file?(f)
|
25
|
+
next if @options[:exclude].is_a?(Regexp) && @options[:exclude].match(f)
|
26
|
+
@history_count += 1
|
22
27
|
check_static_file(f)
|
23
28
|
end
|
24
29
|
print_status_line(files.size, 0, @not_founds_count, @errors_count)
|
data/lib/validate_website/validator.rb
CHANGED
@@ -14,7 +14,7 @@ module ValidateWebsite
|
|
14
14
|
attr_accessor :html5_validator_service_url
|
15
15
|
end
|
16
16
|
|
17
|
-
attr_reader :original_doc, :body, :dtd, :doc, :namespace
|
17
|
+
attr_reader :original_doc, :body, :dtd, :doc, :namespace
|
18
18
|
|
19
19
|
##
|
20
20
|
# @param [Nokogiri::HTML::Document] original_doc
|
@@ -22,12 +22,13 @@ module ValidateWebsite
|
|
22
22
|
# @param [Regexp] Errors to ignore
|
23
23
|
#
|
24
24
|
def initialize(original_doc, body, ignore = nil)
|
25
|
+
@errors = []
|
26
|
+
@document, @dtd_uri = nil
|
25
27
|
@original_doc = original_doc
|
26
28
|
@body = body
|
27
29
|
@ignore = ignore
|
28
30
|
@dtd = @original_doc.internal_subset
|
29
31
|
@namespace = init_namespace(@dtd)
|
30
|
-
@errors = []
|
31
32
|
end
|
32
33
|
|
33
34
|
##
|
@@ -74,7 +75,7 @@ module ValidateWebsite
|
|
74
75
|
|
75
76
|
# @return [Array] contain result errors
|
76
77
|
def validate(xml_doc, document_body)
|
77
|
-
if xsd
|
78
|
+
if !xsd.nil?
|
78
79
|
xsd.validate(xml_doc)
|
79
80
|
elsif document_body =~ /^\<!DOCTYPE html\>/i
|
80
81
|
html5_validate(document_body)
|
data/man/man1/validate-website-static.1
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
'\" t
|
2
2
|
.\" Title: validate-website-static
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
|
-
.\" Generator: DocBook XSL Stylesheets v1.
|
5
|
-
.\" Date:
|
4
|
+
.\" Generator: DocBook XSL Stylesheets v1.79.1 <http://docbook.sf.net/>
|
5
|
+
.\" Date: 06/19/2016
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE\-S" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE\-S" "1" "06/19/2016" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
@@ -48,6 +48,12 @@ http://www\&.example\&.com/)
|
|
48
48
|
Change filenames pattern (Default: **/*\&.html)
|
49
49
|
.RE
|
50
50
|
.PP
|
51
|
+
\fB\-e\fR, \fB\-\-exclude\fR \fIEXCLUDE\fR
|
52
|
+
.RS 4
|
53
|
+
Url to exclude (ex:
|
54
|
+
\fIredirect|news\fR)
|
55
|
+
.RE
|
56
|
+
.PP
|
51
57
|
\fB\-i\fR, \fB\-\-ignore\fR \fIIGNORE\fR
|
52
58
|
.RS 4
|
53
59
|
Ignore certain validation errors (ex:
|
data/man/man1/validate-website.1
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
'\" t
|
2
2
|
.\" Title: validate-website
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
|
-
.\" Generator: DocBook XSL Stylesheets v1.
|
5
|
-
.\" Date:
|
4
|
+
.\" Generator: DocBook XSL Stylesheets v1.79.1 <http://docbook.sf.net/>
|
5
|
+
.\" Date: 06/19/2016
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE" "1" "06/19/2016" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
data/test/crawler_test.rb
CHANGED
@@ -54,7 +54,7 @@ describe ValidateWebsite::Crawl do
|
|
54
54
|
_out, _err = capture_io do
|
55
55
|
@validate_website.crawl
|
56
56
|
end
|
57
|
-
@validate_website.
|
57
|
+
@validate_website.history_count.must_equal 5
|
58
58
|
end
|
59
59
|
|
60
60
|
it 'extract link' do
|
@@ -67,7 +67,7 @@ describe ValidateWebsite::Crawl do
|
|
67
67
|
_out, _err = capture_io do
|
68
68
|
@validate_website.crawl
|
69
69
|
end
|
70
|
-
@validate_website.
|
70
|
+
@validate_website.history_count.must_equal 98
|
71
71
|
end
|
72
72
|
|
73
73
|
it 'crawl when URLs are not ascii only' do
|
@@ -114,7 +114,7 @@ describe ValidateWebsite::Crawl do
|
|
114
114
|
_out, _err = capture_io do
|
115
115
|
@validate_website.crawl
|
116
116
|
end
|
117
|
-
@validate_website.
|
117
|
+
@validate_website.history_count.must_equal 5
|
118
118
|
end
|
119
119
|
|
120
120
|
it 'should extract url with single quote' do
|
@@ -125,7 +125,7 @@ describe ValidateWebsite::Crawl do
|
|
125
125
|
_out, _err = capture_io do
|
126
126
|
@validate_website.crawl
|
127
127
|
end
|
128
|
-
@validate_website.
|
128
|
+
@validate_website.history_count.must_equal 2
|
129
129
|
end
|
130
130
|
|
131
131
|
it 'should extract url with double quote' do
|
@@ -136,7 +136,7 @@ describe ValidateWebsite::Crawl do
|
|
136
136
|
_out, _err = capture_io do
|
137
137
|
@validate_website.crawl
|
138
138
|
end
|
139
|
-
@validate_website.
|
139
|
+
@validate_website.history_count.must_equal 2
|
140
140
|
end
|
141
141
|
|
142
142
|
it 'should extract url with params' do
|
@@ -147,7 +147,7 @@ describe ValidateWebsite::Crawl do
|
|
147
147
|
_out, _err = capture_io do
|
148
148
|
@validate_website.crawl
|
149
149
|
end
|
150
|
-
@validate_website.
|
150
|
+
@validate_website.history_count.must_equal 2
|
151
151
|
end
|
152
152
|
|
153
153
|
it 'should not extract invalid urls' do
|
@@ -158,7 +158,7 @@ describe ValidateWebsite::Crawl do
|
|
158
158
|
_out, _err = capture_io do
|
159
159
|
@validate_website.crawl
|
160
160
|
end
|
161
|
-
@validate_website.
|
161
|
+
@validate_website.history_count.must_equal 1
|
162
162
|
end
|
163
163
|
end
|
164
164
|
|
data/test/static_test.rb
CHANGED
@@ -7,6 +7,18 @@ describe ValidateWebsite::Static do
|
|
7
7
|
end
|
8
8
|
end
|
9
9
|
|
10
|
+
it 'exclude directories' do
|
11
|
+
pattern = File.join(File.dirname(__FILE__), '**/*.html')
|
12
|
+
_out, _err = capture_io do
|
13
|
+
@validate_website.crawl(pattern: pattern,
|
14
|
+
site: 'http://spkdev.net/',
|
15
|
+
markup: false,
|
16
|
+
not_found: false,
|
17
|
+
exclude: /data|example/)
|
18
|
+
end
|
19
|
+
@validate_website.history_count.must_equal 0
|
20
|
+
end
|
21
|
+
|
10
22
|
it 'no space in directory name' do
|
11
23
|
pattern = File.join(File.dirname(__FILE__), 'example/**/*.html')
|
12
24
|
_out, _err = capture_io do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: validate-website
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.7
|
4
|
+
version: 1.5.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Laurent Arnoud
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-13 00:00:00.000000000 Z
|
11
|
+
date: 2016-06-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spidr
|