validate-website 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +2 -2
- data/lib/validate_website/core.rb +32 -28
- data/lib/validate_website/validator.rb +1 -1
- data/man/man1/validate-website-static.1 +4 -4
- data/man/man1/validate-website.1 +9 -4
- data/spec/core_spec.rb +6 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 46efde8e3afaf965afb3a61893bbf6070bf3ee78
|
4
|
+
data.tar.gz: ddcf5e37e699eb74ae6fc299bb7366323c34c2bf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e137db2244e39a6f9fd12d36b8ed5c477f59bd22a6edb8f010315fe27c8fe1d60fbdeeb590396b6a25e01993ce69579a8bc8006b06b2b5a4759e739679b3c22
|
7
|
+
data.tar.gz: 75d4c6e34efa984b0bf0b998fc790f89422742ac40d7cf7b7c5a02e744abf66ac0a9eba0f08a99161e30aeaae07276c3f1ca62bb604ee31e6b18424851f0eda9
|
data/Rakefile
CHANGED
@@ -8,10 +8,10 @@ RDoc::Task.new do |rd|
|
|
8
8
|
rd.rdoc_files.include("README.rdoc", "lib/**/*.rb")
|
9
9
|
end
|
10
10
|
|
11
|
-
# install asciidoc libxml2-utils xmlto
|
11
|
+
# install asciidoc libxml2-utils xmlto docbook-xsl docbook-xml
|
12
12
|
desc 'Update manpage from asciidoc file'
|
13
13
|
task :manpage do
|
14
|
-
system('find doc/ -type f -exec a2x -f manpage -D man/man1 {} \;')
|
14
|
+
system('find doc/ -type f -exec a2x -d manpage -f manpage -D man/man1 {} \;')
|
15
15
|
end
|
16
16
|
|
17
17
|
Rake::TestTask.new do |t|
|
@@ -44,31 +44,7 @@ module ValidateWebsite
|
|
44
44
|
@options.merge!(ignore_links: @options[:exclude]) if @options[:exclude]
|
45
45
|
puts color(:warning, "No internet connection") unless internet_connection?
|
46
46
|
|
47
|
-
@
|
48
|
-
@crawler = Spidr.site(@site, @options) do |crawler|
|
49
|
-
crawler.cookies[@host] = default_cookies if @options[:cookies]
|
50
|
-
crawler.every_css_page do |page|
|
51
|
-
extract_urls_from_css(page).each do |u|
|
52
|
-
crawler.enqueue(u)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
crawler.every_html_page do |page|
|
57
|
-
extract_imgs_from_page(page).each do |i|
|
58
|
-
crawler.enqueue(i)
|
59
|
-
end
|
60
|
-
|
61
|
-
if @options[:markup] && page.html?
|
62
|
-
validate(page.doc, page.body, page.url, @options[:ignore])
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
if @options[:not_found]
|
67
|
-
crawler.every_failed_url do |url|
|
68
|
-
not_found_error(url)
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
47
|
+
@crawler = spidr_crawler(@site, @options)
|
72
48
|
print_status_line(@crawler.history.size,
|
73
49
|
@crawler.failures.size,
|
74
50
|
@not_founds_count,
|
@@ -115,9 +91,9 @@ module ValidateWebsite
|
|
115
91
|
end
|
116
92
|
|
117
93
|
def default_cookies
|
118
|
-
WEBrick::Cookie.parse(@options[:cookies]).
|
119
|
-
|
120
|
-
|
94
|
+
WEBrick::Cookie.parse(@options[:cookies]).each_with_object({}) do |c, h|
|
95
|
+
h[c.name] = c.value
|
96
|
+
h
|
121
97
|
end
|
122
98
|
end
|
123
99
|
|
@@ -231,5 +207,33 @@ module ValidateWebsite
|
|
231
207
|
"#{not_founds} not founds",
|
232
208
|
"#{errors} errors"].join(', '), @options[:color])
|
233
209
|
end
|
210
|
+
|
211
|
+
def spidr_crawler(site, options)
|
212
|
+
@host = URI(site).host
|
213
|
+
Spidr.site(site, options) do |crawler|
|
214
|
+
crawler.cookies[@host] = default_cookies if options[:cookies]
|
215
|
+
crawler.every_css_page do |page|
|
216
|
+
extract_urls_from_css(page).each do |u|
|
217
|
+
crawler.enqueue(u)
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
crawler.every_html_page do |page|
|
222
|
+
extract_imgs_from_page(page).each do |i|
|
223
|
+
crawler.enqueue(i)
|
224
|
+
end
|
225
|
+
|
226
|
+
if options[:markup] && page.html?
|
227
|
+
validate(page.doc, page.body, page.url, options[:ignore])
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
if options[:not_found]
|
232
|
+
crawler.every_failed_url do |url|
|
233
|
+
not_found_error(url)
|
234
|
+
end
|
235
|
+
end
|
236
|
+
end
|
237
|
+
end
|
234
238
|
end
|
235
239
|
end
|
@@ -6,7 +6,7 @@ module ValidateWebsite
|
|
6
6
|
# Document validation from DTD or XSD (webservice for html5)
|
7
7
|
class Validator
|
8
8
|
XHTML_PATH = File.expand_path('../../../data/schemas', __FILE__)
|
9
|
-
HTML5_VALIDATOR_SERVICE = 'http://
|
9
|
+
HTML5_VALIDATOR_SERVICE = 'http://validator.w3.org/nu/'
|
10
10
|
|
11
11
|
attr_reader :original_doc, :body, :dtd, :doc, :namespace, :xsd, :errors
|
12
12
|
|
@@ -2,12 +2,12 @@
|
|
2
2
|
.\" Title: validate-website-static
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
4
|
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
5
|
-
.\" Date:
|
5
|
+
.\" Date: 02/12/2015
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE\-S" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE\-S" "1" "02/12/2015" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
@@ -59,7 +59,7 @@ Ignore certain validation errors (ex:
|
|
59
59
|
Markup validation (Default: true)
|
60
60
|
.RE
|
61
61
|
.PP
|
62
|
-
\fB\-n\fR, \fB\-\-
|
62
|
+
\fB\-n\fR, \fB\-\-not\-found\fR
|
63
63
|
.RS 4
|
64
64
|
Log files not on filesystem, pwd considered as root \(Fo / \(Fc (Default: false)
|
65
65
|
.RE
|
@@ -106,4 +106,4 @@ Laurent Arnoud <laurent@spkdev\&.net>
|
|
106
106
|
.sp
|
107
107
|
The MIT License
|
108
108
|
.sp
|
109
|
-
Copyright (c) 2009\-
|
109
|
+
Copyright (c) 2009\-2015 Laurent Arnoud <laurent@spkdev\&.net>
|
data/man/man1/validate-website.1
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
.\" Title: validate-website
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
4
|
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
5
|
-
.\" Date:
|
5
|
+
.\" Date: 02/12/2015
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE" "1" "02/12/2015" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
@@ -43,6 +43,11 @@ Website to crawl (Default:
|
|
43
43
|
http://localhost:3000/)
|
44
44
|
.RE
|
45
45
|
.PP
|
46
|
+
\fB\-u\fR, \fB\-\-user\-agent\fR \fIUSERAGENT\fR
|
47
|
+
.RS 4
|
48
|
+
Change user agent (Default: Spidr\&.user_agent)
|
49
|
+
.RE
|
50
|
+
.PP
|
46
51
|
\fB\-e\fR, \fB\-\-exclude\fR \fIEXCLUDE\fR
|
47
52
|
.RS 4
|
48
53
|
Url to exclude (ex:
|
@@ -65,7 +70,7 @@ Set defaults cookies
|
|
65
70
|
Markup validation (Default: true)
|
66
71
|
.RE
|
67
72
|
.PP
|
68
|
-
\fB\-n\fR, \fB\-\-
|
73
|
+
\fB\-n\fR, \fB\-\-not\-found\fR
|
69
74
|
.RS 4
|
70
75
|
Log not found url (Default: false)
|
71
76
|
.RE
|
@@ -112,4 +117,4 @@ Laurent Arnoud <laurent@spkdev\&.net>
|
|
112
117
|
.sp
|
113
118
|
The MIT License
|
114
119
|
.sp
|
115
|
-
Copyright (c) 2009\-
|
120
|
+
Copyright (c) 2009\-2015 Laurent Arnoud <laurent@spkdev\&.net>
|
data/spec/core_spec.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
require File.expand_path('../spec_helper', __FILE__)
|
3
3
|
|
4
4
|
describe ValidateWebsite::Core do
|
5
|
-
|
6
5
|
before do
|
7
6
|
WebMock.reset!
|
8
7
|
stub_request(:get, ValidateWebsite::Core::PING_URL).to_return(status: 200)
|
@@ -20,8 +19,10 @@ describe ValidateWebsite::Core do
|
|
20
19
|
|
21
20
|
describe 'options' do
|
22
21
|
it 'can change user-agent' do
|
23
|
-
ua =
|
24
|
-
|
22
|
+
ua = %{Linux / Firefox 29: Mozilla/5.0 (X11; Linux x86_64; rv:29.0) \
|
23
|
+
Gecko/20100101 Firefox/29.0}
|
24
|
+
v = ValidateWebsite::Core.new({ site: SPEC_DOMAIN, user_agent: ua },
|
25
|
+
:crawl)
|
25
26
|
v.crawl
|
26
27
|
v.crawler.user_agent.must_equal ua
|
27
28
|
end
|
@@ -30,7 +31,8 @@ describe ValidateWebsite::Core do
|
|
30
31
|
describe('cookies') do
|
31
32
|
it 'can set cookies' do
|
32
33
|
cookies = 'tz=Europe%2FBerlin; guid=ZcpBshbtStgl9VjwTofq'
|
33
|
-
v = ValidateWebsite::Core.new({site: SPEC_DOMAIN, cookies: cookies},
|
34
|
+
v = ValidateWebsite::Core.new({ site: SPEC_DOMAIN, cookies: cookies },
|
35
|
+
:crawl)
|
34
36
|
v.crawl
|
35
37
|
v.crawler.cookies.cookies_for_host(v.host).must_equal v.default_cookies
|
36
38
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: validate-website
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Laurent Arnoud
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-02-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spidr
|