validate-website 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +2 -2
- data/lib/validate_website/core.rb +32 -28
- data/lib/validate_website/validator.rb +1 -1
- data/man/man1/validate-website-static.1 +4 -4
- data/man/man1/validate-website.1 +9 -4
- data/spec/core_spec.rb +6 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 46efde8e3afaf965afb3a61893bbf6070bf3ee78
|
4
|
+
data.tar.gz: ddcf5e37e699eb74ae6fc299bb7366323c34c2bf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e137db2244e39a6f9fd12d36b8ed5c477f59bd22a6edb8f010315fe27c8fe1d60fbdeeb590396b6a25e01993ce69579a8bc8006b06b2b5a4759e739679b3c22
|
7
|
+
data.tar.gz: 75d4c6e34efa984b0bf0b998fc790f89422742ac40d7cf7b7c5a02e744abf66ac0a9eba0f08a99161e30aeaae07276c3f1ca62bb604ee31e6b18424851f0eda9
|
data/Rakefile
CHANGED
@@ -8,10 +8,10 @@ RDoc::Task.new do |rd|
|
|
8
8
|
rd.rdoc_files.include("README.rdoc", "lib/**/*.rb")
|
9
9
|
end
|
10
10
|
|
11
|
-
# install asciidoc libxml2-utils xmlto
|
11
|
+
# install asciidoc libxml2-utils xmlto docbook-xsl docbook-xml
|
12
12
|
desc 'Update manpage from asciidoc file'
|
13
13
|
task :manpage do
|
14
|
-
system('find doc/ -type f -exec a2x -f manpage -D man/man1 {} \;')
|
14
|
+
system('find doc/ -type f -exec a2x -d manpage -f manpage -D man/man1 {} \;')
|
15
15
|
end
|
16
16
|
|
17
17
|
Rake::TestTask.new do |t|
|
data/lib/validate_website/core.rb
CHANGED
@@ -44,31 +44,7 @@ module ValidateWebsite
|
|
44
44
|
@options.merge!(ignore_links: @options[:exclude]) if @options[:exclude]
|
45
45
|
puts color(:warning, "No internet connection") unless internet_connection?
|
46
46
|
|
47
|
-
@
|
48
|
-
@crawler = Spidr.site(@site, @options) do |crawler|
|
49
|
-
crawler.cookies[@host] = default_cookies if @options[:cookies]
|
50
|
-
crawler.every_css_page do |page|
|
51
|
-
extract_urls_from_css(page).each do |u|
|
52
|
-
crawler.enqueue(u)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
crawler.every_html_page do |page|
|
57
|
-
extract_imgs_from_page(page).each do |i|
|
58
|
-
crawler.enqueue(i)
|
59
|
-
end
|
60
|
-
|
61
|
-
if @options[:markup] && page.html?
|
62
|
-
validate(page.doc, page.body, page.url, @options[:ignore])
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
if @options[:not_found]
|
67
|
-
crawler.every_failed_url do |url|
|
68
|
-
not_found_error(url)
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
47
|
+
@crawler = spidr_crawler(@site, @options)
|
72
48
|
print_status_line(@crawler.history.size,
|
73
49
|
@crawler.failures.size,
|
74
50
|
@not_founds_count,
|
@@ -115,9 +91,9 @@ module ValidateWebsite
|
|
115
91
|
end
|
116
92
|
|
117
93
|
def default_cookies
|
118
|
-
WEBrick::Cookie.parse(@options[:cookies]).
|
119
|
-
|
120
|
-
|
94
|
+
WEBrick::Cookie.parse(@options[:cookies]).each_with_object({}) do |c, h|
|
95
|
+
h[c.name] = c.value
|
96
|
+
h
|
121
97
|
end
|
122
98
|
end
|
123
99
|
|
@@ -231,5 +207,33 @@ module ValidateWebsite
|
|
231
207
|
"#{not_founds} not founds",
|
232
208
|
"#{errors} errors"].join(', '), @options[:color])
|
233
209
|
end
|
210
|
+
|
211
|
+
def spidr_crawler(site, options)
|
212
|
+
@host = URI(site).host
|
213
|
+
Spidr.site(site, options) do |crawler|
|
214
|
+
crawler.cookies[@host] = default_cookies if options[:cookies]
|
215
|
+
crawler.every_css_page do |page|
|
216
|
+
extract_urls_from_css(page).each do |u|
|
217
|
+
crawler.enqueue(u)
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
crawler.every_html_page do |page|
|
222
|
+
extract_imgs_from_page(page).each do |i|
|
223
|
+
crawler.enqueue(i)
|
224
|
+
end
|
225
|
+
|
226
|
+
if options[:markup] && page.html?
|
227
|
+
validate(page.doc, page.body, page.url, options[:ignore])
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
if options[:not_found]
|
232
|
+
crawler.every_failed_url do |url|
|
233
|
+
not_found_error(url)
|
234
|
+
end
|
235
|
+
end
|
236
|
+
end
|
237
|
+
end
|
234
238
|
end
|
235
239
|
end
|
data/lib/validate_website/validator.rb
CHANGED
@@ -6,7 +6,7 @@ module ValidateWebsite
|
|
6
6
|
# Document validation from DTD or XSD (webservice for html5)
|
7
7
|
class Validator
|
8
8
|
XHTML_PATH = File.expand_path('../../../data/schemas', __FILE__)
|
9
|
-
HTML5_VALIDATOR_SERVICE = 'http://
|
9
|
+
HTML5_VALIDATOR_SERVICE = 'http://validator.w3.org/nu/'
|
10
10
|
|
11
11
|
attr_reader :original_doc, :body, :dtd, :doc, :namespace, :xsd, :errors
|
12
12
|
|
data/man/man1/validate-website-static.1
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
.\" Title: validate-website-static
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
4
|
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
5
|
-
.\" Date:
|
5
|
+
.\" Date: 02/12/2015
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE\-S" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE\-S" "1" "02/12/2015" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
@@ -59,7 +59,7 @@ Ignore certain validation errors (ex:
|
|
59
59
|
Markup validation (Default: true)
|
60
60
|
.RE
|
61
61
|
.PP
|
62
|
-
\fB\-n\fR, \fB\-\-
|
62
|
+
\fB\-n\fR, \fB\-\-not\-found\fR
|
63
63
|
.RS 4
|
64
64
|
Log files not on filesystem, pwd considered as root \(Fo / \(Fc (Default: false)
|
65
65
|
.RE
|
@@ -106,4 +106,4 @@ Laurent Arnoud <laurent@spkdev\&.net>
|
|
106
106
|
.sp
|
107
107
|
The MIT License
|
108
108
|
.sp
|
109
|
-
Copyright (c) 2009\-
|
109
|
+
Copyright (c) 2009\-2015 Laurent Arnoud <laurent@spkdev\&.net>
|
data/man/man1/validate-website.1
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
.\" Title: validate-website
|
3
3
|
.\" Author: [see the "AUTHOR" section]
|
4
4
|
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
5
|
-
.\" Date:
|
5
|
+
.\" Date: 02/12/2015
|
6
6
|
.\" Manual: \ \&
|
7
7
|
.\" Source: \ \&
|
8
8
|
.\" Language: English
|
9
9
|
.\"
|
10
|
-
.TH "VALIDATE\-WEBSITE" "1" "
|
10
|
+
.TH "VALIDATE\-WEBSITE" "1" "02/12/2015" "\ \&" "\ \&"
|
11
11
|
.\" -----------------------------------------------------------------
|
12
12
|
.\" * Define some portability stuff
|
13
13
|
.\" -----------------------------------------------------------------
|
@@ -43,6 +43,11 @@ Website to crawl (Default:
|
|
43
43
|
http://localhost:3000/)
|
44
44
|
.RE
|
45
45
|
.PP
|
46
|
+
\fB\-u\fR, \fB\-\-user\-agent\fR \fIUSERAGENT\fR
|
47
|
+
.RS 4
|
48
|
+
Change user agent (Default: Spidr\&.user_agent)
|
49
|
+
.RE
|
50
|
+
.PP
|
46
51
|
\fB\-e\fR, \fB\-\-exclude\fR \fIEXCLUDE\fR
|
47
52
|
.RS 4
|
48
53
|
Url to exclude (ex:
|
@@ -65,7 +70,7 @@ Set defaults cookies
|
|
65
70
|
Markup validation (Default: true)
|
66
71
|
.RE
|
67
72
|
.PP
|
68
|
-
\fB\-n\fR, \fB\-\-
|
73
|
+
\fB\-n\fR, \fB\-\-not\-found\fR
|
69
74
|
.RS 4
|
70
75
|
Log not found url (Default: false)
|
71
76
|
.RE
|
@@ -112,4 +117,4 @@ Laurent Arnoud <laurent@spkdev\&.net>
|
|
112
117
|
.sp
|
113
118
|
The MIT License
|
114
119
|
.sp
|
115
|
-
Copyright (c) 2009\-
|
120
|
+
Copyright (c) 2009\-2015 Laurent Arnoud <laurent@spkdev\&.net>
|
data/spec/core_spec.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
require File.expand_path('../spec_helper', __FILE__)
|
3
3
|
|
4
4
|
describe ValidateWebsite::Core do
|
5
|
-
|
6
5
|
before do
|
7
6
|
WebMock.reset!
|
8
7
|
stub_request(:get, ValidateWebsite::Core::PING_URL).to_return(status: 200)
|
@@ -20,8 +19,10 @@ describe ValidateWebsite::Core do
|
|
20
19
|
|
21
20
|
describe 'options' do
|
22
21
|
it 'can change user-agent' do
|
23
|
-
ua =
|
24
|
-
|
22
|
+
ua = %{Linux / Firefox 29: Mozilla/5.0 (X11; Linux x86_64; rv:29.0) \
|
23
|
+
Gecko/20100101 Firefox/29.0}
|
24
|
+
v = ValidateWebsite::Core.new({ site: SPEC_DOMAIN, user_agent: ua },
|
25
|
+
:crawl)
|
25
26
|
v.crawl
|
26
27
|
v.crawler.user_agent.must_equal ua
|
27
28
|
end
|
@@ -30,7 +31,8 @@ describe ValidateWebsite::Core do
|
|
30
31
|
describe('cookies') do
|
31
32
|
it 'can set cookies' do
|
32
33
|
cookies = 'tz=Europe%2FBerlin; guid=ZcpBshbtStgl9VjwTofq'
|
33
|
-
v = ValidateWebsite::Core.new({site: SPEC_DOMAIN, cookies: cookies},
|
34
|
+
v = ValidateWebsite::Core.new({ site: SPEC_DOMAIN, cookies: cookies },
|
35
|
+
:crawl)
|
34
36
|
v.crawl
|
35
37
|
v.crawler.cookies.cookies_for_host(v.host).must_equal v.default_cookies
|
36
38
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: validate-website
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Laurent Arnoud
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-02-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spidr
|