validate-website 0.7.7 → 0.7.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/validate_website/core.rb +13 -0
- data/lib/validate_website/validator.rb +1 -1
- data/spec/core_spec.rb +1 -0
- data/spec/fakeweb_helper.rb +40 -48
- metadata +51 -38
- data/README.rdoc +0 -114
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 508617dba80034c3c180a97bb29ae4b6b20cd346
|
4
|
+
data.tar.gz: 8427dd8ee3e1e3e3de26a404b77820d6dcf6b27f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f3a236e0035c6b5fa3f6136066a28abfff550c4f621d37253680d678d38ef97deaee125c4a0cf9512b234549717fe8fb7847730c3df2faddaca417faf2c5f57f
|
7
|
+
data.tar.gz: 714e6b037aa011031762ced9cdde2083bb18ad39d7cda69883096bd6a016e40876857c32af586e8284187475797d86f783dd1c9150c41e5a984900b957ca4776
|
@@ -22,6 +22,8 @@ module ValidateWebsite
|
|
22
22
|
EXIT_FAILURE_NOT_FOUND = 65
|
23
23
|
EXIT_FAILURE_MARKUP_NOT_FOUND = 66
|
24
24
|
|
25
|
+
PING_URL = 'http://www.google.com/'
|
26
|
+
|
25
27
|
def initialize(options={}, validation_type=:crawl)
|
26
28
|
@markup_error = nil
|
27
29
|
@not_found_error = nil
|
@@ -50,6 +52,8 @@ module ValidateWebsite
|
|
50
52
|
opts = @options.merge(opts)
|
51
53
|
puts color(:note, "validating #{@site}", opts[:color]) unless opts[:quiet]
|
52
54
|
|
55
|
+
puts color(:warning, "No internet connection") unless internet_connection?
|
56
|
+
|
53
57
|
@anemone = Anemone.crawl(@site, opts) do |anemone|
|
54
58
|
anemone.skip_links_like Regexp.new(opts[:exclude]) if opts[:exclude]
|
55
59
|
|
@@ -88,6 +92,15 @@ module ValidateWebsite
|
|
88
92
|
end
|
89
93
|
end
|
90
94
|
|
95
|
+
def internet_connection?
|
96
|
+
begin
|
97
|
+
true if open(ValidateWebsite::Core::PING_URL)
|
98
|
+
rescue
|
99
|
+
false
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
|
91
104
|
def crawl_static(opts={})
|
92
105
|
opts = @options.merge(opts)
|
93
106
|
puts color(:note, "validating #{@site}", opts[:color])
|
@@ -45,7 +45,7 @@ module ValidateWebsite
|
|
45
45
|
# TODO: use a local Java, Python parser... write a Ruby HTML5 parser ?
|
46
46
|
require 'net/http'
|
47
47
|
require 'multipart_body'
|
48
|
-
url = URI.parse('http://validator.nu/')
|
48
|
+
url = URI.parse('http://html5.validator.nu/')
|
49
49
|
multipart = MultipartBody.new(:content => document)
|
50
50
|
http = Net::HTTP.new(url.host)
|
51
51
|
headers = {
|
data/spec/core_spec.rb
CHANGED
data/spec/fakeweb_helper.rb
CHANGED
@@ -1,62 +1,54 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
3
|
-
require 'fakeweb'
|
4
|
-
rescue LoadError
|
5
|
-
warn "You need the 'fakeweb' gem installed to test ValidateWebsite"
|
6
|
-
exit
|
7
|
-
end
|
2
|
+
require 'fakeweb'
|
8
3
|
|
9
4
|
FakeWeb.allow_net_connect = false
|
10
5
|
|
11
6
|
class FakePage
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
7
|
+
attr_accessor :links
|
8
|
+
attr_accessor :hrefs
|
9
|
+
attr_accessor :body
|
10
|
+
|
11
|
+
def initialize(name = '', options = {})
|
12
|
+
@name = name
|
13
|
+
@links = [options[:links]].flatten if options.has_key?(:links)
|
14
|
+
@hrefs = [options[:hrefs]].flatten if options.has_key?(:hrefs)
|
15
|
+
@redirect = options[:redirect] if options.has_key?(:redirect)
|
16
|
+
@content_type = options[:content_type] || "text/html"
|
17
|
+
@body = options[:body]
|
18
|
+
|
19
|
+
create_body unless @body
|
20
|
+
add_to_fakeweb
|
21
|
+
end
|
27
22
|
|
28
|
-
|
29
|
-
|
30
|
-
|
23
|
+
def url
|
24
|
+
SPEC_DOMAIN + @name
|
25
|
+
end
|
31
26
|
|
32
|
-
|
27
|
+
private
|
33
28
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
def add_to_fakeweb
|
42
|
-
options = {:body => @body, :content_type => @content_type, :status => [200, "OK"]}
|
29
|
+
def create_body
|
30
|
+
@body = "<html><body>"
|
31
|
+
@links.each{|l| @body += "<a href=\"#{SPEC_DOMAIN}#{l}\"></a>"} if @links
|
32
|
+
@hrefs.each{|h| @body += "<a href=\"#{h}\"></a>"} if @hrefs
|
33
|
+
@body += "</body></html>"
|
34
|
+
end
|
43
35
|
|
44
|
-
|
45
|
-
|
36
|
+
def add_to_fakeweb
|
37
|
+
options = {:body => @body, :content_type => @content_type, :status => [200, "OK"]}
|
46
38
|
|
47
|
-
|
48
|
-
|
49
|
-
options[:location] = redirect_url
|
39
|
+
if @redirect
|
40
|
+
options[:status] = [301, "Permanently Moved"]
|
50
41
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
:status => [200, "OK"]})
|
55
|
-
end
|
42
|
+
# only prepend SPEC_DOMAIN if a relative url (without an http scheme) was specified
|
43
|
+
redirect_url = (@redirect =~ /http/) ? @redirect : SPEC_DOMAIN + @redirect
|
44
|
+
options[:location] = redirect_url
|
56
45
|
|
57
|
-
|
46
|
+
# register the page this one redirects to
|
47
|
+
FakeWeb.register_uri(:get, redirect_url, {:body => '',
|
48
|
+
:content_type => @content_type,
|
49
|
+
:status => [200, "OK"]})
|
58
50
|
end
|
59
|
-
end
|
60
51
|
|
61
|
-
|
62
|
-
|
52
|
+
FakeWeb.register_uri(:get, SPEC_DOMAIN + @name, options)
|
53
|
+
end
|
54
|
+
end
|
metadata
CHANGED
@@ -1,82 +1,99 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: validate-website
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
5
|
-
prerelease:
|
4
|
+
version: 0.7.9
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Laurent Arnoud
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-03-18 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: anemone
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: 0.6.1
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.6.1
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: rainbow
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- -
|
31
|
+
- - '>='
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: 1.1.1
|
33
34
|
type: :runtime
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.1.1
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: multipart_body
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- -
|
45
|
+
- - '>='
|
42
46
|
- !ruby/object:Gem::Version
|
43
47
|
version: 0.2.1
|
44
48
|
type: :runtime
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.2.1
|
47
55
|
- !ruby/object:Gem::Dependency
|
48
56
|
name: rake
|
49
|
-
requirement:
|
50
|
-
none: false
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
51
58
|
requirements:
|
52
|
-
- -
|
59
|
+
- - '>='
|
53
60
|
- !ruby/object:Gem::Version
|
54
61
|
version: 0.8.7
|
55
62
|
type: :development
|
56
63
|
prerelease: false
|
57
|
-
version_requirements:
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.8.7
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: minitest
|
60
|
-
requirement:
|
61
|
-
none: false
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
62
72
|
requirements:
|
63
|
-
- -
|
73
|
+
- - '>='
|
64
74
|
- !ruby/object:Gem::Version
|
65
75
|
version: 2.1.0
|
66
76
|
type: :development
|
67
77
|
prerelease: false
|
68
|
-
version_requirements:
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 2.1.0
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: fakeweb
|
71
|
-
requirement:
|
72
|
-
none: false
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
73
86
|
requirements:
|
74
|
-
- -
|
87
|
+
- - '>='
|
75
88
|
- !ruby/object:Gem::Version
|
76
89
|
version: 1.3.0
|
77
90
|
type: :development
|
78
91
|
prerelease: false
|
79
|
-
version_requirements:
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 1.3.0
|
80
97
|
description: validate-website is a web crawler for checking the markup validity with
|
81
98
|
XML Schema / DTD and not found urls.
|
82
99
|
email: laurent@spkdev.net
|
@@ -86,7 +103,6 @@ executables:
|
|
86
103
|
extensions: []
|
87
104
|
extra_rdoc_files: []
|
88
105
|
files:
|
89
|
-
- README.rdoc
|
90
106
|
- Rakefile
|
91
107
|
- LICENSE
|
92
108
|
- lib/validate_website.rb
|
@@ -199,27 +215,24 @@ files:
|
|
199
215
|
- data/schemas/xhtml-iframe-1.xsd
|
200
216
|
- data/schemas/xhtml-framework-1.xsd
|
201
217
|
- data/schemas/xhtml-basic10.xsd
|
202
|
-
-
|
203
|
-
|
204
|
-
- !binary |-
|
205
|
-
YmluL3ZhbGlkYXRlLXdlYnNpdGUtc3RhdGlj
|
218
|
+
- bin/validate-website
|
219
|
+
- bin/validate-website-static
|
206
220
|
homepage: http://github.com/spk/validate-website
|
207
221
|
licenses:
|
208
222
|
- MIT
|
223
|
+
metadata: {}
|
209
224
|
post_install_message:
|
210
225
|
rdoc_options: []
|
211
226
|
require_paths:
|
212
227
|
- lib
|
213
228
|
required_ruby_version: !ruby/object:Gem::Requirement
|
214
|
-
none: false
|
215
229
|
requirements:
|
216
|
-
- -
|
230
|
+
- - '>='
|
217
231
|
- !ruby/object:Gem::Version
|
218
232
|
version: '0'
|
219
233
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
220
|
-
none: false
|
221
234
|
requirements:
|
222
|
-
- -
|
235
|
+
- - '>='
|
223
236
|
- !ruby/object:Gem::Version
|
224
237
|
version: '0'
|
225
238
|
requirements:
|
@@ -227,9 +240,9 @@ requirements:
|
|
227
240
|
- rainbow
|
228
241
|
- multipart_body
|
229
242
|
rubyforge_project:
|
230
|
-
rubygems_version:
|
243
|
+
rubygems_version: 2.0.0
|
231
244
|
signing_key:
|
232
|
-
specification_version:
|
245
|
+
specification_version: 4
|
233
246
|
summary: Web crawler for checking the validity of your documents
|
234
247
|
test_files:
|
235
248
|
- spec/validator_spec.rb
|
data/README.rdoc
DELETED
@@ -1,114 +0,0 @@
|
|
1
|
-
= validate-website - Web crawler for checking the validity of your documents
|
2
|
-
|
3
|
-
== INSTALLATION
|
4
|
-
|
5
|
-
=== Debian
|
6
|
-
|
7
|
-
aptitude install rubygems ruby-dev libxslt-dev libxml2-dev
|
8
|
-
|
9
|
-
=== RubyGems
|
10
|
-
|
11
|
-
gem install validate-website
|
12
|
-
|
13
|
-
== SYNOPSIS
|
14
|
-
|
15
|
-
validate-website [OPTIONS]
|
16
|
-
validate-website-static [OPTIONS]
|
17
|
-
|
18
|
-
== DESCRIPTION
|
19
|
-
|
20
|
-
validate-website is a web crawler for checking the markup validity with XML
|
21
|
-
Schema / DTD and not found urls (more info doc/validate-website.txt).
|
22
|
-
|
23
|
-
validate-website-static checks the markup validity of your local documents with
|
24
|
-
XML Schema / DTD (more info doc/validate-website-static.txt).
|
25
|
-
|
26
|
-
HTML5 support with Validator.nu Web Service.
|
27
|
-
|
28
|
-
== VALIDATE WEBSITE OPTIONS
|
29
|
-
|
30
|
-
-s, --site SITE
|
31
|
-
Website to crawl (Default: http://localhost:3000/)
|
32
|
-
-u, --user-agent USERAGENT
|
33
|
-
Change user agent (Default: Anemone/VERSION)
|
34
|
-
-e, --exclude EXCLUDE
|
35
|
-
Url to exclude (ex: redirect|news)
|
36
|
-
-i, --ignore-errors IGNORE
|
37
|
-
Ignore certain validation errors (ex: autocorrect)
|
38
|
-
-f, --file FILE
|
39
|
-
Save not well formed or not found (with -n used) urls
|
40
|
-
-c, --cookies COOKIES
|
41
|
-
Set defaults cookies
|
42
|
-
-m, --[no-]markup-validation
|
43
|
-
Markup validation (Default: true)
|
44
|
-
-n, --not-found
|
45
|
-
Log not found url (Default: false)
|
46
|
-
--[no-]color
|
47
|
-
Show colored output (Default: true)
|
48
|
-
-v, --verbose
|
49
|
-
Show detail of validator errors (Default: false).
|
50
|
-
-q, --quiet
|
51
|
-
Only report errors (Default: false).
|
52
|
-
-d, --debug
|
53
|
-
Show anemone log (Default: false)
|
54
|
-
-h, --help
|
55
|
-
Show help message and exit.
|
56
|
-
|
57
|
-
== EXIT STATUS
|
58
|
-
0::
|
59
|
-
Markup is valid and no 404 found.
|
60
|
-
64::
|
61
|
-
Not valid markup found.
|
62
|
-
65::
|
63
|
-
There are pages not found.
|
64
|
-
66::
|
65
|
-
There are not valid markup and pages not found.
|
66
|
-
|
67
|
-
== On your application
|
68
|
-
|
69
|
-
require 'validate_website/validator'
|
70
|
-
body = '<!DOCTYPE html><html></html>'
|
71
|
-
v = ValidateWebsite::Validator.new(Nokogiri::HTML(body), body)
|
72
|
-
v.valid? # => false
|
73
|
-
|
74
|
-
== With RSpec
|
75
|
-
|
76
|
-
On spec/spec_helper.rb:
|
77
|
-
|
78
|
-
require 'validate_website/validator'
|
79
|
-
require 'validate_website/rspec'
|
80
|
-
|
81
|
-
On your spec/controllers:
|
82
|
-
|
83
|
-
it 'should be valid' do
|
84
|
-
response.body.should be_w3c_valid
|
85
|
-
end
|
86
|
-
|
87
|
-
== REQUIREMENTS
|
88
|
-
|
89
|
-
See validate-website.gemspec file.
|
90
|
-
|
91
|
-
== CREDITS
|
92
|
-
|
93
|
-
* Thanks tenderlove for Nokogiri, this tool is inspired from markup_validity.
|
94
|
-
* And Chris Kite for Anemone web-spider framework.
|
95
|
-
|
96
|
-
== MORE INFO
|
97
|
-
|
98
|
-
The HTML5 support is done by using the Validator.nu Web Service, so the content
|
99
|
-
of your webpage is logged by a tier. It's not the case for other validation
|
100
|
-
because validate-website use the XML Schema or DTD stored on the data/ directory.
|
101
|
-
|
102
|
-
Please read http://about.validator.nu/#tos for more info on the HTML5
|
103
|
-
validation service.
|
104
|
-
|
105
|
-
== CONTRIBUTORS
|
106
|
-
|
107
|
-
* François de Metz (francois2metz)
|
108
|
-
* Bruno Michel (nono)
|
109
|
-
* Matt Brictson (mbrictson)
|
110
|
-
|
111
|
-
== LICENSE
|
112
|
-
The MIT License
|
113
|
-
|
114
|
-
Copyright (c) 2009-2012 Laurent Arnoud <laurent@spkdev.net>
|