w3clove 0.7.1 → 0.7.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +15 -0
- data/bin/w3clove +4 -2
- data/lib/w3clove/page.rb +11 -4
- data/lib/w3clove/sitemap.rb +6 -4
- data/lib/w3clove/validator.rb +2 -2
- data/lib/w3clove/version.rb +1 -1
- data/w3clove.gemspec +1 -1
- metadata +8 -8
data/README.rdoc
CHANGED
@@ -22,6 +22,21 @@ This will validate all the internal URLs found on the starting URL, up to a maxi
|
|
22
22
|
|
23
23
|
You can pass w3clove an XML sitemap or the URL of a website; it will scrape it in search of URLs to validate.
|
24
24
|
|
25
|
+
= Timeouts:
|
26
|
+
|
27
|
+
By default, the w3clove gem sets a 20-second timeout for each individual request. If you want to set a different timeout, pass it (in seconds) as a third parameter, like this:
|
28
|
+
|
29
|
+
w3clove http://ryanair.com/sitemap.xml report.html 60
|
30
|
+
|
31
|
+
= Using an alternate validation server
|
32
|
+
|
33
|
+
By default, the w3clove gem uses the official W3C Validator server at http://validator.w3.org, but you can use an alternate server if you want.
|
34
|
+
To do this, define the W3C_MARKUP_VALIDATOR_URI environment variable on your machine, like:
|
35
|
+
|
36
|
+
ENV['W3C_MARKUP_VALIDATOR_URI'] = 'http://example.com/validator'
|
37
|
+
|
38
|
+
Follow this guide to learn how to set up your own validation server: https://github.com/tlvince/w3c-validator-guide
|
39
|
+
|
25
40
|
= Notes:
|
26
41
|
|
27
42
|
This gem requires Ruby 1.9, and has been tested on Ruby 1.9.2-p0
|
data/bin/w3clove
CHANGED
@@ -4,10 +4,12 @@
|
|
4
4
|
require_relative '../lib/w3clove'
|
5
5
|
|
6
6
|
begin
|
7
|
-
if ARGV.length
|
8
|
-
W3Clove::Validator.check(ARGV[0], ARGV[1])
|
7
|
+
if ARGV.length >= 2
|
8
|
+
W3Clove::Validator.check(ARGV[0], ARGV[1], ARGV[2])
|
9
9
|
else
|
10
10
|
puts "USAGE: w3clove url_of_sitemap output_file.html"
|
11
|
+
puts " OR "
|
12
|
+
puts "USAGE: w3clove url_of_sitemap output_file.html timeout_in_seconds"
|
11
13
|
end
|
12
14
|
rescue
|
13
15
|
puts "There was an error processing your request"
|
data/lib/w3clove/page.rb
CHANGED
@@ -10,10 +10,11 @@ module W3Clove
|
|
10
10
|
# In case an exception happens when validating, it is tracked
|
11
11
|
#
|
12
12
|
class Page
|
13
|
-
attr_accessor :url, :exception
|
13
|
+
attr_accessor :url, :timeout, :exception
|
14
14
|
|
15
|
-
def initialize(url)
|
16
|
-
@url
|
15
|
+
def initialize(url, timeout = 20)
|
16
|
+
@url = url
|
17
|
+
@timeout = timeout
|
17
18
|
end
|
18
19
|
|
19
20
|
##
|
@@ -61,7 +62,13 @@ module W3Clove
|
|
61
62
|
##
|
62
63
|
# Gets the validations for this page, ensuring it times out soon
|
63
64
|
def validations
|
64
|
-
@validations ||= Timeout::timeout(
|
65
|
+
@validations ||= Timeout::timeout(timeout) { markup_validator.validate_uri(url) }
|
66
|
+
end
|
67
|
+
|
68
|
+
##
|
69
|
+
# Returns an instance of MarkupValidator, with the URL set to the one in ENV or its default
|
70
|
+
def markup_validator
|
71
|
+
@markup_validator ||= MarkupValidator.new(:validator_uri => ENV['W3C_MARKUP_VALIDATOR_URI'] || 'http://validator.w3.org/check')
|
65
72
|
end
|
66
73
|
end
|
67
74
|
end
|
data/lib/w3clove/sitemap.rb
CHANGED
@@ -3,16 +3,18 @@
|
|
3
3
|
require 'open-uri'
|
4
4
|
require 'nokogiri'
|
5
5
|
require 'metainspector'
|
6
|
+
require 'timeout'
|
6
7
|
|
7
8
|
module W3Clove
|
8
9
|
##
|
9
10
|
# A sitemap has a URL, and holds a collection of pages to be validated
|
10
11
|
#
|
11
12
|
class Sitemap
|
12
|
-
attr_accessor :url
|
13
|
+
attr_accessor :url, :timeout
|
13
14
|
|
14
|
-
def initialize(url)
|
15
|
-
@url
|
15
|
+
def initialize(url, timeout = 20)
|
16
|
+
@url = url
|
17
|
+
@timeout = timeout
|
16
18
|
end
|
17
19
|
|
18
20
|
##
|
@@ -53,7 +55,7 @@ module W3Clove
|
|
53
55
|
def pages_in_sitemap
|
54
56
|
pages = xml_locations.map {|loc| W3Clove::Page.new(loc.text)}
|
55
57
|
if pages.empty?
|
56
|
-
m = MetaInspector.new(url)
|
58
|
+
m = MetaInspector.new(url, timeout)
|
57
59
|
links = m.absolute_links.select {|l| l.start_with?(m.url) && looks_like_html?(l)}.map {|l| l.split('#')[0]}.uniq
|
58
60
|
links << m.url unless (links.include?(m.url) || links.include?("#{m.url}/"))
|
59
61
|
pages = links.map {|link| W3Clove::Page.new(link)}
|
data/lib/w3clove/validator.rb
CHANGED
@@ -15,8 +15,8 @@ module W3Clove
|
|
15
15
|
# Shows progress on dot-style (...F...FFE..). A dot is a valid page,
|
16
16
|
# an F is a page with errors, and an E is an exception
|
17
17
|
# After the checking is done, a detailed summary is written to filename
|
18
|
-
def check(url, filename)
|
19
|
-
sitemap = W3Clove::Sitemap.new(url)
|
18
|
+
def check(url, filename, timeout)
|
19
|
+
sitemap = W3Clove::Sitemap.new(url, timeout.to_f)
|
20
20
|
say "Validating #{sitemap.pages.length} pages"
|
21
21
|
|
22
22
|
sitemap.pages.each do |page|
|
data/lib/w3clove/version.rb
CHANGED
data/w3clove.gemspec
CHANGED
@@ -18,7 +18,7 @@ and outputs a detailed report with all errors and warnings}
|
|
18
18
|
|
19
19
|
gem.add_dependency 'w3c_validators', '~> 1.2'
|
20
20
|
gem.add_dependency 'nokogiri', '~> 1.5.3'
|
21
|
-
gem.add_dependency 'metainspector', '
|
21
|
+
gem.add_dependency 'metainspector', '1.9.1'
|
22
22
|
|
23
23
|
gem.add_development_dependency 'rspec', '~> 2.5.0'
|
24
24
|
gem.add_development_dependency 'mocha', '~> 0.11.4'
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: w3clove
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 7
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 7
|
9
|
-
-
|
10
|
-
version: 0.7.
|
9
|
+
- 2
|
10
|
+
version: 0.7.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jaime Iniesta
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-07-12 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: w3c_validators
|
@@ -54,14 +54,14 @@ dependencies:
|
|
54
54
|
requirement: &id003 !ruby/object:Gem::Requirement
|
55
55
|
none: false
|
56
56
|
requirements:
|
57
|
-
- -
|
57
|
+
- - "="
|
58
58
|
- !ruby/object:Gem::Version
|
59
|
-
hash:
|
59
|
+
hash: 49
|
60
60
|
segments:
|
61
61
|
- 1
|
62
62
|
- 9
|
63
|
-
-
|
64
|
-
version: 1.9.
|
63
|
+
- 1
|
64
|
+
version: 1.9.1
|
65
65
|
type: :runtime
|
66
66
|
version_requirements: *id003
|
67
67
|
- !ruby/object:Gem::Dependency
|