w3clove 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +15 -0
- data/bin/w3clove +4 -2
- data/lib/w3clove/page.rb +11 -4
- data/lib/w3clove/sitemap.rb +6 -4
- data/lib/w3clove/validator.rb +2 -2
- data/lib/w3clove/version.rb +1 -1
- data/w3clove.gemspec +1 -1
- metadata +8 -8
data/README.rdoc
CHANGED
@@ -22,6 +22,21 @@ This will validate all the internal URLs found on the starting URL, up to a maxi
|
|
22
22
|
|
23
23
|
You can pass w3clove an XML sitemap or the URL of a website; it will scrape it in search of URLs to validate.
|
24
24
|
|
25
|
+
= Timeouts:
|
26
|
+
|
27
|
+
By default, the w3clove gem sets a 20-second timeout for each individual request. To use a different timeout, pass it (in seconds) as a third parameter, like this:
|
28
|
+
|
29
|
+
w3clove http://ryanair.com/sitemap.xml report.html 60
|
30
|
+
|
31
|
+
= Using an alternate validation server
|
32
|
+
|
33
|
+
By default, the w3clove gem will use the official W3C Validator server at http://validator.w3.org but you can use an alternate server if you want.
|
34
|
+
To do this, define an environment variable on your machine, like:
|
35
|
+
|
36
|
+
ENV['W3C_MARKUP_VALIDATOR_URI'] = 'http://example.com/validator'
|
37
|
+
|
38
|
+
Follow this guide to learn how to set up your own validation server: https://github.com/tlvince/w3c-validator-guide
|
39
|
+
|
25
40
|
= Notes:
|
26
41
|
|
27
42
|
This gem requires Ruby 1.9, and has been tested on Ruby 1.9.2-p0
|
data/bin/w3clove
CHANGED
@@ -4,10 +4,12 @@
|
|
4
4
|
require_relative '../lib/w3clove'
|
5
5
|
|
6
6
|
begin
|
7
|
-
if ARGV.length
|
8
|
-
W3Clove::Validator.check(ARGV[0], ARGV[1])
|
7
|
+
if ARGV.length >= 2
|
8
|
+
W3Clove::Validator.check(ARGV[0], ARGV[1], ARGV[2])
|
9
9
|
else
|
10
10
|
puts "USAGE: w3clove url_of_sitemap output_file.html"
|
11
|
+
puts " OR "
|
12
|
+
puts "USAGE: w3clove url_of_sitemap output_file.html timeout_in_seconds"
|
11
13
|
end
|
12
14
|
rescue
|
13
15
|
puts "There was an error processing your request"
|
data/lib/w3clove/page.rb
CHANGED
@@ -10,10 +10,11 @@ module W3Clove
|
|
10
10
|
# In case an exception happens when validating, it is tracked
|
11
11
|
#
|
12
12
|
class Page
|
13
|
-
attr_accessor :url, :exception
|
13
|
+
attr_accessor :url, :timeout, :exception
|
14
14
|
|
15
|
-
def initialize(url)
|
16
|
-
@url
|
15
|
+
def initialize(url, timeout = 20)
|
16
|
+
@url = url
|
17
|
+
@timeout = timeout
|
17
18
|
end
|
18
19
|
|
19
20
|
##
|
@@ -61,7 +62,13 @@ module W3Clove
|
|
61
62
|
##
|
62
63
|
# Gets the validations for this page, ensuring it times out soon
|
63
64
|
def validations
|
64
|
-
@validations ||= Timeout::timeout(
|
65
|
+
@validations ||= Timeout::timeout(timeout) { markup_validator.validate_uri(url) }
|
66
|
+
end
|
67
|
+
|
68
|
+
##
|
69
|
+
# Returns an instance of MarkupValidator, with the URL set to the one in ENV or its default
|
70
|
+
def markup_validator
|
71
|
+
@markup_validator ||= MarkupValidator.new(:validator_uri => ENV['W3C_MARKUP_VALIDATOR_URI'] || 'http://validator.w3.org/check')
|
65
72
|
end
|
66
73
|
end
|
67
74
|
end
|
data/lib/w3clove/sitemap.rb
CHANGED
@@ -3,16 +3,18 @@
|
|
3
3
|
require 'open-uri'
|
4
4
|
require 'nokogiri'
|
5
5
|
require 'metainspector'
|
6
|
+
require 'timeout'
|
6
7
|
|
7
8
|
module W3Clove
|
8
9
|
##
|
9
10
|
# A sitemap has a URL, and holds a collection of pages to be validated
|
10
11
|
#
|
11
12
|
class Sitemap
|
12
|
-
attr_accessor :url
|
13
|
+
attr_accessor :url, :timeout
|
13
14
|
|
14
|
-
def initialize(url)
|
15
|
-
@url
|
15
|
+
def initialize(url, timeout = 20)
|
16
|
+
@url = url
|
17
|
+
@timeout = timeout
|
16
18
|
end
|
17
19
|
|
18
20
|
##
|
@@ -53,7 +55,7 @@ module W3Clove
|
|
53
55
|
def pages_in_sitemap
|
54
56
|
pages = xml_locations.map {|loc| W3Clove::Page.new(loc.text)}
|
55
57
|
if pages.empty?
|
56
|
-
m = MetaInspector.new(url)
|
58
|
+
m = MetaInspector.new(url, timeout)
|
57
59
|
links = m.absolute_links.select {|l| l.start_with?(m.url) && looks_like_html?(l)}.map {|l| l.split('#')[0]}.uniq
|
58
60
|
links << m.url unless (links.include?(m.url) || links.include?("#{m.url}/"))
|
59
61
|
pages = links.map {|link| W3Clove::Page.new(link)}
|
data/lib/w3clove/validator.rb
CHANGED
@@ -15,8 +15,8 @@ module W3Clove
|
|
15
15
|
# Shows progress on dot-style (...F...FFE..). A dot is a valid page,
|
16
16
|
# an F is a page with errors, and an E is an exception
|
17
17
|
# After the checking is done, a detailed summary is written to filename
|
18
|
-
def check(url, filename)
|
19
|
-
sitemap = W3Clove::Sitemap.new(url)
|
18
|
+
def check(url, filename, timeout)
|
19
|
+
sitemap = W3Clove::Sitemap.new(url, timeout.to_f)
|
20
20
|
say "Validating #{sitemap.pages.length} pages"
|
21
21
|
|
22
22
|
sitemap.pages.each do |page|
|
data/lib/w3clove/version.rb
CHANGED
data/w3clove.gemspec
CHANGED
@@ -18,7 +18,7 @@ and outputs a detailed report with all errors and warnings}
|
|
18
18
|
|
19
19
|
gem.add_dependency 'w3c_validators', '~> 1.2'
|
20
20
|
gem.add_dependency 'nokogiri', '~> 1.5.3'
|
21
|
-
gem.add_dependency 'metainspector', '
|
21
|
+
gem.add_dependency 'metainspector', '1.9.1'
|
22
22
|
|
23
23
|
gem.add_development_dependency 'rspec', '~> 2.5.0'
|
24
24
|
gem.add_development_dependency 'mocha', '~> 0.11.4'
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: w3clove
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 7
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 7
|
9
|
-
-
|
10
|
-
version: 0.7.
|
9
|
+
- 2
|
10
|
+
version: 0.7.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jaime Iniesta
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-07-12 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: w3c_validators
|
@@ -54,14 +54,14 @@ dependencies:
|
|
54
54
|
requirement: &id003 !ruby/object:Gem::Requirement
|
55
55
|
none: false
|
56
56
|
requirements:
|
57
|
-
- -
|
57
|
+
- - "="
|
58
58
|
- !ruby/object:Gem::Version
|
59
|
-
hash:
|
59
|
+
hash: 49
|
60
60
|
segments:
|
61
61
|
- 1
|
62
62
|
- 9
|
63
|
-
-
|
64
|
-
version: 1.9.
|
63
|
+
- 1
|
64
|
+
version: 1.9.1
|
65
65
|
type: :runtime
|
66
66
|
version_requirements: *id003
|
67
67
|
- !ruby/object:Gem::Dependency
|