w3clove 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -22,6 +22,21 @@ This will validate all the internal URLs found on the starting URL, up to a maxi
22
22
 
23
23
  You can pass w3clove an XML sitemap or the URL of a website; it will scrape it in search of URLs to validate.
24
24
 
25
+ = Timeouts:
26
+
27
+ By default, the w3clove gem sets a 20-second timeout for each individual request. To use a different timeout, pass it (in seconds) as a third parameter, like this:
28
+
29
+ w3clove http://ryanair.com/sitemap.xml report.html 60
30
+
31
+ = Using an alternate validation server
32
+
33
+ By default, the w3clove gem uses the official W3C Validator server at http://validator.w3.org, but you can use an alternate server if you want.
34
+ To do this, set the W3C_MARKUP_VALIDATOR_URI environment variable, like:
35
+
36
+ ENV['W3C_MARKUP_VALIDATOR_URI'] = 'http://example.com/validator'
37
+
38
+ Follow this guide to learn how to set up your own validation server: https://github.com/tlvince/w3c-validator-guide
39
+
25
40
  = Notes:
26
41
 
27
42
  This gem requires Ruby 1.9, and has been tested on Ruby 1.9.2-p0
data/bin/w3clove CHANGED
@@ -4,10 +4,12 @@
4
4
  require_relative '../lib/w3clove'
5
5
 
6
6
  begin
7
- if ARGV.length == 2
8
- W3Clove::Validator.check(ARGV[0], ARGV[1])
7
+ if ARGV.length >= 2
8
+ W3Clove::Validator.check(ARGV[0], ARGV[1], ARGV[2])
9
9
  else
10
10
  puts "USAGE: w3clove url_of_sitemap output_file.html"
11
+ puts " OR "
12
+ puts "USAGE: w3clove url_of_sitemap output_file.html timeout_in_seconds"
11
13
  end
12
14
  rescue
13
15
  puts "There was an error processing your request"
data/lib/w3clove/page.rb CHANGED
@@ -10,10 +10,11 @@ module W3Clove
10
10
  # In case an exception happens when validating, it is tracked
11
11
  #
12
12
  class Page
13
- attr_accessor :url, :exception
13
+ attr_accessor :url, :timeout, :exception
14
14
 
15
- def initialize(url)
16
- @url = url
15
+ def initialize(url, timeout = 20)
16
+ @url = url
17
+ @timeout = timeout
17
18
  end
18
19
 
19
20
  ##
@@ -61,7 +62,13 @@ module W3Clove
61
62
  ##
62
63
  # Gets the validations for this page, ensuring it times out soon
63
64
  def validations
64
- @validations ||= Timeout::timeout(10) { MarkupValidator.new.validate_uri(url) }
65
+ @validations ||= Timeout::timeout(timeout) { markup_validator.validate_uri(url) }
66
+ end
67
+
68
+ ##
69
+ # Returns an instance of MarkupValidator, with the URL set to the one in ENV or its default
70
+ def markup_validator
71
+ @markup_validator ||= MarkupValidator.new(:validator_uri => ENV['W3C_MARKUP_VALIDATOR_URI'] || 'http://validator.w3.org/check')
65
72
  end
66
73
  end
67
74
  end
@@ -3,16 +3,18 @@
3
3
  require 'open-uri'
4
4
  require 'nokogiri'
5
5
  require 'metainspector'
6
+ require 'timeout'
6
7
 
7
8
  module W3Clove
8
9
  ##
9
10
  # A sitemap has a URL, and holds a collection of pages to be validated
10
11
  #
11
12
  class Sitemap
12
- attr_accessor :url
13
+ attr_accessor :url, :timeout
13
14
 
14
- def initialize(url)
15
- @url = url
15
+ def initialize(url, timeout = 20)
16
+ @url = url
17
+ @timeout = timeout
16
18
  end
17
19
 
18
20
  ##
@@ -53,7 +55,7 @@ module W3Clove
53
55
  def pages_in_sitemap
54
56
  pages = xml_locations.map {|loc| W3Clove::Page.new(loc.text)}
55
57
  if pages.empty?
56
- m = MetaInspector.new(url)
58
+ m = MetaInspector.new(url, timeout)
57
59
  links = m.absolute_links.select {|l| l.start_with?(m.url) && looks_like_html?(l)}.map {|l| l.split('#')[0]}.uniq
58
60
  links << m.url unless (links.include?(m.url) || links.include?("#{m.url}/"))
59
61
  pages = links.map {|link| W3Clove::Page.new(link)}
@@ -15,8 +15,8 @@ module W3Clove
15
15
  # Shows progress in dot style (...F...FFE..). A dot is a valid page,
16
16
  # an F is a page with errors, and an E is an exception
17
17
  # After the checking is done, a detailed summary is written to filename
18
- def check(url, filename)
19
- sitemap = W3Clove::Sitemap.new(url)
18
+ def check(url, filename, timeout)
19
+ sitemap = W3Clove::Sitemap.new(url, timeout.to_f)
20
20
  say "Validating #{sitemap.pages.length} pages"
21
21
 
22
22
  sitemap.pages.each do |page|
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module W3Clove
4
- VERSION = "0.7.1"
4
+ VERSION = "0.7.2"
5
5
  end
data/w3clove.gemspec CHANGED
@@ -18,7 +18,7 @@ and outputs a detailed report with all errors and warnings}
18
18
 
19
19
  gem.add_dependency 'w3c_validators', '~> 1.2'
20
20
  gem.add_dependency 'nokogiri', '~> 1.5.3'
21
- gem.add_dependency 'metainspector', '~> 1.9.0'
21
+ gem.add_dependency 'metainspector', '1.9.1'
22
22
 
23
23
  gem.add_development_dependency 'rspec', '~> 2.5.0'
24
24
  gem.add_development_dependency 'mocha', '~> 0.11.4'
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: w3clove
3
3
  version: !ruby/object:Gem::Version
4
- hash: 1
4
+ hash: 7
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 7
9
- - 1
10
- version: 0.7.1
9
+ - 2
10
+ version: 0.7.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jaime Iniesta
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-06-22 00:00:00 Z
18
+ date: 2012-07-12 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: w3c_validators
@@ -54,14 +54,14 @@ dependencies:
54
54
  requirement: &id003 !ruby/object:Gem::Requirement
55
55
  none: false
56
56
  requirements:
57
- - - ~>
57
+ - - "="
58
58
  - !ruby/object:Gem::Version
59
- hash: 51
59
+ hash: 49
60
60
  segments:
61
61
  - 1
62
62
  - 9
63
- - 0
64
- version: 1.9.0
63
+ - 1
64
+ version: 1.9.1
65
65
  type: :runtime
66
66
  version_requirements: *id003
67
67
  - !ruby/object:Gem::Dependency