site_validator 1.4.1 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4757d9e89c4e2e314b5b235dcba1d39ebfce20e6
4
- data.tar.gz: 3e7afcda9986403a293f422dfbf5e925e4516cb7
3
+ metadata.gz: d4dbe8f5a7a4fff1b83b982a23ccf67918e06723
4
+ data.tar.gz: b7bf3908fd7ae4d1ede2362161589b9428632e73
5
5
  SHA512:
6
- metadata.gz: c5e1ee9640cfbf9a273c63b0ee0c2f0f54f4cb6770e45f535d70ad079142d42f2b66c132317d4c299270246112a0212076120ba5a2b218893aae3fbc090e84e8
7
- data.tar.gz: b4990b9aa1ffc3e279eb9f0f86ba409f7d13ec8e1f99e7cd60c6d294a70d5887447d6eeef3d82412e74f6108b38d5b0bb0555913a772906751ac24ef2c10519b
6
+ metadata.gz: a44be80ecbcc4da84f1f86ad809f7ef8467c1e9d06ca6911d203b71b7abb758c68a43776f84af3743ebc70dd05c1e97540c155e1d9d902f23e5ee8e2f9079bcf
7
+ data.tar.gz: 37b1fb6c80178ee3d9c98cb4abec0d7b2a9ab4a90e9f2960cd0c3a02e2a4840b97699c5881069300e40a1626e396f85edf7106ae420f4349fe0a922598166149
data/README.md CHANGED
@@ -24,9 +24,9 @@ This will validate all the internal URLs found on the starting URL, up to a maxi
24
24
 
25
25
  You can pass site_validator an XML sitemap or the URL of a website; it will scrape it in search of URLs to validate.
26
26
 
27
- ##Timeouts
27
+ ## Max pages
28
28
 
29
- By default, site_validator will set a 20 seconds timeout for each individual request. If you want to set a different timeout, pass it as a third parameter like this:
29
+ By default, site_validator will validate up to 100 pages per sitemap. If you want to set a different value, pass it as a third parameter like this:
30
30
 
31
31
  site_validator http://validationhell.com report.html 60
32
32
 
@@ -9,8 +9,8 @@ begin
9
9
  else
10
10
  puts "USAGE: site_validator url_of_sitemap output_file.html"
11
11
  puts " OR "
12
- puts "USAGE: site_validator url_of_sitemap output_file.html timeout_in_seconds"
12
+ puts "USAGE: site_validator url_of_sitemap output_file.html max_pages"
13
13
  end
14
14
  rescue
15
15
  puts "There was an error processing your request"
16
- end
16
+ end
@@ -2,24 +2,23 @@
2
2
 
3
3
  require 'nokogiri'
4
4
  require 'metainspector'
5
- require 'timeout'
6
5
 
7
6
  module SiteValidator
8
7
  ##
9
8
  # A sitemap has a URL, and holds a collection of pages to be validated
10
9
  #
11
10
  class Sitemap
12
- attr_accessor :url, :timeout
11
+ attr_accessor :url, :max_pages
13
12
 
14
- def initialize(url, timeout = 20)
15
- @url = url
16
- @timeout = timeout
13
+ def initialize(url, max_pages = 100)
14
+ @url = url
15
+ @max_pages = max_pages
17
16
  end
18
17
 
19
18
  ##
20
19
  # Returns the first max_pages unique URLs from the sitemap (100 by default)
21
20
  def pages
22
- @pages ||= pages_in_sitemap.uniq {|p| p.url}[0..249]
21
+ @pages ||= pages_in_sitemap.uniq {|p| p.url}[0..max_pages-1]
23
22
  end
24
23
 
25
24
  ##
@@ -55,7 +54,7 @@ module SiteValidator
55
54
  pages = xml_locations.select {|loc| looks_like_html?(loc.text.strip)}.map {|loc| SiteValidator::Page.new(loc.text.strip)}
56
55
 
57
56
  if pages.empty?
58
- m = MetaInspector.new(url, :timeout => timeout, :allow_redirections => :all)
57
+ m = MetaInspector.new(url, :timeout => 20, :allow_redirections => :all)
59
58
  links = [m.url]
60
59
 
61
60
  m.internal_links.select {|l| looks_like_html?(l)}.map {|l| l.split('#')[0]}.uniq.each do |link|
@@ -15,8 +15,8 @@ module SiteValidator
15
15
  # Shows progress in dot style (...F...FFE..). A dot is a valid page,
16
16
  # an F is a page with errors, and an E is an exception
17
17
  # After the checking is done, a detailed summary is written to filename
18
- def check(url, filename, timeout)
19
- sitemap = SiteValidator::Sitemap.new(url, timeout.to_f)
18
+ def check(url, filename, max_pages)
19
+ sitemap = SiteValidator::Sitemap.new(url, max_pages.to_i)
20
20
  say "Validating #{sitemap.pages.length} pages"
21
21
 
22
22
  sitemap.pages.each do |page|
@@ -45,4 +45,4 @@ module SiteValidator
45
45
  printer.print text
46
46
  end
47
47
  end
48
- end
48
+ end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module SiteValidator
4
- VERSION = "1.4.1"
4
+ VERSION = "1.5.0"
5
5
  end
@@ -184,6 +184,11 @@ describe SiteValidator::Sitemap do
184
184
  @sitemap_with_protocol_relative.pages.map {|p| p.url}.should_not include 'http://yahoo.com'
185
185
  end
186
186
  end
187
+
188
+ it "should be able to limit the number of pages" do
189
+ sitemap = SiteValidator::Sitemap.new('http://guides.rubyonrails.org', 10)
190
+ sitemap.pages.length.should == 10
191
+ end
187
192
  end
188
193
 
189
194
  describe "validations" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: site_validator
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.1
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-20 00:00:00.000000000 Z
11
+ date: 2014-06-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: w3c_validators