site_validator 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4757d9e89c4e2e314b5b235dcba1d39ebfce20e6
4
- data.tar.gz: 3e7afcda9986403a293f422dfbf5e925e4516cb7
3
+ metadata.gz: d4dbe8f5a7a4fff1b83b982a23ccf67918e06723
4
+ data.tar.gz: b7bf3908fd7ae4d1ede2362161589b9428632e73
5
5
  SHA512:
6
- metadata.gz: c5e1ee9640cfbf9a273c63b0ee0c2f0f54f4cb6770e45f535d70ad079142d42f2b66c132317d4c299270246112a0212076120ba5a2b218893aae3fbc090e84e8
7
- data.tar.gz: b4990b9aa1ffc3e279eb9f0f86ba409f7d13ec8e1f99e7cd60c6d294a70d5887447d6eeef3d82412e74f6108b38d5b0bb0555913a772906751ac24ef2c10519b
6
+ metadata.gz: a44be80ecbcc4da84f1f86ad809f7ef8467c1e9d06ca6911d203b71b7abb758c68a43776f84af3743ebc70dd05c1e97540c155e1d9d902f23e5ee8e2f9079bcf
7
+ data.tar.gz: 37b1fb6c80178ee3d9c98cb4abec0d7b2a9ab4a90e9f2960cd0c3a02e2a4840b97699c5881069300e40a1626e396f85edf7106ae420f4349fe0a922598166149
data/README.md CHANGED
@@ -24,9 +24,9 @@ This will validate all the internal URLs found on the starting URL, up to a maxi
24
24
 
25
25
  You can pass site_validator an XML sitemap or the URL of a website; it will scrape it in search of URLs to validate.
26
26
 
27
- ##Timeouts
27
+ ## Max pages
28
28
 
29
- By default, site_validator will set a 20 seconds timeout for each individual request. If you want to set a different timeout, pass it as a third parameter like this:
29
+ By default, site_validator will validate up to 100 pages per sitemap. If you want to set a different value, pass it as a third parameter like this:
30
30
 
31
31
  site_validator http://validationhell.com report.html 60
32
32
 
@@ -9,8 +9,8 @@ begin
9
9
  else
10
10
  puts "USAGE: site_validator url_of_sitemap output_file.html"
11
11
  puts " OR "
12
- puts "USAGE: site_validator url_of_sitemap output_file.html timeout_in_seconds"
12
+ puts "USAGE: site_validator url_of_sitemap output_file.html max_pages"
13
13
  end
14
14
  rescue
15
15
  puts "There was an error processing your request"
16
- end
16
+ end
@@ -2,24 +2,23 @@
2
2
 
3
3
  require 'nokogiri'
4
4
  require 'metainspector'
5
- require 'timeout'
6
5
 
7
6
  module SiteValidator
8
7
  ##
9
8
  # A sitemap has a URL, and holds a collection of pages to be validated
10
9
  #
11
10
  class Sitemap
12
- attr_accessor :url, :timeout
11
+ attr_accessor :url, :max_pages
13
12
 
14
- def initialize(url, timeout = 20)
15
- @url = url
16
- @timeout = timeout
13
+ def initialize(url, max_pages = 100)
14
+ @url = url
15
+ @max_pages = max_pages
17
16
  end
18
17
 
19
18
  ##
20
19
  # Returns the first max_pages unique URLs from the sitemap (100 by default)
21
20
  def pages
22
- @pages ||= pages_in_sitemap.uniq {|p| p.url}[0..249]
21
+ @pages ||= pages_in_sitemap.uniq {|p| p.url}[0..max_pages-1]
23
22
  end
24
23
 
25
24
  ##
@@ -55,7 +54,7 @@ module SiteValidator
55
54
  pages = xml_locations.select {|loc| looks_like_html?(loc.text.strip)}.map {|loc| SiteValidator::Page.new(loc.text.strip)}
56
55
 
57
56
  if pages.empty?
58
- m = MetaInspector.new(url, :timeout => timeout, :allow_redirections => :all)
57
+ m = MetaInspector.new(url, :timeout => 20, :allow_redirections => :all)
59
58
  links = [m.url]
60
59
 
61
60
  m.internal_links.select {|l| looks_like_html?(l)}.map {|l| l.split('#')[0]}.uniq.each do |link|
@@ -15,8 +15,8 @@ module SiteValidator
15
15
  # Shows progress in dot style (...F...FFE..). A dot is a valid page,
16
16
  # an F is a page with errors, and an E is an exception
17
17
  # After the checking is done, a detailed summary is written to filename
18
- def check(url, filename, timeout)
19
- sitemap = SiteValidator::Sitemap.new(url, timeout.to_f)
18
+ def check(url, filename, max_pages)
19
+ sitemap = SiteValidator::Sitemap.new(url, max_pages.to_i)
20
20
  say "Validating #{sitemap.pages.length} pages"
21
21
 
22
22
  sitemap.pages.each do |page|
@@ -45,4 +45,4 @@ module SiteValidator
45
45
  printer.print text
46
46
  end
47
47
  end
48
- end
48
+ end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module SiteValidator
4
- VERSION = "1.4.1"
4
+ VERSION = "1.5.0"
5
5
  end
@@ -184,6 +184,11 @@ describe SiteValidator::Sitemap do
184
184
  @sitemap_with_protocol_relative.pages.map {|p| p.url}.should_not include 'http://yahoo.com'
185
185
  end
186
186
  end
187
+
188
+ it "should be able to limit the number of pages" do
189
+ sitemap = SiteValidator::Sitemap.new('http://guides.rubyonrails.org', 10)
190
+ sitemap.pages.length.should == 10
191
+ end
187
192
  end
188
193
 
189
194
  describe "validations" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: site_validator
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.1
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-20 00:00:00.000000000 Z
11
+ date: 2014-06-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: w3c_validators