site_validator 1.4.1 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/bin/site_validator +2 -2
- data/lib/site_validator/sitemap.rb +6 -7
- data/lib/site_validator/validator.rb +3 -3
- data/lib/site_validator/version.rb +1 -1
- data/spec/sitemap_spec.rb +5 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d4dbe8f5a7a4fff1b83b982a23ccf67918e06723
|
4
|
+
data.tar.gz: b7bf3908fd7ae4d1ede2362161589b9428632e73
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a44be80ecbcc4da84f1f86ad809f7ef8467c1e9d06ca6911d203b71b7abb758c68a43776f84af3743ebc70dd05c1e97540c155e1d9d902f23e5ee8e2f9079bcf
|
7
|
+
data.tar.gz: 37b1fb6c80178ee3d9c98cb4abec0d7b2a9ab4a90e9f2960cd0c3a02e2a4840b97699c5881069300e40a1626e396f85edf7106ae420f4349fe0a922598166149
|
data/README.md
CHANGED
@@ -24,9 +24,9 @@ This will validate all the internal URLs found on the starting URL, up to a maxi
|
|
24
24
|
|
25
25
|
You can pass site_validator an XML sitemap or the URL of a website; it will scrape it in search of URLs to validate.
|
26
26
|
|
27
|
-
##
|
27
|
+
## Max pages
|
28
28
|
|
29
|
-
By default, site_validator will
|
29
|
+
By default, site_validator will validate up to 100 pages per sitemap. If you want to set a different value, pass it as a third parameter like this:
|
30
30
|
|
31
31
|
site_validator http://validationhell.com report.html 60
|
32
32
|
|
data/bin/site_validator
CHANGED
@@ -9,8 +9,8 @@ begin
|
|
9
9
|
else
|
10
10
|
puts "USAGE: site_validator url_of_sitemap output_file.html"
|
11
11
|
puts " OR "
|
12
|
-
puts "USAGE: site_validator url_of_sitemap output_file.html
|
12
|
+
puts "USAGE: site_validator url_of_sitemap output_file.html max_pages"
|
13
13
|
end
|
14
14
|
rescue
|
15
15
|
puts "There was an error processing your request"
|
16
|
-
end
|
16
|
+
end
|
@@ -2,24 +2,23 @@
|
|
2
2
|
|
3
3
|
require 'nokogiri'
|
4
4
|
require 'metainspector'
|
5
|
-
require 'timeout'
|
6
5
|
|
7
6
|
module SiteValidator
|
8
7
|
##
|
9
8
|
# A sitemap has a URL, and holds a collection of pages to be validated
|
10
9
|
#
|
11
10
|
class Sitemap
|
12
|
-
attr_accessor :url, :
|
11
|
+
attr_accessor :url, :max_pages
|
13
12
|
|
14
|
-
def initialize(url,
|
15
|
-
@url
|
16
|
-
@
|
13
|
+
def initialize(url, max_pages = 100)
|
14
|
+
@url = url
|
15
|
+
@max_pages = max_pages
|
17
16
|
end
|
18
17
|
|
19
18
|
##
|
20
19
|
# Returns the first max_pages unique URLs from the sitemap
|
21
20
|
def pages
|
22
|
-
@pages ||= pages_in_sitemap.uniq {|p| p.url}[0..
|
21
|
+
@pages ||= pages_in_sitemap.uniq {|p| p.url}[0..max_pages-1]
|
23
22
|
end
|
24
23
|
|
25
24
|
##
|
@@ -55,7 +54,7 @@ module SiteValidator
|
|
55
54
|
pages = xml_locations.select {|loc| looks_like_html?(loc.text.strip)}.map {|loc| SiteValidator::Page.new(loc.text.strip)}
|
56
55
|
|
57
56
|
if pages.empty?
|
58
|
-
m = MetaInspector.new(url, :timeout =>
|
57
|
+
m = MetaInspector.new(url, :timeout => 20, :allow_redirections => :all)
|
59
58
|
links = [m.url]
|
60
59
|
|
61
60
|
m.internal_links.select {|l| looks_like_html?(l)}.map {|l| l.split('#')[0]}.uniq.each do |link|
|
@@ -15,8 +15,8 @@ module SiteValidator
|
|
15
15
|
# Shows progress in dot style (...F...FFE..). A dot is a valid page,
|
16
16
|
# an F is a page with errors, and an E is an exception
|
17
17
|
# After the checking is done, a detailed summary is written to filename
|
18
|
-
def check(url, filename,
|
19
|
-
sitemap = SiteValidator::Sitemap.new(url,
|
18
|
+
def check(url, filename, max_pages)
|
19
|
+
sitemap = SiteValidator::Sitemap.new(url, max_pages.to_i)
|
20
20
|
say "Validating #{sitemap.pages.length} pages"
|
21
21
|
|
22
22
|
sitemap.pages.each do |page|
|
@@ -45,4 +45,4 @@ module SiteValidator
|
|
45
45
|
printer.print text
|
46
46
|
end
|
47
47
|
end
|
48
|
-
end
|
48
|
+
end
|
data/spec/sitemap_spec.rb
CHANGED
@@ -184,6 +184,11 @@ describe SiteValidator::Sitemap do
|
|
184
184
|
@sitemap_with_protocol_relative.pages.map {|p| p.url}.should_not include 'http://yahoo.com'
|
185
185
|
end
|
186
186
|
end
|
187
|
+
|
188
|
+
it "should be able to limit the number of pages" do
|
189
|
+
sitemap = SiteValidator::Sitemap.new('http://guides.rubyonrails.org', 10)
|
190
|
+
sitemap.pages.length.should == 10
|
191
|
+
end
|
187
192
|
end
|
188
193
|
|
189
194
|
describe "validations" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: site_validator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: w3c_validators
|