site_validator 1.4.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/bin/site_validator +2 -2
- data/lib/site_validator/sitemap.rb +6 -7
- data/lib/site_validator/validator.rb +3 -3
- data/lib/site_validator/version.rb +1 -1
- data/spec/sitemap_spec.rb +5 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d4dbe8f5a7a4fff1b83b982a23ccf67918e06723
|
4
|
+
data.tar.gz: b7bf3908fd7ae4d1ede2362161589b9428632e73
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a44be80ecbcc4da84f1f86ad809f7ef8467c1e9d06ca6911d203b71b7abb758c68a43776f84af3743ebc70dd05c1e97540c155e1d9d902f23e5ee8e2f9079bcf
|
7
|
+
data.tar.gz: 37b1fb6c80178ee3d9c98cb4abec0d7b2a9ab4a90e9f2960cd0c3a02e2a4840b97699c5881069300e40a1626e396f85edf7106ae420f4349fe0a922598166149
|
data/README.md
CHANGED
@@ -24,9 +24,9 @@ This will validate all the internal URLs found on the starting URL, up to a maxi
|
|
24
24
|
|
25
25
|
You can pass site_validator an XML sitemap or the URL of a website; it will scrape it in search of URLs to validate.
|
26
26
|
|
27
|
-
##
|
27
|
+
## Max pages
|
28
28
|
|
29
|
-
By default, site_validator will
|
29
|
+
By default, site_validator will validate up to 100 pages per sitemap. If you want to set a different value, pass it as a third parameter like this:
|
30
30
|
|
31
31
|
site_validator http://validationhell.com report.html 60
|
32
32
|
|
data/bin/site_validator
CHANGED
@@ -9,8 +9,8 @@ begin
|
|
9
9
|
else
|
10
10
|
puts "USAGE: site_validator url_of_sitemap output_file.html"
|
11
11
|
puts " OR "
|
12
|
-
puts "USAGE: site_validator url_of_sitemap output_file.html
|
12
|
+
puts "USAGE: site_validator url_of_sitemap output_file.html max_pages"
|
13
13
|
end
|
14
14
|
rescue
|
15
15
|
puts "There was an error processing your request"
|
16
|
-
end
|
16
|
+
end
|
data/lib/site_validator/sitemap.rb
CHANGED
@@ -2,24 +2,23 @@
|
|
2
2
|
|
3
3
|
require 'nokogiri'
|
4
4
|
require 'metainspector'
|
5
|
-
require 'timeout'
|
6
5
|
|
7
6
|
module SiteValidator
|
8
7
|
##
|
9
8
|
# A sitemap has a URL, and holds a collection of pages to be validated
|
10
9
|
#
|
11
10
|
class Sitemap
|
12
|
-
attr_accessor :url, :
|
11
|
+
attr_accessor :url, :max_pages
|
13
12
|
|
14
|
-
def initialize(url,
|
15
|
-
@url
|
16
|
-
@
|
13
|
+
def initialize(url, max_pages = 100)
|
14
|
+
@url = url
|
15
|
+
@max_pages = max_pages
|
17
16
|
end
|
18
17
|
|
19
18
|
##
|
20
19
|
# Returns the first max_pages unique URLs from the sitemap (100 by default)
|
21
20
|
def pages
|
22
|
-
@pages ||= pages_in_sitemap.uniq {|p| p.url}[0..
|
21
|
+
@pages ||= pages_in_sitemap.uniq {|p| p.url}[0..max_pages-1]
|
23
22
|
end
|
24
23
|
|
25
24
|
##
|
@@ -55,7 +54,7 @@ module SiteValidator
|
|
55
54
|
pages = xml_locations.select {|loc| looks_like_html?(loc.text.strip)}.map {|loc| SiteValidator::Page.new(loc.text.strip)}
|
56
55
|
|
57
56
|
if pages.empty?
|
58
|
-
m = MetaInspector.new(url, :timeout =>
|
57
|
+
m = MetaInspector.new(url, :timeout => 20, :allow_redirections => :all)
|
59
58
|
links = [m.url]
|
60
59
|
|
61
60
|
m.internal_links.select {|l| looks_like_html?(l)}.map {|l| l.split('#')[0]}.uniq.each do |link|
|
data/lib/site_validator/validator.rb
CHANGED
@@ -15,8 +15,8 @@ module SiteValidator
|
|
15
15
|
# Shows progress on dot-style (...F...FFE..). A dot is a valid page,
|
16
16
|
# an F is a page with errors, and an E is an exception
|
17
17
|
# After the checking is done, a detailed summary is written to filename
|
18
|
-
def check(url, filename,
|
19
|
-
sitemap = SiteValidator::Sitemap.new(url,
|
18
|
+
def check(url, filename, max_pages)
|
19
|
+
sitemap = SiteValidator::Sitemap.new(url, max_pages.to_i)
|
20
20
|
say "Validating #{sitemap.pages.length} pages"
|
21
21
|
|
22
22
|
sitemap.pages.each do |page|
|
@@ -45,4 +45,4 @@ module SiteValidator
|
|
45
45
|
printer.print text
|
46
46
|
end
|
47
47
|
end
|
48
|
-
end
|
48
|
+
end
|
data/spec/sitemap_spec.rb
CHANGED
@@ -184,6 +184,11 @@ describe SiteValidator::Sitemap do
|
|
184
184
|
@sitemap_with_protocol_relative.pages.map {|p| p.url}.should_not include 'http://yahoo.com'
|
185
185
|
end
|
186
186
|
end
|
187
|
+
|
188
|
+
it "should be able to limit the number of pages" do
|
189
|
+
sitemap = SiteValidator::Sitemap.new('http://guides.rubyonrails.org', 10)
|
190
|
+
sitemap.pages.length.should == 10
|
191
|
+
end
|
187
192
|
end
|
188
193
|
|
189
194
|
describe "validations" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: site_validator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: w3c_validators
|