sitemap_checker 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +6 -1
- data/lib/sitemap_checker.rb +12 -3
- data/lib/sitemap_checker/version.rb +1 -1
- data/spec/sitemap_checker_spec.rb +15 -12
- metadata +4 -4
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# SitemapChecker
|
2
2
|
|
3
|
-
Takes a url pointing to an xml or xml.gz sitemap or siteindex file and returns array of
|
3
|
+
Takes a URL pointing to an xml or xml.gz sitemap or siteindex file and returns an array of the URLs contained within.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -18,8 +18,13 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
+
Get a list of URLs from an xml or xml.gz sitemap URL
|
22
|
+
|
21
23
|
SitemapChecker::Checker.new(url)
|
22
24
|
|
25
|
+
Get the status of a URL
|
26
|
+
|
27
|
+
SitemapChecker::Checker.get_status(url)
|
23
28
|
|
24
29
|
## Contributing
|
25
30
|
|
data/lib/sitemap_checker.rb
CHANGED
@@ -5,14 +5,23 @@ require 'zlib'
|
|
5
5
|
|
6
6
|
module SitemapChecker
|
7
7
|
class Checker
|
8
|
-
attr_reader :
|
8
|
+
attr_reader :url_list
|
9
9
|
|
10
10
|
def initialize(url,schema='')
|
11
11
|
@url = url
|
12
|
+
@url_list = Array.new
|
12
13
|
@status_list = Array.new
|
13
14
|
process_xml
|
14
15
|
end
|
15
16
|
|
17
|
+
def self.get_status(url)
|
18
|
+
begin
|
19
|
+
status = [url.content,open(url).status[0]]
|
20
|
+
rescue OpenURI::HTTPError => e
|
21
|
+
status = [url.content,e.io.status[0]]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
16
25
|
private
|
17
26
|
|
18
27
|
def get_xml_from_url(url)
|
@@ -28,12 +37,12 @@ module SitemapChecker
|
|
28
37
|
ixsd = Nokogiri::XML::Schema(open('http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd'))
|
29
38
|
xml = get_xml_from_url(@url)
|
30
39
|
if mxsd.valid?(xml)
|
31
|
-
@
|
40
|
+
@url_list = urls(xml)
|
32
41
|
elsif ixsd.valid?(xml)
|
33
42
|
maps = urls(xml)
|
34
43
|
maps.each do |map|
|
35
44
|
xml = get_xml_from_url(map)
|
36
|
-
@
|
45
|
+
@url_list = urls(xml)
|
37
46
|
end
|
38
47
|
else raise 'Invalid Schema'
|
39
48
|
false
|
@@ -16,32 +16,35 @@ describe SitemapChecker do
|
|
16
16
|
stub_request(:get, "http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd").to_return(:status => 200, :body => File.read(@dir + 'fixtures/siteindex.xsd'), :headers => {})
|
17
17
|
end
|
18
18
|
|
19
|
-
it "accepts xml
|
19
|
+
it "accepts xml siteindexes" do
|
20
20
|
@xml_sitemap = SitemapChecker::Checker.new('http://www.github.com/siteindex.xml')
|
21
|
+
@xml_sitemap.url_list.size.should eq(2)
|
22
|
+
end
|
23
|
+
|
24
|
+
it "accepts gzipped siteindexes" do
|
21
25
|
@gz_sitemap = SitemapChecker::Checker.new('http://www.github.com/siteindex.xml.gz')
|
22
|
-
@
|
23
|
-
|
26
|
+
@gz_sitemap.url_list.size.should eq(2)
|
27
|
+
end
|
28
|
+
|
29
|
+
it "accepts xml sitemaps" do
|
30
|
+
@xml_sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml')
|
31
|
+
@xml_sitemap.url_list.size.should eq(2)
|
24
32
|
end
|
25
33
|
|
26
34
|
it "accepts xml and gzipped sitemaps" do
|
27
35
|
@xml_sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml')
|
28
36
|
@gz_sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml.gz')
|
29
|
-
@xml_sitemap.
|
30
|
-
@gz_sitemap.
|
37
|
+
@xml_sitemap.url_list.size.should eq(2)
|
38
|
+
@gz_sitemap.url_list.size.should eq(2)
|
31
39
|
end
|
32
40
|
|
33
41
|
it "Errors if input doc does not match sitemap schema" do
|
34
42
|
lambda {SitemapChecker::Checker.new('http://www.github.com')}.should raise_error(RuntimeError, 'Invalid Schema')
|
35
43
|
end
|
36
44
|
|
37
|
-
it "returns
|
45
|
+
it "returns status if given a url" do
|
38
46
|
@sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml')
|
39
|
-
@sitemap.
|
40
|
-
end
|
41
|
-
|
42
|
-
it "returns list of urls with responses from siteindex" do
|
43
|
-
@siteindex = SitemapChecker::Checker.new('http://www.github.com/siteindex.xml')
|
44
|
-
@siteindex.status_list.should eq([['http://www.github.com','200'], ['http://www.github.com/404','404'], ['http://www.github.com','200'], ['http://www.github.com/404','404']])
|
47
|
+
SitemapChecker::Checker.get_status(@sitemap.url_list.first).should eq(['http://www.github.com','200'])
|
45
48
|
end
|
46
49
|
|
47
50
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemap_checker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 1889055196096400351
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Gerlando Piro
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-08-22 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: nokogiri
|