sitemap_checker 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +6 -1
- data/lib/sitemap_checker.rb +12 -3
- data/lib/sitemap_checker/version.rb +1 -1
- data/spec/sitemap_checker_spec.rb +15 -12
- metadata +4 -4
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# SitemapChecker
|
2
2
|
|
3
|
-
Takes a url pointing to an xml or xml.gz sitemap or siteindex file and returns array of
|
3
|
+
Takes a URL pointing to an xml or xml.gz sitemap or siteindex file and returns an array of the URLs contained within.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -18,8 +18,13 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
+
Get the list of URLs from an xml or xml.gz sitemap URL
|
22
|
+
|
21
23
|
SitemapChecker::Checker.new(url)
|
22
24
|
|
25
|
+
Get the status of a URL
|
26
|
+
|
27
|
+
SitemapChecker::Checker.get_status(url)
|
23
28
|
|
24
29
|
## Contributing
|
25
30
|
|
data/lib/sitemap_checker.rb
CHANGED
@@ -5,14 +5,23 @@ require 'zlib'
|
|
5
5
|
|
6
6
|
module SitemapChecker
|
7
7
|
class Checker
|
8
|
-
attr_reader :
|
8
|
+
attr_reader :url_list
|
9
9
|
|
10
10
|
def initialize(url,schema='')
|
11
11
|
@url = url
|
12
|
+
@url_list = Array.new
|
12
13
|
@status_list = Array.new
|
13
14
|
process_xml
|
14
15
|
end
|
15
16
|
|
17
|
+
def self.get_status(url)
|
18
|
+
begin
|
19
|
+
status = [url.content,open(url).status[0]]
|
20
|
+
rescue OpenURI::HTTPError => e
|
21
|
+
status = [url.content,e.io.status[0]]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
16
25
|
private
|
17
26
|
|
18
27
|
def get_xml_from_url(url)
|
@@ -28,12 +37,12 @@ module SitemapChecker
|
|
28
37
|
ixsd = Nokogiri::XML::Schema(open('http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd'))
|
29
38
|
xml = get_xml_from_url(@url)
|
30
39
|
if mxsd.valid?(xml)
|
31
|
-
@
|
40
|
+
@url_list = urls(xml)
|
32
41
|
elsif ixsd.valid?(xml)
|
33
42
|
maps = urls(xml)
|
34
43
|
maps.each do |map|
|
35
44
|
xml = get_xml_from_url(map)
|
36
|
-
@
|
45
|
+
@url_list = urls(xml)
|
37
46
|
end
|
38
47
|
else raise 'Invalid Schema'
|
39
48
|
false
|
@@ -16,32 +16,35 @@ describe SitemapChecker do
|
|
16
16
|
stub_request(:get, "http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd").to_return(:status => 200, :body => File.read(@dir + 'fixtures/siteindex.xsd'), :headers => {})
|
17
17
|
end
|
18
18
|
|
19
|
-
it "accepts xml
|
19
|
+
it "accepts xml siteindexes" do
|
20
20
|
@xml_sitemap = SitemapChecker::Checker.new('http://www.github.com/siteindex.xml')
|
21
|
+
@xml_sitemap.url_list.size.should eq(2)
|
22
|
+
end
|
23
|
+
|
24
|
+
it "accepts gzipped siteindexes" do
|
21
25
|
@gz_sitemap = SitemapChecker::Checker.new('http://www.github.com/siteindex.xml.gz')
|
22
|
-
@
|
23
|
-
|
26
|
+
@gz_sitemap.url_list.size.should eq(2)
|
27
|
+
end
|
28
|
+
|
29
|
+
it "accepts xml sitemaps" do
|
30
|
+
@xml_sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml')
|
31
|
+
@xml_sitemap.url_list.size.should eq(2)
|
24
32
|
end
|
25
33
|
|
26
34
|
it "accepts xml and gzipped sitemaps" do
|
27
35
|
@xml_sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml')
|
28
36
|
@gz_sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml.gz')
|
29
|
-
@xml_sitemap.
|
30
|
-
@gz_sitemap.
|
37
|
+
@xml_sitemap.url_list.size.should eq(2)
|
38
|
+
@gz_sitemap.url_list.size.should eq(2)
|
31
39
|
end
|
32
40
|
|
33
41
|
it "Errors if input doc does not match sitemap schema" do
|
34
42
|
lambda {SitemapChecker::Checker.new('http://www.github.com')}.should raise_error(RuntimeError, 'Invalid Schema')
|
35
43
|
end
|
36
44
|
|
37
|
-
it "returns
|
45
|
+
it "returns status if given a url" do
|
38
46
|
@sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml')
|
39
|
-
@sitemap.
|
40
|
-
end
|
41
|
-
|
42
|
-
it "returns list of urls with responses from siteindex" do
|
43
|
-
@siteindex = SitemapChecker::Checker.new('http://www.github.com/siteindex.xml')
|
44
|
-
@siteindex.status_list.should eq([['http://www.github.com','200'], ['http://www.github.com/404','404'], ['http://www.github.com','200'], ['http://www.github.com/404','404']])
|
47
|
+
SitemapChecker::Checker.get_status(@sitemap.url_list.first).should eq(['http://www.github.com','200'])
|
45
48
|
end
|
46
49
|
|
47
50
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemap_checker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 1889055196096400351
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Gerlando Piro
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-08-22 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: nokogiri
|