sitemap_checker 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # SitemapChecker
2
2
 
3
- Takes a url pointing to an xml or xml.gz sitemap or siteindex file and returns array of status messages for urls contained within.
3
+ Takes a url pointing to an xml or xml.gz sitemap or siteindex file and returns an array of the urls contained within.
4
4
 
5
5
  ## Installation
6
6
 
@@ -18,8 +18,13 @@ Or install it yourself as:
18
18
 
19
19
  ## Usage
20
20
 
21
+ Get the list of urls from an xml or xml.gz sitemap url:
22
+
21
23
  SitemapChecker::Checker.new(url)
22
24
 
25
+ Get the status of a single url from that list:
26
+
27
+ SitemapChecker::Checker.get_status(url)
23
28
 
24
29
  ## Contributing
25
30
 
@@ -5,14 +5,23 @@ require 'zlib'
5
5
 
6
6
  module SitemapChecker
7
7
  class Checker
8
- attr_reader :status_list
8
+ attr_reader :url_list
9
9
 
10
10
  def initialize(url,schema='')
11
11
  @url = url
12
+ @url_list = Array.new
12
13
  @status_list = Array.new
13
14
  process_xml
14
15
  end
15
16
 
17
+ def self.get_status(url)
18
+ begin
19
+ status = [url.content,open(url).status[0]]
20
+ rescue OpenURI::HTTPError => e
21
+ status = [url.content,e.io.status[0]]
22
+ end
23
+ end
24
+
16
25
  private
17
26
 
18
27
  def get_xml_from_url(url)
@@ -28,12 +37,12 @@ module SitemapChecker
28
37
  ixsd = Nokogiri::XML::Schema(open('http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd'))
29
38
  xml = get_xml_from_url(@url)
30
39
  if mxsd.valid?(xml)
31
- @status_list = get_status_list(urls(xml))
40
+ @url_list = urls(xml)
32
41
  elsif ixsd.valid?(xml)
33
42
  maps = urls(xml)
34
43
  maps.each do |map|
35
44
  xml = get_xml_from_url(map)
36
- @status_list += get_status_list(urls(xml))
45
+ @url_list = urls(xml)
37
46
  end
38
47
  else raise 'Invalid Schema'
39
48
  false
@@ -1,3 +1,3 @@
1
1
  module SitemapChecker
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -16,32 +16,35 @@ describe SitemapChecker do
16
16
  stub_request(:get, "http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd").to_return(:status => 200, :body => File.read(@dir + 'fixtures/siteindex.xsd'), :headers => {})
17
17
  end
18
18
 
19
- it "accepts xml and gzipped siteindexes" do
19
+ it "accepts xml siteindexes" do
20
20
  @xml_sitemap = SitemapChecker::Checker.new('http://www.github.com/siteindex.xml')
21
+ @xml_sitemap.url_list.size.should eq(2)
22
+ end
23
+
24
+ it "accepts gzipped siteindexes" do
21
25
  @gz_sitemap = SitemapChecker::Checker.new('http://www.github.com/siteindex.xml.gz')
22
- @xml_sitemap.status_list.size.should eq(4)
23
- @gz_sitemap.status_list.size.should eq(4)
26
+ @gz_sitemap.url_list.size.should eq(2)
27
+ end
28
+
29
+ it "accepts xml sitemaps" do
30
+ @xml_sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml')
31
+ @xml_sitemap.url_list.size.should eq(2)
24
32
  end
25
33
 
26
34
  it "accepts xml and gzipped sitemaps" do
27
35
  @xml_sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml')
28
36
  @gz_sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml.gz')
29
- @xml_sitemap.status_list.size.should eq(2)
30
- @gz_sitemap.status_list.size.should eq(2)
37
+ @xml_sitemap.url_list.size.should eq(2)
38
+ @gz_sitemap.url_list.size.should eq(2)
31
39
  end
32
40
 
33
41
  it "Errors if input doc does not match sitemap schema" do
34
42
  lambda {SitemapChecker::Checker.new('http://www.github.com')}.should raise_error(RuntimeError, 'Invalid Schema')
35
43
  end
36
44
 
37
- it "returns list of urls with responses from sitemap" do
45
+ it "returns status if given a url" do
38
46
  @sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml')
39
- @sitemap.status_list.should eq([['http://www.github.com','200'], ['http://www.github.com/404','404']])
40
- end
41
-
42
- it "returns list of urls with responses from siteindex" do
43
- @siteindex = SitemapChecker::Checker.new('http://www.github.com/siteindex.xml')
44
- @siteindex.status_list.should eq([['http://www.github.com','200'], ['http://www.github.com/404','404'], ['http://www.github.com','200'], ['http://www.github.com/404','404']])
47
+ SitemapChecker::Checker.get_status(@sitemap.url_list.first).should eq(['http://www.github.com','200'])
45
48
  end
46
49
 
47
50
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitemap_checker
3
3
  version: !ruby/object:Gem::Version
4
- hash: 1317335842608397244
4
+ hash: 1889055196096400351
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Gerlando Piro
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-07-31 00:00:00 Z
18
+ date: 2012-08-22 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: nokogiri