sitemap_checker 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # SitemapChecker
2
2
 
3
- Takes a url pointing to an xml or xml.gz sitemap or siteindex file and returns array of status messages for urls contained within.
3
+ Takes a URL pointing to an xml or xml.gz sitemap or siteindex file and returns an array of the URLs contained within.
4
4
 
5
5
  ## Installation
6
6
 
@@ -18,8 +18,13 @@ Or install it yourself as:
18
18
 
19
19
  ## Usage
20
20
 
21
+ Get the list of URLs from an xml or xml.gz sitemap URL
22
+
21
23
  SitemapChecker::Checker.new(url)
22
24
 
25
+ Get the HTTP status of a URL
26
+
27
+ SitemapChecker::Checker.get_status(url)
23
28
 
24
29
  ## Contributing
25
30
 
@@ -5,14 +5,23 @@ require 'zlib'
5
5
 
6
6
  module SitemapChecker
7
7
  class Checker
8
- attr_reader :status_list
8
+ attr_reader :url_list
9
9
 
10
10
  def initialize(url,schema='')
11
11
  @url = url
12
+ @url_list = Array.new
12
13
  @status_list = Array.new
13
14
  process_xml
14
15
  end
15
16
 
17
+ def self.get_status(url)
18
+ begin
19
+ status = [url.content,open(url).status[0]]
20
+ rescue OpenURI::HTTPError => e
21
+ status = [url.content,e.io.status[0]]
22
+ end
23
+ end
24
+
16
25
  private
17
26
 
18
27
  def get_xml_from_url(url)
@@ -28,12 +37,12 @@ module SitemapChecker
28
37
  ixsd = Nokogiri::XML::Schema(open('http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd'))
29
38
  xml = get_xml_from_url(@url)
30
39
  if mxsd.valid?(xml)
31
- @status_list = get_status_list(urls(xml))
40
+ @url_list = urls(xml)
32
41
  elsif ixsd.valid?(xml)
33
42
  maps = urls(xml)
34
43
  maps.each do |map|
35
44
  xml = get_xml_from_url(map)
36
- @status_list += get_status_list(urls(xml))
45
+ @url_list = urls(xml)
37
46
  end
38
47
  else raise 'Invalid Schema'
39
48
  false
@@ -1,3 +1,3 @@
1
1
  module SitemapChecker
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -16,32 +16,35 @@ describe SitemapChecker do
16
16
  stub_request(:get, "http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd").to_return(:status => 200, :body => File.read(@dir + 'fixtures/siteindex.xsd'), :headers => {})
17
17
  end
18
18
 
19
- it "accepts xml and gzipped siteindexes" do
19
+ it "accepts xml siteindexes" do
20
20
  @xml_sitemap = SitemapChecker::Checker.new('http://www.github.com/siteindex.xml')
21
+ @xml_sitemap.url_list.size.should eq(2)
22
+ end
23
+
24
+ it "accepts gzipped siteindexes" do
21
25
  @gz_sitemap = SitemapChecker::Checker.new('http://www.github.com/siteindex.xml.gz')
22
- @xml_sitemap.status_list.size.should eq(4)
23
- @gz_sitemap.status_list.size.should eq(4)
26
+ @gz_sitemap.url_list.size.should eq(2)
27
+ end
28
+
29
+ it "accepts xml sitemaps" do
30
+ @xml_sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml')
31
+ @xml_sitemap.url_list.size.should eq(2)
24
32
  end
25
33
 
26
34
  it "accepts xml and gzipped sitemaps" do
27
35
  @xml_sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml')
28
36
  @gz_sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml.gz')
29
- @xml_sitemap.status_list.size.should eq(2)
30
- @gz_sitemap.status_list.size.should eq(2)
37
+ @xml_sitemap.url_list.size.should eq(2)
38
+ @gz_sitemap.url_list.size.should eq(2)
31
39
  end
32
40
 
33
41
  it "Errors if input doc does not match sitemap schema" do
34
42
  lambda {SitemapChecker::Checker.new('http://www.github.com')}.should raise_error(RuntimeError, 'Invalid Schema')
35
43
  end
36
44
 
37
- it "returns list of urls with responses from sitemap" do
45
+ it "returns status if given a url" do
38
46
  @sitemap = SitemapChecker::Checker.new('http://www.github.com/sitemap.xml')
39
- @sitemap.status_list.should eq([['http://www.github.com','200'], ['http://www.github.com/404','404']])
40
- end
41
-
42
- it "returns list of urls with responses from siteindex" do
43
- @siteindex = SitemapChecker::Checker.new('http://www.github.com/siteindex.xml')
44
- @siteindex.status_list.should eq([['http://www.github.com','200'], ['http://www.github.com/404','404'], ['http://www.github.com','200'], ['http://www.github.com/404','404']])
47
+ SitemapChecker::Checker.get_status(@sitemap.url_list.first).should eq(['http://www.github.com','200'])
45
48
  end
46
49
 
47
50
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitemap_checker
3
3
  version: !ruby/object:Gem::Version
4
- hash: 1317335842608397244
4
+ hash: 1889055196096400351
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Gerlando Piro
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-07-31 00:00:00 Z
18
+ date: 2012-08-22 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: nokogiri