sitemap_checker 0.1.8 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +7 -0
- data/README.md +0 -4
- data/lib/sitemap_checker.rb +2 -2
- data/lib/sitemap_checker/sitemap.rb +19 -31
- data/lib/sitemap_checker/uri.rb +38 -0
- data/lib/sitemap_checker/version.rb +1 -1
- data/spec/fixtures/apple-index.xml +9 -0
- data/spec/fixtures/apple-robots.txt +3 -0
- data/spec/fixtures/apple-sitemap-new.xml +9023 -0
- data/spec/fixtures/apple-sitemap.xml +1287 -0
- data/spec/sitemap_spec.rb +49 -0
- data/spec/uri_spec.rb +27 -0
- metadata +16 -5
- data/lib/sitemap_checker/path.rb +0 -24
- data/spec/sitemap_checker_spec.rb +0 -48
data/spec/sitemap_spec.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'webmock/rspec'
|
3
|
+
require './lib/sitemap_checker/sitemap'
|
4
|
+
require './lib/sitemap_checker/uri'
|
5
|
+
WebMock.disable_net_connect!(:allow_localhost => true)
|
6
|
+
|
7
|
+
describe 'SitemapChecker::Sitemap' do
|
8
|
+
before(:each) do
|
9
|
+
@dir = Pathname.new(File.dirname(__FILE__))
|
10
|
+
stub_request(:any, "http://www.github.com").to_return(:status => 200, :body => 'foo', headers: {'Content-type' => 'text/html'})
|
11
|
+
stub_request(:any, "http://www.github.com/404").to_return(:status => 404, :body => 'foo', headers: {'Content-type' => 'text/html'})
|
12
|
+
stub_request(:any, "http://www.github.com/sitemap.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/sitemap.xml'), headers: {'Content-type' => 'application/xml'})
|
13
|
+
stub_request(:any, "http://www.github.com/sitemap.xml.gz").to_return(:status => 200, :body => File.read(@dir + 'fixtures/sitemap.xml.gz'), headers: {'Content-type' => 'application/octet-stream'})
|
14
|
+
stub_request(:any, "http://www.github.com/siteindex.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/siteindex.xml'), headers: {'Content-type' => 'application/xml'})
|
15
|
+
stub_request(:any, "http://www.github.com/siteindex.xml.gz").to_return(:status => 200, :body => File.read(@dir + 'fixtures/siteindex.xml.gz'), headers: {'Content-type' => 'application/octet-stream'})
|
16
|
+
stub_request(:any, "http://store.apple.com/apple-robots.txt").to_return(:status => 200, :body => File.read(@dir + 'fixtures/apple-robots.txt'), headers: {'Content-type' => 'text/plain'})
|
17
|
+
stub_request(:any, "http://store.apple.com/apple-index.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/apple-index.xml'), headers: {'Content-type' => 'application/xml'})
|
18
|
+
stub_request(:any, "http://store.apple.com/apple-sitemap.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/apple-sitemap.xml'), headers: {'Content-type' => 'application/xml'})
|
19
|
+
stub_request(:any, "http://store.apple.com/apple-sitemap-new.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/apple-sitemap-new.xml'), headers: {'Content-type' => 'application/xml'})
|
20
|
+
end
|
21
|
+
|
22
|
+
it "Sitemap gracefully handles 404s" do
|
23
|
+
lambda { SitemapChecker::Sitemap.new('http://www.github.com/404') }.should_not raise_error
|
24
|
+
end
|
25
|
+
|
26
|
+
it "Sitemap accepts xml siteindexes" do
|
27
|
+
SitemapChecker::Sitemap.new('http://www.github.com/siteindex.xml').locs.size.should eq(4)
|
28
|
+
SitemapChecker::Sitemap.new('http://store.apple.com/apple-index.xml').locs.size.should eq(419)
|
29
|
+
SitemapChecker::Sitemap.new('http://store.apple.com/apple-sitemap.xml').locs.size.should eq(214)
|
30
|
+
SitemapChecker::Sitemap.new('http://store.apple.com/apple-sitemap-new.xml').locs.size.should eq(205)
|
31
|
+
end
|
32
|
+
|
33
|
+
it "Sitemap accepts gzipped siteindexes" do
|
34
|
+
SitemapChecker::Sitemap.new('http://www.github.com/siteindex.xml.gz').locs.size.should eq(4)
|
35
|
+
end
|
36
|
+
|
37
|
+
it "Sitemap accepts xml sitemaps" do
|
38
|
+
SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml').locs.size.should eq(2)
|
39
|
+
end
|
40
|
+
|
41
|
+
it "Sitemap accepts gzipped sitemaps" do
|
42
|
+
SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml.gz').locs.size.should eq(2)
|
43
|
+
end
|
44
|
+
|
45
|
+
it "Sitemap locs are String objects" do
|
46
|
+
SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml').locs.first.class.should eq(String)
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
data/spec/uri_spec.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'webmock/rspec'
|
3
|
+
require './lib/sitemap_checker/uri'
|
4
|
+
WebMock.disable_net_connect!(:allow_localhost => true)
|
5
|
+
|
6
|
+
describe 'SitemapChecker::Uri' do
|
7
|
+
|
8
|
+
before(:each) do
|
9
|
+
@dir = Pathname.new(File.dirname(__FILE__))
|
10
|
+
stub_request(:any, "http://www.github.com/404").to_return(:status => 404)
|
11
|
+
stub_request(:any, "http://www.github.com/sitemap.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/sitemap.xml'), headers: {'Content-type' => 'application/xml'})
|
12
|
+
stub_request(:any, "http://www.github.com/sitemap.xml.gz").to_return(:status => 200, :body => File.read(@dir + 'fixtures/sitemap.xml.gz'), headers: {'Content-type' => 'application/octet-stream'})
|
13
|
+
end
|
14
|
+
|
15
|
+
it "Accepts XML" do
|
16
|
+
SitemapChecker::Uri.new('http://www.github.com/sitemap.xml').xml.class.should eq(Nokogiri::XML::Document)
|
17
|
+
end
|
18
|
+
|
19
|
+
it "Accepts Gzipped XML" do
|
20
|
+
SitemapChecker::Uri.new('http://www.github.com/sitemap.xml.gz').xml.class.should eq(Nokogiri::XML::Document)
|
21
|
+
end
|
22
|
+
|
23
|
+
it "does not contain IO object if not xml or gz" do
|
24
|
+
SitemapChecker::Uri.new('http://www.github.com/404').xml.class.should eq(NilClass)
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemap_checker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -41,18 +41,23 @@ files:
|
|
41
41
|
- Rakefile
|
42
42
|
- lib/sitemap_checker.rb
|
43
43
|
- lib/sitemap_checker/open_uri.rb
|
44
|
-
- lib/sitemap_checker/path.rb
|
45
44
|
- lib/sitemap_checker/sitemap.rb
|
45
|
+
- lib/sitemap_checker/uri.rb
|
46
46
|
- lib/sitemap_checker/version.rb
|
47
47
|
- sitemap_checker.gemspec
|
48
|
+
- spec/fixtures/apple-index.xml
|
49
|
+
- spec/fixtures/apple-robots.txt
|
50
|
+
- spec/fixtures/apple-sitemap-new.xml
|
51
|
+
- spec/fixtures/apple-sitemap.xml
|
48
52
|
- spec/fixtures/siteindex.xml
|
49
53
|
- spec/fixtures/siteindex.xml.gz
|
50
54
|
- spec/fixtures/siteindex.xsd
|
51
55
|
- spec/fixtures/sitemap.xml
|
52
56
|
- spec/fixtures/sitemap.xml.gz
|
53
57
|
- spec/fixtures/sitemap.xsd
|
54
|
-
- spec/sitemap_checker_spec.rb
|
58
|
+
- spec/sitemap_spec.rb
|
55
59
|
- spec/spec_helper.rb
|
60
|
+
- spec/uri_spec.rb
|
56
61
|
homepage: https://github.com/gerlandop/sitemap_checker
|
57
62
|
licenses: []
|
58
63
|
post_install_message:
|
@@ -78,11 +83,17 @@ signing_key:
|
|
78
83
|
specification_version: 3
|
79
84
|
summary: Gets status of Urls in SiteMap
|
80
85
|
test_files:
|
86
|
+
- spec/fixtures/apple-index.xml
|
87
|
+
- spec/fixtures/apple-robots.txt
|
88
|
+
- spec/fixtures/apple-sitemap-new.xml
|
89
|
+
- spec/fixtures/apple-sitemap.xml
|
81
90
|
- spec/fixtures/siteindex.xml
|
82
91
|
- spec/fixtures/siteindex.xml.gz
|
83
92
|
- spec/fixtures/siteindex.xsd
|
84
93
|
- spec/fixtures/sitemap.xml
|
85
94
|
- spec/fixtures/sitemap.xml.gz
|
86
95
|
- spec/fixtures/sitemap.xsd
|
87
|
-
- spec/sitemap_checker_spec.rb
|
96
|
+
- spec/sitemap_spec.rb
|
88
97
|
- spec/spec_helper.rb
|
98
|
+
- spec/uri_spec.rb
|
99
|
+
has_rdoc:
|
data/lib/sitemap_checker/path.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
module SitemapChecker
|
2
|
-
class Path
|
3
|
-
attr_accessor :url, :status
|
4
|
-
|
5
|
-
def initialize(url)
|
6
|
-
@url = url
|
7
|
-
@status = nil
|
8
|
-
end
|
9
|
-
|
10
|
-
def get_status_from_xml(url)
|
11
|
-
status(url.content)
|
12
|
-
end
|
13
|
-
|
14
|
-
def status
|
15
|
-
begin
|
16
|
-
open(@url, "Accept" => @url[/\.xml$/] ? 'application/xml' : 'text/html').status[0]
|
17
|
-
rescue RuntimeError => e
|
18
|
-
e
|
19
|
-
rescue OpenURI::HTTPError => e
|
20
|
-
e.io.status[0]
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
data/spec/sitemap_checker_spec.rb
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'webmock/rspec'
|
3
|
-
require './lib/sitemap_checker'
|
4
|
-
WebMock.disable_net_connect!(:allow_localhost => true)
|
5
|
-
|
6
|
-
describe SitemapChecker do
|
7
|
-
before(:each) do
|
8
|
-
@dir = Pathname.new(File.dirname(__FILE__))
|
9
|
-
stub_request(:any, "http://www.github.com").to_return(:status => 200, :body => 'foo')
|
10
|
-
stub_request(:any, "http://www.github.com/404").to_return(:status => 404, :body => 'foo')
|
11
|
-
stub_request(:any, "http://www.github.com/sitemap.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/sitemap.xml'))
|
12
|
-
stub_request(:any, "http://www.github.com/sitemap.xml.gz").to_return(:status => 200, :body => File.read(@dir + 'fixtures/sitemap.xml.gz'))
|
13
|
-
stub_request(:any, "http://www.github.com/siteindex.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/siteindex.xml'))
|
14
|
-
stub_request(:any, "http://www.github.com/siteindex.xml.gz").to_return(:status => 200, :body => File.read(@dir + 'fixtures/siteindex.xml.gz'))
|
15
|
-
end
|
16
|
-
|
17
|
-
it "Sitemap accepts xml siteindexes" do
|
18
|
-
@list = SitemapChecker::Sitemap.new('http://www.github.com/siteindex.xml')
|
19
|
-
@list.locs.size.should eq(4)
|
20
|
-
end
|
21
|
-
|
22
|
-
it "Sitemap accepts gzipped siteindexes" do
|
23
|
-
@list = SitemapChecker::Sitemap.new('http://www.github.com/siteindex.xml.gz')
|
24
|
-
@list.locs.size.should eq(4)
|
25
|
-
end
|
26
|
-
|
27
|
-
it "Sitemap accepts xml sitemaps" do
|
28
|
-
@list = SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml')
|
29
|
-
@list.locs.size.should eq(2)
|
30
|
-
end
|
31
|
-
|
32
|
-
it "Sitemap accepts xml and gzipped sitemaps" do
|
33
|
-
@xml_sitemap = SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml')
|
34
|
-
@gz_sitemap = SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml.gz')
|
35
|
-
@xml_sitemap.locs.size.should eq(2)
|
36
|
-
@gz_sitemap.locs.size.should eq(2)
|
37
|
-
end
|
38
|
-
|
39
|
-
it "Sitemap locs are Path objects" do
|
40
|
-
@xml_sitemap = SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml')
|
41
|
-
@xml_sitemap.locs.first.class.should eq(SitemapChecker::Path)
|
42
|
-
end
|
43
|
-
|
44
|
-
it "Path#status returns status code" do
|
45
|
-
SitemapChecker::Path.new('http://www.github.com').status.should eq('200')
|
46
|
-
end
|
47
|
-
|
48
|
-
end
|