sitemap_checker 0.1.8 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +7 -0
- data/README.md +0 -4
- data/lib/sitemap_checker.rb +2 -2
- data/lib/sitemap_checker/sitemap.rb +19 -31
- data/lib/sitemap_checker/uri.rb +38 -0
- data/lib/sitemap_checker/version.rb +1 -1
- data/spec/fixtures/apple-index.xml +9 -0
- data/spec/fixtures/apple-robots.txt +3 -0
- data/spec/fixtures/apple-sitemap-new.xml +9023 -0
- data/spec/fixtures/apple-sitemap.xml +1287 -0
- data/spec/sitemap_spec.rb +49 -0
- data/spec/uri_spec.rb +27 -0
- metadata +16 -5
- data/lib/sitemap_checker/path.rb +0 -24
- data/spec/sitemap_checker_spec.rb +0 -48
data/spec/sitemap_spec.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'webmock/rspec'
|
3
|
+
require './lib/sitemap_checker/sitemap'
|
4
|
+
require './lib/sitemap_checker/uri'
|
5
|
+
WebMock.disable_net_connect!(:allow_localhost => true)
|
6
|
+
|
7
|
+
describe 'SitemapChecker::Sitemap' do
|
8
|
+
before(:each) do
|
9
|
+
@dir = Pathname.new(File.dirname(__FILE__))
|
10
|
+
stub_request(:any, "http://www.github.com").to_return(:status => 200, :body => 'foo', headers: {'Content-type' => 'text/html'})
|
11
|
+
stub_request(:any, "http://www.github.com/404").to_return(:status => 404, :body => 'foo', headers: {'Content-type' => 'text/html'})
|
12
|
+
stub_request(:any, "http://www.github.com/sitemap.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/sitemap.xml'), headers: {'Content-type' => 'application/xml'})
|
13
|
+
stub_request(:any, "http://www.github.com/sitemap.xml.gz").to_return(:status => 200, :body => File.read(@dir + 'fixtures/sitemap.xml.gz'), headers: {'Content-type' => 'application/octet-stream'})
|
14
|
+
stub_request(:any, "http://www.github.com/siteindex.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/siteindex.xml'), headers: {'Content-type' => 'application/xml'})
|
15
|
+
stub_request(:any, "http://www.github.com/siteindex.xml.gz").to_return(:status => 200, :body => File.read(@dir + 'fixtures/siteindex.xml.gz'), headers: {'Content-type' => 'application/octet-stream'})
|
16
|
+
stub_request(:any, "http://store.apple.com/apple-robots.txt").to_return(:status => 200, :body => File.read(@dir + 'fixtures/apple-robots.txt'), headers: {'Content-type' => 'text/plain'})
|
17
|
+
stub_request(:any, "http://store.apple.com/apple-index.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/apple-index.xml'), headers: {'Content-type' => 'application/xml'})
|
18
|
+
stub_request(:any, "http://store.apple.com/apple-sitemap.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/apple-sitemap.xml'), headers: {'Content-type' => 'application/xml'})
|
19
|
+
stub_request(:any, "http://store.apple.com/apple-sitemap-new.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/apple-sitemap-new.xml'), headers: {'Content-type' => 'application/xml'})
|
20
|
+
end
|
21
|
+
|
22
|
+
it "Sitemap gracefully handles 404s" do
|
23
|
+
lambda { SitemapChecker::Sitemap.new('http://www.github.com/404') }.should_not raise_error
|
24
|
+
end
|
25
|
+
|
26
|
+
it "Sitemap accepts xml siteindexes" do
|
27
|
+
SitemapChecker::Sitemap.new('http://www.github.com/siteindex.xml').locs.size.should eq(4)
|
28
|
+
SitemapChecker::Sitemap.new('http://store.apple.com/apple-index.xml').locs.size.should eq(419)
|
29
|
+
SitemapChecker::Sitemap.new('http://store.apple.com/apple-sitemap.xml').locs.size.should eq(214)
|
30
|
+
SitemapChecker::Sitemap.new('http://store.apple.com/apple-sitemap-new.xml').locs.size.should eq(205)
|
31
|
+
end
|
32
|
+
|
33
|
+
it "Sitemap accepts gzipped siteindexes" do
|
34
|
+
SitemapChecker::Sitemap.new('http://www.github.com/siteindex.xml.gz').locs.size.should eq(4)
|
35
|
+
end
|
36
|
+
|
37
|
+
it "Sitemap accepts xml sitemaps" do
|
38
|
+
SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml').locs.size.should eq(2)
|
39
|
+
end
|
40
|
+
|
41
|
+
it "Sitemap accepts gzipped sitemaps" do
|
42
|
+
SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml.gz').locs.size.should eq(2)
|
43
|
+
end
|
44
|
+
|
45
|
+
it "Sitemap locs are String objects" do
|
46
|
+
SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml').locs.first.class.should eq(String)
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
data/spec/uri_spec.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'webmock/rspec'
|
3
|
+
require './lib/sitemap_checker/uri'
|
4
|
+
WebMock.disable_net_connect!(:allow_localhost => true)
|
5
|
+
|
6
|
+
describe 'SitemapChecker::Uri' do
|
7
|
+
|
8
|
+
before(:each) do
|
9
|
+
@dir = Pathname.new(File.dirname(__FILE__))
|
10
|
+
stub_request(:any, "http://www.github.com/404").to_return(:status => 404)
|
11
|
+
stub_request(:any, "http://www.github.com/sitemap.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/sitemap.xml'), headers: {'Content-type' => 'application/xml'})
|
12
|
+
stub_request(:any, "http://www.github.com/sitemap.xml.gz").to_return(:status => 200, :body => File.read(@dir + 'fixtures/sitemap.xml.gz'), headers: {'Content-type' => 'application/octet-stream'})
|
13
|
+
end
|
14
|
+
|
15
|
+
it "Accepts XML" do
|
16
|
+
SitemapChecker::Uri.new('http://www.github.com/sitemap.xml').xml.class.should eq(Nokogiri::XML::Document)
|
17
|
+
end
|
18
|
+
|
19
|
+
it "Accepts Gzipped XML" do
|
20
|
+
SitemapChecker::Uri.new('http://www.github.com/sitemap.xml.gz').xml.class.should eq(Nokogiri::XML::Document)
|
21
|
+
end
|
22
|
+
|
23
|
+
it "does not contain IO object if not xml or gz" do
|
24
|
+
SitemapChecker::Uri.new('http://www.github.com/404').xml.class.should eq(NilClass)
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemap_checker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -41,18 +41,23 @@ files:
|
|
41
41
|
- Rakefile
|
42
42
|
- lib/sitemap_checker.rb
|
43
43
|
- lib/sitemap_checker/open_uri.rb
|
44
|
-
- lib/sitemap_checker/path.rb
|
45
44
|
- lib/sitemap_checker/sitemap.rb
|
45
|
+
- lib/sitemap_checker/uri.rb
|
46
46
|
- lib/sitemap_checker/version.rb
|
47
47
|
- sitemap_checker.gemspec
|
48
|
+
- spec/fixtures/apple-index.xml
|
49
|
+
- spec/fixtures/apple-robots.txt
|
50
|
+
- spec/fixtures/apple-sitemap-new.xml
|
51
|
+
- spec/fixtures/apple-sitemap.xml
|
48
52
|
- spec/fixtures/siteindex.xml
|
49
53
|
- spec/fixtures/siteindex.xml.gz
|
50
54
|
- spec/fixtures/siteindex.xsd
|
51
55
|
- spec/fixtures/sitemap.xml
|
52
56
|
- spec/fixtures/sitemap.xml.gz
|
53
57
|
- spec/fixtures/sitemap.xsd
|
54
|
-
- spec/sitemap_checker_spec.rb
|
58
|
+
- spec/sitemap_spec.rb
|
55
59
|
- spec/spec_helper.rb
|
60
|
+
- spec/uri_spec.rb
|
56
61
|
homepage: https://github.com/gerlandop/sitemap_checker
|
57
62
|
licenses: []
|
58
63
|
post_install_message:
|
@@ -78,11 +83,17 @@ signing_key:
|
|
78
83
|
specification_version: 3
|
79
84
|
summary: Gets status of Urls in SiteMap
|
80
85
|
test_files:
|
86
|
+
- spec/fixtures/apple-index.xml
|
87
|
+
- spec/fixtures/apple-robots.txt
|
88
|
+
- spec/fixtures/apple-sitemap-new.xml
|
89
|
+
- spec/fixtures/apple-sitemap.xml
|
81
90
|
- spec/fixtures/siteindex.xml
|
82
91
|
- spec/fixtures/siteindex.xml.gz
|
83
92
|
- spec/fixtures/siteindex.xsd
|
84
93
|
- spec/fixtures/sitemap.xml
|
85
94
|
- spec/fixtures/sitemap.xml.gz
|
86
95
|
- spec/fixtures/sitemap.xsd
|
87
|
-
- spec/sitemap_checker_spec.rb
|
96
|
+
- spec/sitemap_spec.rb
|
88
97
|
- spec/spec_helper.rb
|
98
|
+
- spec/uri_spec.rb
|
99
|
+
has_rdoc:
|
data/lib/sitemap_checker/path.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
module SitemapChecker
|
2
|
-
class Path
|
3
|
-
attr_accessor :url, :status
|
4
|
-
|
5
|
-
def initialize(url)
|
6
|
-
@url = url
|
7
|
-
@status = nil
|
8
|
-
end
|
9
|
-
|
10
|
-
def get_status_from_xml(url)
|
11
|
-
status(url.content)
|
12
|
-
end
|
13
|
-
|
14
|
-
def status
|
15
|
-
begin
|
16
|
-
open(@url, "Accept" => @url[/\.xml$/] ? 'application/xml' : 'text/html').status[0]
|
17
|
-
rescue RuntimeError => e
|
18
|
-
e
|
19
|
-
rescue OpenURI::HTTPError => e
|
20
|
-
e.io.status[0]
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
data/spec/sitemap_checker_spec.rb
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'webmock/rspec'
|
3
|
-
require './lib/sitemap_checker'
|
4
|
-
WebMock.disable_net_connect!(:allow_localhost => true)
|
5
|
-
|
6
|
-
describe SitemapChecker do
|
7
|
-
before(:each) do
|
8
|
-
@dir = Pathname.new(File.dirname(__FILE__))
|
9
|
-
stub_request(:any, "http://www.github.com").to_return(:status => 200, :body => 'foo')
|
10
|
-
stub_request(:any, "http://www.github.com/404").to_return(:status => 404, :body => 'foo')
|
11
|
-
stub_request(:any, "http://www.github.com/sitemap.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/sitemap.xml'))
|
12
|
-
stub_request(:any, "http://www.github.com/sitemap.xml.gz").to_return(:status => 200, :body => File.read(@dir + 'fixtures/sitemap.xml.gz'))
|
13
|
-
stub_request(:any, "http://www.github.com/siteindex.xml").to_return(:status => 200, :body => File.read(@dir + 'fixtures/siteindex.xml'))
|
14
|
-
stub_request(:any, "http://www.github.com/siteindex.xml.gz").to_return(:status => 200, :body => File.read(@dir + 'fixtures/siteindex.xml.gz'))
|
15
|
-
end
|
16
|
-
|
17
|
-
it "Sitemap accepts xml siteindexes" do
|
18
|
-
@list = SitemapChecker::Sitemap.new('http://www.github.com/siteindex.xml')
|
19
|
-
@list.locs.size.should eq(4)
|
20
|
-
end
|
21
|
-
|
22
|
-
it "Sitemap accepts gzipped siteindexes" do
|
23
|
-
@list = SitemapChecker::Sitemap.new('http://www.github.com/siteindex.xml.gz')
|
24
|
-
@list.locs.size.should eq(4)
|
25
|
-
end
|
26
|
-
|
27
|
-
it "Sitemap accepts xml sitemaps" do
|
28
|
-
@list = SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml')
|
29
|
-
@list.locs.size.should eq(2)
|
30
|
-
end
|
31
|
-
|
32
|
-
it "Sitemap accepts xml and gzipped sitemaps" do
|
33
|
-
@xml_sitemap = SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml')
|
34
|
-
@gz_sitemap = SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml.gz')
|
35
|
-
@xml_sitemap.locs.size.should eq(2)
|
36
|
-
@gz_sitemap.locs.size.should eq(2)
|
37
|
-
end
|
38
|
-
|
39
|
-
it "Sitemap locs are Path objects" do
|
40
|
-
@xml_sitemap = SitemapChecker::Sitemap.new('http://www.github.com/sitemap.xml')
|
41
|
-
@xml_sitemap.locs.first.class.should eq(SitemapChecker::Path)
|
42
|
-
end
|
43
|
-
|
44
|
-
it "Path#status returns status code" do
|
45
|
-
SitemapChecker::Path.new('http://www.github.com').status.should eq('200')
|
46
|
-
end
|
47
|
-
|
48
|
-
end
|