sitemap_generator 4.0.alpha → 4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,15 +14,15 @@ module SitemapGenerator
14
14
  end
15
15
  end
16
16
 
17
- # If no +filename+ or +namer+ is provided, the default namer is used. For sitemap
18
- # files this generates names like <tt>sitemap1.xml.gz</tt>, <tt>sitemap2.xml.gz</tt> and so on,
17
+ # If no +filename+ or +namer+ is provided, the default namer is used, which
18
+ # generates names like <tt>sitemap.xml.gz</tt>, <tt>sitemap1.xml.gz</tt>, <tt>sitemap2.xml.gz</tt> and so on.
19
19
  #
20
20
  # === Options
21
21
  # * <tt>adapter</tt> - SitemapGenerator::Adapter subclass
22
22
  # * <tt>filename</tt> - full name of the file e.g. <tt>'sitemap1.xml.gz'</tt>
23
23
  # * <tt>host</tt> - host name for URLs. The full URL to the file is then constructed from
24
24
  # the <tt>host</tt>, <tt>sitemaps_path</tt> and <tt>filename</tt>
25
- # * <tt>namer</tt> - a SitemapGenerator::SitemapNamer instance. Can be passed instead of +filename+.
25
+ # * <tt>namer</tt> - a SitemapGenerator::SimpleNamer instance. Can be passed instead of +filename+.
26
26
  # * <tt>public_path</tt> - path to the "public" directory, or the directory you want to
27
27
  # write sitemaps in. Default is a directory <tt>public/</tt>
28
28
  # in the current working directory, or relative to the Rails root
@@ -30,7 +30,7 @@ module SitemapGenerator
30
30
  # * <tt>sitemaps_path</tt> - gives the path relative to the <tt>public_path</tt> in which to
31
31
  # write sitemaps e.g. <tt>sitemaps/</tt>.
32
32
  # * <tt>verbose</tt> - whether to output a summary to STDOUT. Default +false+.
33
- # * <tt>create_index</tt> - whether to create a sitemap index. Default +true+. See LinkSet.
33
+ # * <tt>create_index</tt> - whether to create a sitemap index. Default `:auto`. See LinkSet.
34
34
  # Only applies to the SitemapIndexLocation object.
35
35
  def initialize(opts={})
36
36
  SitemapGenerator::Utilities.assert_valid_keys(opts, [:adapter, :public_path, :sitemaps_path, :host, :filename, :namer, :verbose, :create_index])
@@ -127,7 +127,7 @@ module SitemapGenerator
127
127
  class SitemapIndexLocation < SitemapLocation
128
128
  def initialize(opts={})
129
129
  if !opts[:filename] && !opts[:namer]
130
- opts[:namer] = SitemapGenerator::SitemapIndexNamer.new(:sitemap_index)
130
+ opts[:namer] = SitemapGenerator::SitemapIndexNamer.new(:sitemap)
131
131
  end
132
132
  super(opts)
133
133
  end
@@ -1,5 +1,7 @@
1
1
  module SitemapGenerator
2
2
  # A class for generating sitemap names given the base for the filename.
3
+ # Deprecated. Rather use the <tt>SitemapGenerator::SimpleNamer</tt> class and the
4
+ # +namer+ option on your sitemap object.
3
5
  #
4
6
  # === Example
5
7
  # namer = SitemapNamer.new(:sitemap)
@@ -50,7 +52,9 @@ module SitemapGenerator
50
52
  end
51
53
  end
52
54
 
53
- # A Namer for Sitemap Indexes. The name never changes.
55
+ # A Namer for Sitemap Indexes.
56
+ # Deprecated. Rather use the <tt>SitemapGenerator::SimpleNamer</tt> class and the
57
+ # +namer+ option on your sitemap object.
54
58
  class SitemapIndexNamer < SitemapNamer
55
59
  def to_s
56
60
  "#{@base}#{@options[:extension]}"
@@ -68,14 +72,19 @@ module SitemapGenerator
68
72
  # * sitemap3.xml.gz
69
73
  # * ...
70
74
  #
75
+ # Arguments:
76
+ # base - string or symbol that forms the base of the generated filename e.g.
77
+ # if `:geo`, files are generated like `geo.xml.gz`, `geo1.xml.gz`, `geo2.xml.gz` etc.
78
+ #
71
79
  # Options:
72
80
  # :extension - Default: '.xml.gz'. File extension to append.
73
81
  # :start - Default: 1. Numerical index at which to start counting.
74
- # :zero - Default: nil. Could be a string or number that gives part
75
- # of the first name in the sequence. So in the old naming scheme
76
- # setting this to '_index' would produce 'sitemap_index.xml.gz' as
82
+ # :zero - Default: nil. A string or number that is appended to +base+
83
+ # to create the first name in the sequence. So setting this
84
+ # to '_index' would produce 'sitemap_index.xml.gz' as
77
85
  # the first name. Thereafter, the numerical index defined by +start+
78
- # is used.
86
+ # is used, and subsequent names would be 'sitemap1.xml.gz', 'sitemap2.xml.gz', etc.
87
+ # In these examples the `base` string is assumed to be 'sitemap'.
79
88
  class SimpleNamer < SitemapNamer
80
89
  def initialize(base, options={})
81
90
  super_options = SitemapGenerator::Utilities.reverse_merge(options,
@@ -1,5 +1,7 @@
1
1
  SitemapGenerator::Sitemap.default_host = "http://www.example.com"
2
2
  SitemapGenerator::Sitemap.yahoo_app_id = false
3
+ SitemapGenerator::Sitemap.create_index = true
4
+ SitemapGenerator::Sitemap.namer = SitemapGenerator::SimpleNamer.new(:sitemap, :zero => '_index')
3
5
 
4
6
  SitemapGenerator::Sitemap.add_links do |sitemap|
5
7
  sitemap.add '/contents', :priority => 0.7, :changefreq => 'daily'
@@ -1,4 +1,5 @@
1
1
  SitemapGenerator::Sitemap.default_host = "http://www.example.com"
2
+
2
3
  SitemapGenerator::Sitemap.create(
3
4
  :include_root => true, :include_index => true,
4
5
  :filename => :new_sitemaps, :sitemaps_path => 'fr/') do
@@ -13,16 +14,25 @@ SitemapGenerator::Sitemap.create(
13
14
  add '/three'
14
15
  end
15
16
 
16
- # Test a namer
17
+ # Test a deprecated namer
17
18
  group(:sitemaps_namer => SitemapGenerator::SitemapNamer.new(:abc, :start => 3)) do
18
19
  add '/four'
19
20
  add '/five'
20
21
  add '/six'
21
22
  end
22
23
 
24
+ # Test a simple namer
25
+ group(:sitemaps_namer => SitemapGenerator::SimpleNamer.new(:def)) do
26
+ add '/four'
27
+ add '/five'
28
+ add '/six'
29
+ end
30
+
23
31
  add '/seven'
24
32
 
25
- # This should be in a file of its own
33
+ # This should be in a file of its own.
34
+ # Not technically valid to have a link with a different host, but people like
35
+ # to do strange things sometimes.
26
36
  group(:sitemaps_host => "http://exceptional.com") do
27
37
  add '/eight'
28
38
  add '/nine'
@@ -33,5 +43,7 @@ SitemapGenerator::Sitemap.create(
33
43
  # This should have no effect. Already added default links.
34
44
  group(:include_root => true, :include_index => true) {}
35
45
 
46
+ # Not technically valid to have a link with a different host, but people like
47
+ # to do strange things sometimes
36
48
  add "/merchant_path", :host => "https://www.merchanthost.com"
37
49
  end
@@ -25,4 +25,29 @@ describe "SitemapGenerator" do
25
25
  alternate.attribute('href').value.should == 'http://www.example.de/link_with_alternate.html'
26
26
  end
27
27
 
28
+ it "should add alternate links to sitemap with rel nofollow" do
29
+ xml_fragment = SitemapGenerator::Builder::SitemapUrl.new('link_with_alternates.html',
30
+ :host => 'http://www.example.com',
31
+ :alternates => [
32
+ {
33
+ :lang => 'de',
34
+ :href => 'http://www.example.de/link_with_alternate.html',
35
+ :nofollow => true
36
+ }
37
+ ]
38
+ ).to_xml
39
+
40
+ doc = Nokogiri::XML.parse("<root xmlns='http://www.sitemaps.org/schemas/sitemap/0.9' xmlns:xhtml='http://www.w3.org/1999/xhtml'>#{xml_fragment}</root>")
41
+ url = doc.css('url')
42
+ url.should_not be_nil
43
+ url.css('loc').text.should == 'http://www.example.com/link_with_alternates.html'
44
+
45
+ alternate = url.at_xpath('xhtml:link')
46
+ alternate.should_not be_nil
47
+ alternate.attribute('rel').value.should == 'alternate nofollow'
48
+ alternate.attribute('hreflang').value.should == 'de'
49
+ alternate.attribute('href').value.should == 'http://www.example.de/link_with_alternate.html'
50
+ end
51
+
28
52
  end
53
+
@@ -1,35 +1,33 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe 'SitemapGenerator::Builder::SitemapFile' do
4
- before :each do
5
- @loc = SitemapGenerator::SitemapLocation.new(:namer => SitemapGenerator::SitemapNamer.new(:sitemap), :public_path => 'tmp/', :sitemaps_path => 'test/', :host => 'http://example.com/')
6
- @s = SitemapGenerator::Builder::SitemapFile.new(@loc)
7
- end
4
+ let(:location) { SitemapGenerator::SitemapLocation.new(:namer => SitemapGenerator::SitemapNamer.new(:sitemap), :public_path => 'tmp/', :sitemaps_path => 'test/', :host => 'http://example.com/') }
5
+ let(:sitemap) { SitemapGenerator::Builder::SitemapFile.new(location) }
8
6
 
9
7
  it "should have a default namer" do
10
- @s = SitemapGenerator::Builder::SitemapFile.new
11
- @s.location.filename.should == 'sitemap1.xml.gz'
8
+ sitemap = SitemapGenerator::Builder::SitemapFile.new
9
+ sitemap.location.filename.should == 'sitemap1.xml.gz'
12
10
  end
13
11
 
14
12
  it "should return the name of the sitemap file" do
15
- @s.location.filename.should == 'sitemap1.xml.gz'
13
+ sitemap.location.filename.should == 'sitemap1.xml.gz'
16
14
  end
17
15
 
18
16
  it "should return the URL" do
19
- @s.location.url.should == 'http://example.com/test/sitemap1.xml.gz'
17
+ sitemap.location.url.should == 'http://example.com/test/sitemap1.xml.gz'
20
18
  end
21
19
 
22
20
  it "should return the path" do
23
- @s.location.path.should == File.expand_path('tmp/test/sitemap1.xml.gz')
21
+ sitemap.location.path.should == File.expand_path('tmp/test/sitemap1.xml.gz')
24
22
  end
25
23
 
26
24
  it "should be empty" do
27
- @s.empty?.should be_true
28
- @s.link_count.should == 0
25
+ sitemap.empty?.should be_true
26
+ sitemap.link_count.should == 0
29
27
  end
30
28
 
31
29
  it "should not be finalized" do
32
- @s.finalized?.should be_false
30
+ sitemap.finalized?.should be_false
33
31
  end
34
32
 
35
33
  it "should raise if no default host is set" do
@@ -39,41 +37,96 @@ describe 'SitemapGenerator::Builder::SitemapFile' do
39
37
  describe "lastmod" do
40
38
  it "should be the file last modified time" do
41
39
  lastmod = (Time.now - 1209600)
42
- @s.location.reserve_name
43
- File.expects(:mtime).with(@s.location.path).returns(lastmod)
44
- @s.lastmod.should == lastmod
40
+ sitemap.location.reserve_name
41
+ File.expects(:mtime).with(sitemap.location.path).returns(lastmod)
42
+ sitemap.lastmod.should == lastmod
45
43
  end
46
44
 
47
45
  it "should be nil if the location has not reserved a name" do
48
46
  File.expects(:mtime).never
49
- @s.lastmod.should be_nil
47
+ sitemap.lastmod.should be_nil
50
48
  end
51
-
49
+
52
50
  it "should be nil if location has reserved a name and the file DNE" do
53
- @s.location.reserve_name
51
+ sitemap.location.reserve_name
54
52
  File.expects(:mtime).raises(Errno::ENOENT)
55
- @s.lastmod.should be_nil
53
+ sitemap.lastmod.should be_nil
56
54
  end
57
55
  end
58
56
 
59
57
  describe "new" do
58
+ let(:original_sitemap) { sitemap }
59
+ let(:new_sitemap) { sitemap.new }
60
+
60
61
  before :each do
61
- @orig_s = @s
62
- @s = @s.new
62
+ original_sitemap
63
+ new_sitemap
63
64
  end
64
65
 
65
66
  it "should inherit the same options" do
66
67
  # The name is the same because the original sitemap was not finalized
67
- @s.location.url.should == 'http://example.com/test/sitemap1.xml.gz'
68
- @s.location.path.should == File.expand_path('tmp/test/sitemap1.xml.gz')
68
+ new_sitemap.location.url.should == 'http://example.com/test/sitemap1.xml.gz'
69
+ new_sitemap.location.path.should == File.expand_path('tmp/test/sitemap1.xml.gz')
69
70
  end
70
71
 
71
72
  it "should not share the same location instance" do
72
- @s.location.should_not be(@orig_s.location)
73
+ new_sitemap.location.should_not be(original_sitemap.location)
73
74
  end
74
75
 
75
76
  it "should inherit the same namer instance" do
76
- @s.location.namer.should == @orig_s.location.namer
77
+ new_sitemap.location.namer.should == original_sitemap.location.namer
78
+ end
79
+ end
80
+
81
+ describe "reserve_name" do
82
+ it "should reserve the name from the location" do
83
+ sitemap.reserved_name?.should be_false
84
+ sitemap.location.expects(:reserve_name).returns('name')
85
+ sitemap.reserve_name
86
+ sitemap.reserved_name?.should be_true
87
+ sitemap.instance_variable_get(:@reserved_name).should == 'name'
88
+ end
89
+
90
+ it "should be safe to call multiple times" do
91
+ sitemap.location.expects(:reserve_name).returns('name').once
92
+ sitemap.reserve_name
93
+ sitemap.reserve_name
94
+ end
95
+ end
96
+
97
+ describe "add" do
98
+ it "should use the host provided" do
99
+ url = SitemapGenerator::Builder::SitemapUrl.new('/one', :host => 'http://newhost.com/')
100
+ SitemapGenerator::Builder::SitemapUrl.expects(:new).with('/one', :host => 'http://newhost.com').returns(url)
101
+ sitemap.add '/one', :host => 'http://newhost.com'
102
+ end
103
+
104
+ it "should use the host from the location" do
105
+ url = SitemapGenerator::Builder::SitemapUrl.new('/one', :host => 'http://example.com/')
106
+ SitemapGenerator::Builder::SitemapUrl.expects(:new).with('/one', :host => 'http://example.com/').returns(url)
107
+ sitemap.add '/one'
108
+ end
109
+ end
110
+
111
+ describe "ellipsis" do
112
+ it "should not modify when less than or equal to max" do
113
+ (1..10).each do |i|
114
+ string = 'a'*i
115
+ sitemap.send(:ellipsis, string, 10).should == string
116
+ end
117
+ end
118
+
119
+ it "should replace last 3 characters with ellipsis when greater than max" do
120
+ (1..5).each do |i|
121
+ string = 'aaaaa' + 'a'*i
122
+ sitemap.send(:ellipsis, string, 5).should == 'aa...'
123
+ end
124
+ end
125
+
126
+ it "should not freak out when string too small" do
127
+ sitemap.send(:ellipsis, 'a', 1).should == 'a'
128
+ sitemap.send(:ellipsis, 'aa', 1).should == '...'
129
+ sitemap.send(:ellipsis, 'aaa', 1).should == '...'
77
130
  end
78
131
  end
79
132
  end
@@ -1,38 +1,101 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe 'SitemapGenerator::Builder::SitemapIndexFile' do
4
+ let(:location) { SitemapGenerator::SitemapLocation.new(:filename => 'sitemap.xml.gz', :public_path => '/public/', :host => 'http://example.com/') }
5
+ let(:index) { SitemapGenerator::Builder::SitemapIndexFile.new(location) }
6
+
4
7
  before :each do
5
- @loc = SitemapGenerator::SitemapLocation.new(:filename => 'sitemap_index.xml.gz', :public_path => '/public/', :sitemaps_path => 'test/', :host => 'http://example.com/')
6
- @s = SitemapGenerator::Builder::SitemapIndexFile.new(@loc)
8
+ index.location[:sitemaps_path] = 'test/'
7
9
  end
8
10
 
9
11
  it "should return the URL" do
10
- @s.location.url.should == 'http://example.com/test/sitemap_index.xml.gz'
12
+ index.location.url.should == 'http://example.com/test/sitemap.xml.gz'
11
13
  end
12
14
 
13
15
  it "should return the path" do
14
- @s.location.path.should == '/public/test/sitemap_index.xml.gz'
16
+ index.location.path.should == '/public/test/sitemap.xml.gz'
15
17
  end
16
18
 
17
19
  it "should be empty" do
18
- @s.empty?.should be_true
19
- @s.link_count.should == 0
20
+ index.empty?.should be_true
21
+ index.link_count.should == 0
20
22
  end
21
23
 
22
24
  it "should not have a last modification data" do
23
- @s.lastmod.should be_nil
25
+ index.lastmod.should be_nil
24
26
  end
25
27
 
26
28
  it "should not be finalized" do
27
- @s.finalized?.should be_false
29
+ index.finalized?.should be_false
28
30
  end
29
31
 
30
- it "filename should default to sitemap_index" do
31
- @s.location.filename.should == 'sitemap_index.xml.gz'
32
+ it "filename should be set" do
33
+ index.location.filename.should == 'sitemap.xml.gz'
32
34
  end
33
35
 
34
36
  it "should have a default namer" do
35
- @s = SitemapGenerator::Builder::SitemapIndexFile.new
36
- @s.location.filename.should == 'sitemap_index.xml.gz'
37
+ index = SitemapGenerator::Builder::SitemapIndexFile.new
38
+ index.location.filename.should == 'sitemap.xml.gz'
39
+ end
40
+
41
+ describe "link_count" do
42
+ it "should return the link count" do
43
+ index.instance_variable_set(:@link_count, 10)
44
+ index.link_count.should == 10
45
+ end
46
+ end
47
+
48
+ describe "create_index?" do
49
+ it "should return false" do
50
+ index.location[:create_index] = false
51
+ index.create_index?.should be_false
52
+
53
+ index.instance_variable_set(:@link_count, 10)
54
+ index.create_index?.should be_false
55
+ end
56
+
57
+ it "should return true" do
58
+ index.location[:create_index] = true
59
+ index.create_index?.should be_true
60
+
61
+ index.instance_variable_set(:@link_count, 1)
62
+ index.create_index?.should be_true
63
+ end
64
+
65
+ it "when :auto, should be true if more than one link" do
66
+ index.instance_variable_set(:@link_count, 1)
67
+ index.location[:create_index] = :auto
68
+ index.create_index?.should be_false
69
+
70
+ index.instance_variable_set(:@link_count, 2)
71
+ index.create_index?.should be_true
72
+ end
73
+ end
74
+
75
+ describe "add" do
76
+ it "should use the host provided" do
77
+ url = SitemapGenerator::Builder::SitemapIndexUrl.new('/one', :host => 'http://newhost.com/')
78
+ SitemapGenerator::Builder::SitemapIndexUrl.expects(:new).with('/one', :host => 'http://newhost.com').returns(url)
79
+ index.add '/one', :host => 'http://newhost.com'
80
+ end
81
+
82
+ it "should use the host from the location" do
83
+ url = SitemapGenerator::Builder::SitemapIndexUrl.new('/one', :host => 'http://example.com/')
84
+ SitemapGenerator::Builder::SitemapIndexUrl.expects(:new).with('/one', :host => 'http://example.com/').returns(url)
85
+ index.add '/one'
86
+ end
87
+
88
+ describe "when adding manually" do
89
+ it "should reserve a name" do
90
+ index.expects(:reserve_name)
91
+ index.add '/link'
92
+ end
93
+
94
+ it "should create index" do
95
+ index.create_index?.should be_false
96
+ index.add '/one'
97
+ index.create_index?.should be_true
98
+ end
99
+ end
37
100
  end
38
101
  end
@@ -1,16 +1,28 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe SitemapGenerator::Builder::SitemapIndexUrl do
4
- before :all do
5
- @s = SitemapGenerator::Builder::SitemapIndexFile.new(
4
+ let(:index) {
5
+ SitemapGenerator::Builder::SitemapIndexFile.new(
6
6
  :sitemaps_path => 'sitemaps/',
7
7
  :host => 'http://test.com',
8
8
  :filename => 'sitemap_index.xml.gz'
9
9
  )
10
- end
10
+ }
11
+ let(:url) { SitemapGenerator::Builder::SitemapUrl.new(index) }
11
12
 
12
13
  it "should return the correct url" do
13
- @u = SitemapGenerator::Builder::SitemapUrl.new(@s)
14
- @u[:loc].should == 'http://test.com/sitemaps/sitemap_index.xml.gz'
14
+ url[:loc].should == 'http://test.com/sitemaps/sitemap_index.xml.gz'
15
+ end
16
+
17
+ it "should use the host from the index" do
18
+ host = 'http://myexample.com'
19
+ index.location.expects(:host).returns(host)
20
+ url[:host].should == host
21
+ end
22
+
23
+ it "should use the public path for the link" do
24
+ path = '/path'
25
+ index.location.expects(:path_in_public).returns(path)
26
+ url[:loc].should == 'http://test.com/path'
15
27
  end
16
28
  end