big_sitemap 0.8.3 → 1.0.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -14,35 +14,36 @@ class BigSitemap
14
14
  @gzip = options.delete(:gzip)
15
15
  @max_urls = options.delete(:max_urls) || MAX_URLS
16
16
  @type = options.delete(:type)
17
- @paths = []
17
+ @filepaths = []
18
18
  @parts = options.delete(:start_part_id) || 0
19
- @custom_part_nr = options.delete(:partial_update)
19
+ @partial_update = options.delete(:partial_update)
20
20
 
21
- @filename = options.delete(:filename)
21
+ @filename = options.delete(:filename)
22
22
  @current_filename = nil
23
23
  @tmp_filename = nil
24
- @target = _get_writer
24
+ @target = _get_writer
25
25
 
26
26
  @level = 0
27
27
  @opened_tags = []
28
28
  _init_document
29
29
  end
30
30
 
31
- def add_url!(url, time = nil, frequency = nil, priority = nil, part_nr = nil)
32
- _rotate(part_nr) if @max_urls == @urls
33
-
31
+ def add_url!(location, options={})
32
+ _rotate(options[:id]) if @max_urls == @urls
34
33
  _open_tag 'url'
35
- tag! 'loc', url
36
- tag! 'lastmod', time.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if time
37
- tag! 'changefreq', frequency if frequency
38
- tag! 'priority', priority if priority
34
+
35
+ tag! 'loc', location
36
+ tag! 'lastmod', options[:last_modified].utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if options[:last_modified]
37
+ tag! 'changefreq', options[:change_frequency] || 'weekly'
38
+ tag! 'priority', options[:priority] if options[:priority]
39
+
39
40
  _close_tag 'url'
40
41
 
41
42
  @urls += 1
42
43
  end
43
44
 
44
- def paths!
45
- @paths
45
+ def filepaths!
46
+ @filepaths
46
47
  end
47
48
 
48
49
  def close!
@@ -60,7 +61,7 @@ class BigSitemap
60
61
 
61
62
  def _get_writer
62
63
  filename = @filename.dup
63
- filename << "_#{@parts}" if @parts > 0
64
+ filename << "_#{@parts}" if @parts > 0 && @type != 'index'
64
65
  filename << '.xml'
65
66
  filename << '.gz' if @gzip
66
67
  _open_writer(filename)
@@ -69,28 +70,28 @@ class BigSitemap
69
70
  def _open_writer(filename)
70
71
  @current_filename = filename
71
72
  @tmp_filename = filename + ".tmp"
72
- @paths << filename
73
- file = ::File.open(@tmp_filename, 'w+')
73
+ @filepaths << filename
74
+ file = ::File.open(@tmp_filename, 'w+:ASCII-8BIT')
74
75
  @gzip ? ::Zlib::GzipWriter.new(file) : file
75
76
  end
76
77
 
77
- def _init_document( name = 'urlset', attrs = HEADER_ATTRIBUTES)
78
+ def _init_document(name='urlset', attrs=HEADER_ATTRIBUTES)
78
79
  @urls = 0
79
80
  target!.print '<?xml version="1.0" encoding="UTF-8"?>'
80
81
  _newline
81
82
  _open_tag name, attrs
82
83
  end
83
84
 
84
- def _rotate(part_nr = nil)
85
+ def _rotate(part_nr=nil)
85
86
  # write out the current document and start writing into a new file
86
87
  close!
87
- @parts = (part_nr && @custom_part_nr) ? part_nr : @parts + 1
88
+ @parts = part_nr || @parts + 1
88
89
  @target = _get_writer
89
90
  _init_document
90
91
  end
91
92
 
92
93
  # opens a tag, bumps up level but doesn't require a block
93
- def _open_tag(name, attrs = {})
94
+ def _open_tag(name, attrs={})
94
95
  _indent
95
96
  _start_tag(name, attrs)
96
97
  _newline
@@ -98,8 +99,8 @@ class BigSitemap
98
99
  @opened_tags << name
99
100
  end
100
101
 
101
- def _start_tag(name, attrs = {})
102
- attrs = attrs.map { |attr,value| %Q( #{attr}="#{value}") }.join('')
102
+ def _start_tag(name, attrs={})
103
+ attrs = attrs.map { |attr, value| %Q( #{attr}="#{value}") }.join('')
103
104
  target!.print "<#{name}#{attrs}>"
104
105
  end
105
106
 
@@ -131,12 +132,10 @@ class BigSitemap
131
132
  end
132
133
 
133
134
  def _indent
134
- return if @gzip
135
135
  target!.print " " * @level
136
136
  end
137
137
 
138
138
  def _newline
139
- return if @gzip
140
139
  target!.puts ''
141
140
  end
142
141
  end
@@ -147,10 +146,12 @@ class BigSitemap
147
146
  super(name, attrs)
148
147
  end
149
148
 
150
- def add_url!(url, time = nil)
149
+ def add_url!(location, options={})
151
150
  _open_tag 'sitemap'
152
- tag! 'loc', url
153
- tag! 'lastmod', time.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if time
151
+
152
+ tag! 'loc', location
153
+ tag! 'lastmod', options[:last_modified].utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if options[:last_modified]
154
+
154
155
  _close_tag 'sitemap'
155
156
  end
156
157
  end
@@ -31,379 +31,271 @@ class BigSitemapTest < Test::Unit::TestCase
31
31
  assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
32
32
  end
33
33
 
34
- should 'generate a sitemap index file' do
35
- generate_sitemap_files
36
- assert File.exists?(sitemaps_index_file)
34
+ should 'generate sitemap index file' do
35
+ generate_sitemap { add '/foo' }
36
+ assert File.exists? first_sitemap_file
37
37
  end
38
38
 
39
- should 'generate a single sitemap model file' do
40
- create_sitemap
41
- add_model
42
- @sitemap.generate
43
- assert File.exists?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
39
+ should 'generate static file' do
40
+ generate_sitemap { add '/foo' }
41
+ assert File.exists? first_sitemap_file
44
42
  end
45
43
 
46
- should 'generate two sitemap model files' do
47
- generate_two_model_sitemap_files
48
- assert File.exists?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
49
- assert File.exists?(second_sitemaps_model_file), "#{second_sitemaps_model_file} exists"
50
- assert !File.exists?(third_sitemaps_model_file), "#{third_sitemaps_model_file} does not exist"
51
- end
52
-
53
- should 'generate two sitemap model files for the same model with different options' do
54
- create_sitemap
55
- add_model(:path => 'foo')
56
- add_model(:path => 'bar')
57
- @sitemap.generate
44
+ should 'should add paths' do
45
+ generate_sitemap do
46
+ add '/', {:last_modified => Time.now, :change_frequency => 'weekly', :priority => 0.5}
47
+ add '/about', {:last_modified => Time.now, :change_frequency => 'weekly', :priority => 0.5}
48
+ end
58
49
 
59
- assert File.exists?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
60
- assert File.exists?(second_sitemaps_model_file), "#{second_sitemaps_model_file} exists"
50
+ elems = elements first_sitemap_file, 'loc'
51
+ assert_equal 'http://example.com/', elems.first.text
52
+ assert_equal 'http://example.com/about', elems.last.text
61
53
  end
62
54
 
63
55
  context 'Sitemap index file' do
64
56
  should 'contain one sitemapindex element' do
65
- generate_sitemap_files
57
+ generate_sitemap { add '/' }
66
58
  assert_equal 1, num_elements(sitemaps_index_file, 'sitemapindex')
67
59
  end
68
60
 
69
61
  should 'contain one sitemap element' do
70
- generate_sitemap_files
62
+ generate_sitemap { add '/' }
71
63
  assert_equal 1, num_elements(sitemaps_index_file, 'sitemap')
72
64
  end
73
65
 
74
66
  should 'contain one loc element' do
75
- generate_one_sitemap_model_file
67
+ generate_sitemap { add '/' }
76
68
  assert_equal 1, num_elements(sitemaps_index_file, 'loc')
77
69
  end
78
70
 
79
71
  should 'contain one lastmod element' do
80
- generate_one_sitemap_model_file
72
+ generate_sitemap { add '/' }
81
73
  assert_equal 1, num_elements(sitemaps_index_file, 'lastmod')
82
74
  end
83
75
 
84
76
  should 'contain two loc elements' do
85
- generate_two_model_sitemap_files
77
+ generate_sitemap(:max_per_sitemap => 2) do
78
+ 4.times { add '/' }
79
+ end
80
+
86
81
  assert_equal 2, num_elements(sitemaps_index_file, 'loc')
87
82
  end
88
83
 
89
84
  should 'contain two lastmod elements' do
90
- generate_two_model_sitemap_files
85
+ generate_sitemap(:max_per_sitemap => 2) do
86
+ 4.times { add '/' }
87
+ end
88
+
91
89
  assert_equal 2, num_elements(sitemaps_index_file, 'lastmod')
92
90
  end
93
91
 
94
92
  should 'not be gzipped' do
95
- generate_sitemap_files(:gzip => false)
93
+ generate_sitemap(:gzip => false) { add '/' }
96
94
  assert File.exists?(unzipped_sitemaps_index_file)
97
95
  end
98
96
  end
99
97
 
100
- context 'Sitemap model file' do
98
+ context 'Sitemap file' do
101
99
  should 'contain one urlset element' do
102
- generate_one_sitemap_model_file
103
- assert_equal 1, num_elements(first_sitemaps_model_file, 'urlset')
100
+ generate_sitemap { add '/' }
101
+ assert_equal 1, num_elements(first_sitemap_file, 'urlset')
104
102
  end
105
103
 
106
104
  should 'contain several loc elements' do
107
- generate_one_sitemap_model_file
108
- assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'loc')
105
+ generate_sitemap do
106
+ 3.times { add '/' }
107
+ end
108
+
109
+ assert_equal 3, num_elements(first_sitemap_file, 'loc')
109
110
  end
110
111
 
111
112
  should 'contain several lastmod elements' do
112
- generate_one_sitemap_model_file
113
- assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'lastmod')
113
+ generate_sitemap do
114
+ 3.times { add '/', :last_modified => Time.now }
115
+ end
116
+
117
+ assert_equal 3, num_elements(first_sitemap_file, 'lastmod')
114
118
  end
115
119
 
116
120
  should 'contain several changefreq elements' do
117
- generate_one_sitemap_model_file
118
- assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'changefreq')
121
+ generate_sitemap do
122
+ 3.times { add '/' }
123
+ end
124
+
125
+ assert_equal 3, num_elements(first_sitemap_file, 'changefreq')
119
126
  end
120
127
 
121
128
  should 'contain several priority elements' do
122
- generate_one_sitemap_model_file(:priority => 0.2)
123
- assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'priority')
129
+ generate_sitemap do
130
+ 3.times { add '/', :priority => 0.2 }
131
+ end
132
+
133
+ assert_equal 3, num_elements(first_sitemap_file, 'priority')
124
134
  end
125
135
 
126
136
  should 'have a change frequency of weekly by default' do
127
- generate_one_sitemap_model_file
128
- assert_equal 'weekly', elements(first_sitemaps_model_file, 'changefreq').first.text
129
- end
137
+ generate_sitemap do
138
+ 3.times { add '/' }
139
+ end
130
140
 
131
- should 'have a change frequency of daily' do
132
- generate_one_sitemap_model_file(:change_frequency => 'daily')
133
- assert_equal 'daily', elements(first_sitemaps_model_file, 'changefreq').first.text
141
+ assert_equal 'weekly', elements(first_sitemap_file, 'changefreq').first.text
134
142
  end
135
143
 
136
- should 'be able to use a lambda to specify change frequency' do
137
- generate_one_sitemap_model_file(:change_frequency => lambda {|m| m.change_frequency})
138
- assert_equal TestModel.new.change_frequency, elements(first_sitemaps_model_file, 'changefreq').first.text
144
+ should 'have a change frequency of daily' do
145
+ generate_sitemap { add '/', :change_frequency => 'daily' }
146
+ assert_equal 'daily', elements(first_sitemap_file, 'changefreq').first.text
139
147
  end
140
148
 
141
149
  should 'have a priority of 0.2' do
142
- generate_one_sitemap_model_file(:priority => 0.2)
143
- assert_equal '0.2', elements(first_sitemaps_model_file, 'priority').first.text
144
- end
145
-
146
- should 'be able to use a lambda to specify priority' do
147
- generate_one_sitemap_model_file(:priority => lambda {|m| m.priority})
148
- assert_equal TestModel.new.priority.to_s, elements(first_sitemaps_model_file, 'priority').first.text
149
- end
150
-
151
- should 'be able to use a lambda to specify lastmod' do
152
- generate_one_sitemap_model_file(:last_modified => lambda {|m| m.updated_at})
153
- assert_equal TestModel.new.updated_at.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00'), elements(first_sitemaps_model_file, 'lastmod').first.text
150
+ generate_sitemap { add '/', :priority => 0.2 }
151
+ assert_equal '0.2', elements(first_sitemap_file, 'priority').first.text
154
152
  end
155
153
 
156
154
  should 'contain two loc element' do
157
- generate_two_model_sitemap_files
158
- assert_equal 2, num_elements(first_sitemaps_model_file, 'loc')
159
- assert_equal 2, num_elements(second_sitemaps_model_file, 'loc')
160
- end
155
+ generate_sitemap(:max_per_sitemap => 2) do
156
+ 4.times { add '/' }
157
+ end
161
158
 
162
- should 'contain two lastmod element' do
163
- generate_two_model_sitemap_files
164
- assert_equal 2, num_elements(first_sitemaps_model_file, 'lastmod')
165
- assert_equal 2, num_elements(second_sitemaps_model_file, 'lastmod')
159
+ assert_equal 2, num_elements(first_sitemap_file, 'loc')
160
+ assert_equal 2, num_elements(second_sitemap_file, 'loc')
166
161
  end
167
162
 
168
163
  should 'contain two changefreq elements' do
169
- generate_two_model_sitemap_files
170
- assert_equal 2, num_elements(first_sitemaps_model_file, 'changefreq')
171
- assert_equal 2, num_elements(second_sitemaps_model_file, 'changefreq')
164
+ generate_sitemap(:max_per_sitemap => 2) do
165
+ 4.times { add '/' }
166
+ end
167
+
168
+ assert_equal 2, num_elements(first_sitemap_file, 'changefreq')
169
+ assert_equal 2, num_elements(second_sitemap_file, 'changefreq')
172
170
  end
173
171
 
174
172
  should 'contain two priority element' do
175
- generate_two_model_sitemap_files(:priority => 0.2)
176
- assert_equal 2, num_elements(first_sitemaps_model_file, 'priority')
177
- assert_equal 2, num_elements(second_sitemaps_model_file, 'priority')
178
- end
173
+ generate_sitemap(:max_per_sitemap => 2) do
174
+ 4.times { add '/', :priority => 0.2 }
175
+ end
179
176
 
180
- should 'strip leading slashes from controller paths' do
181
- create_sitemap
182
- add_model
183
- @sitemap.add(TestModel, :path => '/test_controller').generate
184
- assert(
185
- !elements(first_sitemaps_model_file, 'loc').first.text.match(/\/\/test_controller\//),
186
- 'URL does not contain a double-slash before the controller path'
187
- )
177
+ assert_equal 2, num_elements(first_sitemap_file, 'priority')
178
+ assert_equal 2, num_elements(second_sitemap_file, 'priority')
188
179
  end
189
180
 
190
181
  should 'not be gzipped' do
191
- generate_one_sitemap_model_file(:gzip => false)
192
- assert File.exists?(unzipped_first_sitemaps_model_file)
193
- end
194
- end
195
-
196
- context 'add method' do
197
- should 'be chainable' do
198
- create_sitemap
199
- assert_equal BigSitemap, @sitemap.add(TestModel).class
200
- end
201
- end
202
-
203
- context 'add static method' do
204
- should 'should generate static content' do
205
- create_sitemap
206
- @sitemap.add_static('/', Time.now, 'weekly', 0.5)
207
- @sitemap.add_static('/about', Time.now, 'weekly', 0.5)
208
- @sitemap.generate_static
209
- elems = elements(static_sitemaps_file, 'loc')
210
- assert_equal "/", elems.first.text
211
- assert_equal "/about", elems.last.text
182
+ generate_sitemap(:gzip => false) { add '/' }
183
+ assert File.exists?(unzipped_first_sitemap_file)
212
184
  end
213
185
  end
214
186
 
215
187
  context 'sanatize XML chars' do
216
188
  should 'should transform ampersands' do
217
- create_sitemap
218
- @sitemap.add_static('/something&else', Time.now, 'weekly', 0.5)
219
- @sitemap.generate_static
220
- elems = elements(static_sitemaps_file, 'loc')
221
- assert Zlib::GzipReader.open(static_sitemaps_file).read.include?("/something&amp;else")
222
- assert_equal "/something&else", elems.first.text
189
+ generate_sitemap { add '/something&else' }
190
+ elems = elements(first_sitemap_file, 'loc')
191
+
192
+ assert Zlib::GzipReader.open(first_sitemap_file).read.include?("/something&amp;else")
193
+ assert_equal 'http://example.com/something&else', elems.first.text
223
194
  end
224
195
  end
225
196
 
226
197
  context 'clean method' do
227
198
  should 'be chainable' do
228
- create_sitemap
229
- assert_equal BigSitemap, @sitemap.clean.class
199
+ sitemap = generate_sitemap { add '/' }
200
+ assert_equal BigSitemap, sitemap.clean.class
230
201
  end
231
202
 
232
203
  should 'clean all sitemap files' do
233
- generate_sitemap_files
234
- assert Dir.entries(sitemaps_dir).size > 2, "#{sitemaps_dir} is not empty" # ['.', '..'].size == 2
235
- @sitemap.clean
236
- assert_equal 2, Dir.entries(sitemaps_dir).size, "#{sitemaps_dir} is empty"
237
- end
238
- end
239
-
240
- context 'generate method' do
241
- should 'be chainable' do
242
- create_sitemap
243
- assert_equal BigSitemap, @sitemap.generate.class
204
+ sitemap = generate_sitemap { add '/' }
205
+ assert Dir["#{sitemaps_dir}/sitemap*"].size > 0, "#{sitemaps_dir} has sitemap files"
206
+ sitemap.clean
207
+ assert_equal 0, Dir["#{sitemaps_dir}/sitemap*"].size, "#{sitemaps_dir} is empty of sitemap files"
244
208
  end
245
209
  end
246
210
 
247
211
  context 'sitemap index' do
248
212
  should 'generate for all xml files in directory' do
249
- create_sitemap
250
- @sitemap.clean
213
+ sitemap = generate_sitemap {}
251
214
  File.open("#{sitemaps_dir}/sitemap_file1.xml", 'w')
252
215
  File.open("#{sitemaps_dir}/sitemap_file2.xml.gz", 'w')
253
216
  File.open("#{sitemaps_dir}/sitemap_file3.txt", 'w')
254
217
  File.open("#{sitemaps_dir}/file4.xml", 'w')
255
218
  File.open(unzipped_sitemaps_index_file, 'w')
256
- @sitemap.send :generate_sitemap_index
219
+ sitemap.send :generate_sitemap_index
257
220
 
258
221
  elem = elements(sitemaps_index_file, 'loc')
259
222
  assert_equal 2, elem.size #no index and file3 and file4 found
260
- assert_equal "http://example.com/sitemaps/sitemap_file1.xml", elem.first.text
261
- assert_equal "http://example.com/sitemaps/sitemap_file2.xml.gz", elem.last.text
223
+ assert_equal "http://example.com/sitemap_file1.xml", elem.first.text
224
+ assert_equal "http://example.com/sitemap_file2.xml.gz", elem.last.text
262
225
  end
263
226
 
264
227
  should 'generate for all for given file' do
265
- create_sitemap
266
- @sitemap.clean
228
+ sitemap = generate_sitemap {}
267
229
  File.open("#{sitemaps_dir}/sitemap_file1.xml", 'w')
268
230
  File.open("#{sitemaps_dir}/sitemap_file2.xml.gz", 'w')
269
231
  files = ["#{sitemaps_dir}/sitemap_file1.xml", "#{sitemaps_dir}/sitemap_file2.xml.gz"]
270
- @sitemap.send :generate_sitemap_index, files
232
+ sitemap.send :generate_sitemap_index, files
271
233
 
272
234
  elem = elements(sitemaps_index_file, 'loc')
273
235
  assert_equal 2, elem.size
274
- assert_equal "http://example.com/sitemaps/sitemap_file1.xml", elem.first.text
275
- assert_equal "http://example.com/sitemaps/sitemap_file2.xml.gz", elem.last.text
276
- end
277
- end
278
-
279
- context 'get_last_id' do
280
- should 'return last id' do
281
- create_sitemap.clean
282
- filename = "#{sitemaps_dir}/sitemap_file"
283
- File.open("#{filename}_1.xml", 'w')
284
- File.open("#{filename}_23.xml", 'w')
285
- File.open("#{filename}_42.xml.gz", 'w')
286
- File.open("#{filename}_9.xml", 'w')
287
- assert_equal 42, @sitemap.send(:get_last_id, filename)
288
- end
289
-
290
- should 'return nil' do
291
- create_sitemap.clean
292
- filename = "#{sitemaps_dir}/sitemap_file"
293
- assert_equal nil, @sitemap.send(:get_last_id, filename)
236
+ assert_equal "http://example.com/sitemap_file1.xml", elem.first.text
237
+ assert_equal "http://example.com/sitemap_file2.xml.gz", elem.last.text
294
238
  end
295
239
  end
296
240
 
297
241
  context 'partial update' do
298
-
299
- context 'prepare_update' do
300
- should 'generate correct condition for partial update' do
301
- filename = "#{sitemaps_dir}/sitemap_test_models"
302
-
303
- create_sitemap(:partial_update => true).clean
304
- add_model(:num_items => 50) #TestModel
305
-
306
- File.open("#{filename}_23.xml", 'w')
307
- assert_equal "(id >= 23)", @sitemap.send(:prepare_update).first.last[:conditions]
308
-
309
- File.open("#{filename}_42.xml", 'w')
310
- assert_equal "(id >= 23) AND (id >= 42)", @sitemap.send(:prepare_update).first.last[:conditions]
242
+ should 'not recreate old files' do
243
+ # The first run should generate all the files
244
+ generate_sitemap(:max_per_sitemap => 2, :partial_update => true, :gzip => false) do
245
+ [10, 20, 30, 40, 50].each do |i|
246
+ add "/#{i}", :id => i
247
+ end
311
248
  end
312
249
 
313
- should 'generate correct condition for partial update with custom column' do
314
- filename = "#{sitemaps_dir}/sitemap_test_models"
250
+ filename = "#{sitemaps_dir}/sitemap"
315
251
 
316
- create_sitemap(:partial_update => true).clean
317
- add_model(:num_items => 50, :primary_column => 'name') #TestModel
252
+ assert File.exists? "#{filename}.xml" # ids 10 and 20
253
+ assert File.exists? "#{filename}_30.xml" # ids 30 and 40
254
+ assert File.exists? "#{filename}_50.xml" # id 50
318
255
 
319
- File.open("#{filename}_666.xml", 'w')
320
- assert_equal "(name >= 666)", @sitemap.send(:prepare_update).first.last[:conditions]
321
- end
322
- end
256
+ # Move the files so we can test if they are re-created
257
+ FileUtils.mv "#{filename}.xml", "#{filename}.bak.xml"
258
+ FileUtils.mv "#{filename}_30.xml", "#{filename}_30.bak.xml"
323
259
 
324
- should 'generate for all xml files in directory and delete last file' do
325
- TestModel.current_id = last_id = 27
326
- filename = "#{sitemaps_dir}/sitemap_test_models"
260
+ # Store the original file size so we can compare it later
261
+ original_size = File.size "#{filename}_50.xml"
327
262
 
328
- create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100).clean
329
- add_model(:num_items => 50 - last_id) #TestModel
263
+ start_id = nil
330
264
 
331
- File.open("#{filename}.xml", 'w')
332
- File.open("#{filename}_5.xml", 'w')
333
- File.open("#{filename}_9.xml", 'w')
334
- File.open("#{filename}_23.xml", 'w')
335
- File.open("#{filename}_#{last_id}.xml", 'w')
336
- @sitemap.generate
265
+ # Run a new update starting from the first ID of the last sitemap
266
+ generate_sitemap(:max_per_sitemap => 2, :partial_update => true, :gzip => false) do
267
+ start_id = first_id_of_last_sitemap
337
268
 
338
- # Dir["#{sitemaps_dir}/*"].each do |d| puts d; end
339
-
340
- assert File.exists?("#{filename}_48.xml")
341
- assert File.exists?("#{filename}_#{last_id}.xml")
342
- elems = elements("#{filename}_#{last_id}.xml", 'loc').map(&:text)
343
-
344
- assert_equal 5, elems.size
345
- (28..32).each do |i|
346
- assert elems.include? "http://example.com/test_models/#{i}"
269
+ [50, 60, 70, 80].each do |i|
270
+ add "/#{i}", :id => i
271
+ end
347
272
  end
348
273
 
349
- elems = elements(unzipped_sitemaps_index_file, 'loc').map(&:text)
350
- assert elems.include? "http://example.com/sitemaps/sitemap_test_models.xml"
351
- assert elems.include? "http://example.com/sitemaps/sitemap_test_models_9.xml"
352
- assert elems.include? "http://example.com/sitemaps/sitemap_test_models_#{last_id}.xml"
353
- assert elems.include? "http://example.com/sitemaps/sitemap_test_models_48.xml"
354
- end
355
-
356
- should 'generate sitemap, update should respect old files' do
357
- max_id = 23
358
- TestModel.current_id = 0
359
- filename = "#{sitemaps_dir}/sitemap_test_models"
360
-
361
- create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100).clean
362
- add_model(:num_items => max_id) #TestModel
363
- @sitemap.generate
364
-
365
- # Dir["#{sitemaps_dir}/*"].each do |d| puts d; end
274
+ # Check the correct ID is returned for the beginning of the last sitemap
275
+ assert_equal 50, start_id
366
276
 
367
- assert_equal 5, elements("#{filename}.xml", 'loc').size
368
- assert_equal 5, elements("#{filename}_6.xml", 'loc').size
369
- assert_equal 3, elements("#{filename}_21.xml", 'loc').size
277
+ # Since we did a partial update, the earlier files shouldn't have been recreated
278
+ assert !File.exists?("#{filename}.xml") # ids 10 and 20
279
+ assert !File.exists?("#{filename}_30.xml") # ids 30 and 40
370
280
 
371
- TestModel.current_id = 20 #last_id is 21, so start with one below
372
- create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100)
373
- add_model( :num_items => 48 - TestModel.current_id ) #TestModel
374
- @sitemap.generate
281
+ # The last file of the first run should have been recreated with new records
282
+ # and a larger file size
283
+ assert (original_size < File.size?("#{filename}_50.xml"))
375
284
 
376
- assert_equal 5, elements("#{filename}_6.xml", 'loc').size
377
- assert_equal 5, elements("#{filename}_21.xml", 'loc').size
285
+ elems = elements("#{filename}_50.xml", 'loc').map(&:text)
378
286
 
379
- # Dir["#{sitemaps_dir}/*"].each do |d| puts d; end
287
+ assert_equal 2, elems.size
380
288
 
381
- elems = elements("#{filename}_26.xml", 'loc').map(&:text)
382
- (26..30).each do |i|
383
- assert elems.include? "http://example.com/test_models/#{i}"
289
+ [50, 60].each do |i|
290
+ assert elems.include? "http://example.com/#{i}"
384
291
  end
385
292
 
386
- #puts `cat /tmp/sitemaps/sitemap_test_models_41.xml`
387
-
388
- assert_equal 3, elements("#{filename}_46.xml", 'loc').size
389
- end
390
-
391
- context 'escape' do
392
- should 'add if not number' do
393
- create_sitemap
394
- data = {
395
- 42 => 42,
396
- '23' => 23,
397
- "test" => "'test'",
398
- "test10" => "'test10'",
399
- "10test" => "'10test'",
400
- "10t' est" => "'10t\\' est'",
401
- }
402
- data.each do |key, value|
403
- assert_equal value, @sitemap.send(:escape_if_string, key)
404
- end
293
+ elems = elements(unzipped_sitemaps_index_file, 'loc').map(&:text)
405
294
 
406
- end
295
+ assert elems.include? 'http://example.com/sitemap.bak.xml'
296
+ assert elems.include? 'http://example.com/sitemap_30.bak.xml'
297
+ assert elems.include? 'http://example.com/sitemap_50.xml'
298
+ assert elems.include? 'http://example.com/sitemap_70.xml'
407
299
  end
408
300
 
409
301
  context 'lockfile' do
@@ -411,10 +303,10 @@ class BigSitemapTest < Test::Unit::TestCase
411
303
  sitemap = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)
412
304
 
413
305
  sitemap.with_lock do
414
- assert File.exists?('/tmp/sitemaps/generator.lock')
306
+ assert File.exists?("#{sitemaps_dir}/generator.lock")
415
307
  end
416
308
 
417
- assert !File.exists?('/tmp/sitemaps/generator.lock')
309
+ assert !File.exists?("#{sitemaps_dir}/generator.lock")
418
310
  end
419
311
 
420
312
  should 'not catch error not related to lock' do
@@ -425,7 +317,6 @@ class BigSitemapTest < Test::Unit::TestCase
425
317
  raise "Wrong"
426
318
  end
427
319
  end
428
-
429
320
  end
430
321
 
431
322
  should 'throw error if lock exits' do
@@ -442,53 +333,18 @@ class BigSitemapTest < Test::Unit::TestCase
442
333
 
443
334
  end
444
335
  end
445
-
446
336
  end
447
337
  end
448
338
 
449
339
  private
450
- def delete_tmp_files
451
- FileUtils.rm_rf(sitemaps_dir)
452
- end
453
-
454
- def create_sitemap(options={})
455
- @sitemap = BigSitemap.new({
456
- :base_url => 'http://example.com',
457
- :document_root => tmp_dir,
458
- :ping_google => false
459
- }.update(options))
460
- end
461
-
462
- def generate_sitemap_files(options={})
463
- create_sitemap(options)
464
- add_model
465
- @sitemap.generate
340
+ def generate_sitemap(options={}, &block)
341
+ BigSitemap.generate(options.merge(:base_url => 'http://example.com', :document_root => tmp_dir), &block)
466
342
  end
467
343
 
468
- def generate_one_sitemap_model_file(options={})
469
- change_frequency = options.delete(:change_frequency)
470
- priority = options.delete(:priority)
471
- create_sitemap(options.merge(:max_per_sitemap => default_num_items, :batch_size => default_num_items))
472
- add_model(:change_frequency => change_frequency, :priority => priority)
473
- @sitemap.generate
474
- end
475
-
476
- def generate_two_model_sitemap_files(options={})
477
- change_frequency = options.delete(:change_frequency)
478
- priority = options.delete(:priority)
479
- create_sitemap(options.merge(:max_per_sitemap => 2, :batch_size => 1))
480
- add_model(:num_items => 4, :change_frequency => change_frequency, :priority => priority)
481
- @sitemap.generate
482
- end
483
-
484
- def add_model(options={})
485
- num_items = options.delete(:num_items) || default_num_items
486
- TestModel.stubs(:count_for_sitemap).returns(num_items)
487
- @sitemap.add(TestModel, options)
488
- end
489
-
490
- def default_num_items
491
- 10
344
+ def delete_tmp_files
345
+ Dir["#{sitemaps_dir}/sitemap*"].each do |f|
346
+ FileUtils.rm_rf f
347
+ end
492
348
  end
493
349
 
494
350
  def sitemaps_index_file
@@ -499,28 +355,24 @@ class BigSitemapTest < Test::Unit::TestCase
499
355
  "#{sitemaps_dir}/sitemap_index.xml"
500
356
  end
501
357
 
502
- def unzipped_first_sitemaps_model_file
503
- "#{sitemaps_dir}/sitemap_test_models.xml"
504
- end
505
-
506
- def first_sitemaps_model_file
507
- "#{sitemaps_dir}/sitemap_test_models.xml.gz"
358
+ def unzipped_first_sitemap_file
359
+ "#{sitemaps_dir}/sitemap.xml"
508
360
  end
509
361
 
510
- def static_sitemaps_file
511
- "#{sitemaps_dir}/sitemap_static.xml.gz"
362
+ def first_sitemap_file
363
+ "#{sitemaps_dir}/sitemap.xml.gz"
512
364
  end
513
365
 
514
- def second_sitemaps_model_file
515
- "#{sitemaps_dir}/sitemap_test_models_1.xml.gz"
366
+ def second_sitemap_file
367
+ "#{sitemaps_dir}/sitemap_1.xml.gz"
516
368
  end
517
369
 
518
- def third_sitemaps_model_file
519
- "#{sitemaps_dir}/sitemap_test_models_2.xml.gz"
370
+ def third_sitemap_file
371
+ "#{sitemaps_dir}/sitemap_2.xml.gz"
520
372
  end
521
373
 
522
374
  def sitemaps_dir
523
- "#{tmp_dir}/sitemaps"
375
+ tmp_dir
524
376
  end
525
377
 
526
378
  def tmp_dir