sc-big_sitemap 0.8.3.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,184 @@
1
+ require 'fileutils'
2
+ require 'zlib'
3
+
4
class BigSitemap
  # Streams a sitemap XML document to disk one entry at a time.
  #
  # Output is written to "<name>.tmp" and only moved to its final name by
  # #close!. When the configured number of entries has been written, the
  # current file is finished and a new numbered part is started.
  class Builder
    # Entries written to a single file unless :max_urls is given.
    MAX_URLS = 50000

    # Attributes placed on the root element of a URL sitemap.
    HEADER_ATTRIBUTES = {
      'xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9',
      'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
      'xsi:schemaLocation' => "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
    }

    # Characters that must be entity-escaped in element content.
    XML_ESCAPES = {
      '&' => '&amp;',
      '<' => '&lt;',
      '>' => '&gt;',
      '"' => '&quot;',
      "'" => '&apos;'
    }

    # Opens the first output file and writes the document header.
    #
    # The recognised keys are *removed* from +options+ (the hash is mutated):
    #
    # :filename       - base path of the output file, without extension
    # :gzip           - when truthy, gzip the output and skip indentation/newlines
    # :max_urls       - entries per file (defaults to MAX_URLS)
    # :start_part_id  - initial part number (defaults to 0, meaning no suffix)
    # :partial_update - when truthy, a part_nr passed to #add_url! names the next part
    # :type           - stored as given; not used by this class
    def initialize(options)
      @gzip           = options.delete(:gzip)
      @max_urls       = options.delete(:max_urls) || MAX_URLS
      @type           = options.delete(:type)
      @paths          = []
      @parts          = options.delete(:start_part_id) || 0
      @custom_part_nr = options.delete(:partial_update)

      @filename         = options.delete(:filename)
      @current_filename = nil
      @tmp_filename     = nil
      @target           = _get_writer

      @level       = 0
      @opened_tags = []
      _init_document
    end

    # Appends one <url> entry, starting a new file first when the current one
    # already holds the maximum number of entries.
    #
    # url       - value of <loc>
    # time      - optional time for <lastmod>; written in UTC
    # frequency - optional value of <changefreq>
    # priority  - optional value of <priority>
    # part_nr   - part number for the next file; only honoured on rotation and
    #             only when the builder was created with :partial_update
    # extras    - additional tag name => content pairs for the entry
    #
    # Returns the number of entries written to the current file.
    def add_url!(url, time = nil, frequency = nil, priority = nil, part_nr = nil, extras = {})
      _rotate(part_nr) if @max_urls == @urls

      time = _format_time(time) if time
      _url_tag('url', extras.merge(:loc => url, :lastmod => time, :changefreq => frequency, :priority => priority))

      @urls += 1
    end

    # Final names of every file this builder has opened so far.
    def paths!
      @paths
    end

    # Closes all open tags, closes the writer and moves the temporary file
    # to its final name, replacing any file already there.
    def close!
      _close_document
      target!.close if target!.respond_to?(:close)
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      File.delete(@current_filename) if File.exist?(@current_filename)
      File.rename(@tmp_filename, @current_filename)
    end

    # The object currently being written to (a File or a Zlib::GzipWriter).
    def target!
      @target
    end

    private

    # Builds the name of the current part and opens a writer for it.
    def _get_writer
      filename = @filename.dup
      filename << "_#{@parts}" if @parts > 0
      filename << '.xml'
      filename << '.gz' if @gzip
      _open_writer(filename)
    end

    # Records +filename+ as the current output, opens "<filename>.tmp" for
    # writing and wraps it in a gzip writer when gzip is enabled.
    def _open_writer(filename)
      @current_filename = filename
      @tmp_filename     = filename + ".tmp"
      @paths << filename
      file = ::File.open(@tmp_filename, 'w+')
      @gzip ? ::Zlib::GzipWriter.new(file) : file
    end

    # Resets the entry counter and writes the XML declaration and root tag.
    def _init_document( name = 'urlset', attrs = HEADER_ATTRIBUTES)
      @urls = 0
      target!.print '<?xml version="1.0" encoding="UTF-8"?>'
      _newline
      _open_tag name, attrs
    end

    # Writes one entry element containing a child tag for every non-nil,
    # non-false value in +values+.
    def _url_tag(name, values = {})
      _open_tag name
      values.each do |key, value|
        tag! key, value if value
      end
      _close_tag name
    end

    # Finishes the current file and starts writing into the next part.
    def _rotate(part_nr = nil)
      close!
      @parts = (part_nr && @custom_part_nr) ? part_nr : @parts + 1
      @target = _get_writer
      _init_document
    end

    # Formats +time+ as a UTC W3C datetime string.
    #
    # Time#utc converts its receiver in place (and raises on a frozen Time),
    # so the non-mutating #getutc is preferred; #utc is kept as a fallback
    # for time-like objects that do not provide #getutc.
    def _format_time(time)
      utc = time.respond_to?(:getutc) ? time.getutc : time.utc
      utc.strftime('%Y-%m-%dT%H:%M:%S+00:00')
    end

    # Entity-escapes +content+ for use as element text.
    def _escape(content)
      content.to_s.gsub(/[&<>"']/) { |char| XML_ESCAPES[char] }
    end

    # opens a tag, bumps up level but doesn't require a block
    def _open_tag(name, attrs = {})
      _indent
      _start_tag(name, attrs)
      _newline
      @level += 1
      @opened_tags << name
    end

    # Prints the opening tag; +single+ makes it self-closing.
    def _start_tag(name, attrs = {}, single = false)
      attrs = attrs.map { |attr,value| %Q( #{attr}="#{value}") }.join('')
      target!.print "<#{name}#{attrs}#{'/' if single}>"
    end

    # Writes a complete element on one line. An empty-string +content+
    # produces a self-closing tag; anything else is escaped and wrapped.
    def tag!(name, content, attrs = {})
      _indent
      _start_tag(name, attrs, content == "")
      if content != ""
        target!.print _escape(content)
        _end_tag(name)
      end
      _newline
    end

    def _end_tag(name)
      target!.print "</#{name}>"
    end

    # closes a tag block by decreasing the level and inserting a close tag
    def _close_tag(name)
      @opened_tags.pop
      @level -= 1
      _indent
      _end_tag(name)
      _newline
    end

    # Closes every tag that is still open, innermost first.
    def _close_document
      # Iterate over a reversed copy: _close_tag pops from @opened_tags.
      @opened_tags.reverse.each do |name|
        _close_tag(name)
      end
    end

    # Indentation is skipped for gzipped output.
    def _indent
      return if @gzip
      target!.print " " * @level
    end

    # Line breaks are skipped for gzipped output.
    def _newline
      return if @gzip
      target!.puts ''
    end
  end

  # Writes a sitemap index: a <sitemapindex> root holding <sitemap> entries.
  class IndexBuilder < Builder
    def _init_document(name = 'sitemapindex', attrs = {'xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9'})
      #attrs.merge('xmlns:geo' => "http://www.google.com/geo/schemas/sitemap/1.0")
      super(name, attrs)
    end

    # Appends one <sitemap> entry. Unlike Builder#add_url!, this neither
    # counts entries nor rotates to a new file.
    def add_url!(url, time = nil)
      time = _format_time(time) if time
      _url_tag('sitemap', :loc => url, :lastmod => time)
    end
  end

  # Writes a URL sitemap that declares the mobile namespace and adds an
  # empty <mobile:mobile/> tag to every entry.
  class MobileBuilder < Builder
    def _init_document(name = 'urlset', attrs = HEADER_ATTRIBUTES)
      super(name, attrs.merge('xmlns:mobile' => "http://www.google.com/schemas/sitemap-mobile/1.0"))
    end

    def add_url!(url, time = nil, frequency = nil, priority = nil, part_nr = nil, extras = {})
      super(url, time, frequency, priority, part_nr, extras.merge('mobile:mobile' => ""))
    end
  end

  # Placeholder: currently adds nothing to Builder.
  class GeoBuilder < Builder
    #_build_geo if @geo

    # def _build_geo
    #   geo :geo do
    #     geo :format, 'kml'
    #   end
    # end
  end
end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
$:.push File.expand_path("../lib", __FILE__)

Gem::Specification.new do |s|
  s.name        = "sc-big_sitemap"
  # Resolve VERSION relative to this gemspec rather than the current working
  # directory, so the spec can be loaded from outside the project root.
  s.version     = File.read(File.expand_path("../VERSION", __FILE__)).strip
  s.authors     = ["Alex Rabarts", "Tobias Bielohlawek"]
  s.email       = ["alexrabarts@gmail.com", "tobi@soundcloud.com"]
  s.homepage    = %q{http://github.com/alexrabarts/big_sitemap}
  s.summary     = %q{A Sitemap generator specifically designed for large sites (although it works equally well with small sites)}
  s.description = %q{BigSitemap is a Sitemapgenerator suitable for applications with greater than 50,000 URLs. It splits large Sitemaps into multiple files, gzips the files to minimize bandwidth usage, batches database queries to minimize memory usage, supports increment updates, can be set up with just a few lines of code and is compatible with just about any framework.}

  # File lists come from git, which runs in the current working directory.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Each entry may be "name" or "name version-requirement".
  ["bundler", "shoulda", "mocha", "nokogiri"].each do |dependency|
    s.add_development_dependency(*dependency.split(' '))
  end
end
@@ -0,0 +1,485 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+ require 'nokogiri'
3
+
4
# Tests for BigSitemap. The helper methods used here (tmp_dir, create_sitemap,
# add_model, elements, ...) are not defined in this class.
class BigSitemapTest < Test::Unit::TestCase
  def setup
    delete_tmp_files
  end

  def teardown
    delete_tmp_files
  end

  should 'raise an error if the :base_url option is not specified' do
    assert_nothing_raised { BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir) }
    assert_raise(ArgumentError) { BigSitemap.new(:document_root => tmp_dir) }
  end

  should 'generate the same base URL with :base_url option' do
    options = {:document_root => tmp_dir}
    url = 'http://example.com'
    sitemap = BigSitemap.new(options.merge(:base_url => url))

    assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
  end

  should 'generate the same base URL with :url_options option' do
    options = {:document_root => tmp_dir}
    url = 'http://example.com'
    sitemap = BigSitemap.new(options.merge(:url_options => {:host => 'example.com'}))

    assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
  end

  should 'generate a sitemap index file' do
    generate_sitemap_files
    assert File.exist?(sitemaps_index_file)
  end

  should 'generate a single sitemap model file' do
    create_sitemap
    add_model
    @sitemap.generate
    assert File.exist?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
  end

  should 'generate two sitemap model files' do
    generate_two_model_sitemap_files
    assert File.exist?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
    assert File.exist?(second_sitemaps_model_file), "#{second_sitemaps_model_file} exists"
    assert !File.exist?(third_sitemaps_model_file), "#{third_sitemaps_model_file} does not exist"
  end

  should 'generate two sitemap model files for the same model with different options' do
    create_sitemap
    add_model(:path => 'foo')
    add_model(:path => 'bar')
    @sitemap.generate

    assert File.exist?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
    assert File.exist?(second_sitemaps_model_file), "#{second_sitemaps_model_file} exists"
  end

  should 'generate with absolute url_path' do
    url_path = 'http://external.com'

    options = {:document_root => tmp_dir, :document_path => 'sitemaps', :url_path => url_path}
    @sitemap = BigSitemap.new(options.merge(:url_options => {:host => 'example.com'}))

    add_model(:path => 'foo')
    @sitemap.generate

    assert_equal 1, num_elements(sitemaps_index_file, 'sitemapindex')
    assert_equal 1, num_elements(sitemaps_index_file, 'lastmod')

    # Block form closes the gzip reader once the content has been read.
    index_xml = Zlib::GzipReader.open(sitemaps_index_file) { |gz| gz.read }
    assert index_xml.include?("http://external.com")
  end

  context 'Sitemap index file' do
    should 'contain one sitemapindex element' do
      generate_sitemap_files
      assert_equal 1, num_elements(sitemaps_index_file, 'sitemapindex')
    end

    should 'contain one sitemap element' do
      generate_sitemap_files
      assert_equal 1, num_elements(sitemaps_index_file, 'sitemap')
    end

    should 'contain one loc element' do
      generate_one_sitemap_model_file
      assert_equal 1, num_elements(sitemaps_index_file, 'loc')
    end

    should 'contain one lastmod element' do
      generate_one_sitemap_model_file
      assert_equal 1, num_elements(sitemaps_index_file, 'lastmod')
    end

    should 'contain two loc elements' do
      generate_two_model_sitemap_files
      assert_equal 2, num_elements(sitemaps_index_file, 'loc')
    end

    should 'contain two lastmod elements' do
      generate_two_model_sitemap_files
      assert_equal 2, num_elements(sitemaps_index_file, 'lastmod')
    end

    should 'not be gzipped' do
      generate_sitemap_files(:gzip => false)
      assert File.exist?(unzipped_sitemaps_index_file)
    end
  end

  context 'Sitemap model file' do
    should 'contain one urlset element' do
      generate_one_sitemap_model_file
      assert_equal 1, num_elements(first_sitemaps_model_file, 'urlset')
    end

    should 'contain several loc elements' do
      generate_one_sitemap_model_file
      assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'loc')
    end

    should 'contain several lastmod elements' do
      generate_one_sitemap_model_file
      assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'lastmod')
    end

    should 'contain several changefreq elements' do
      generate_one_sitemap_model_file
      assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'changefreq')
    end

    should 'contain several priority elements' do
      generate_one_sitemap_model_file(:priority => 0.2)
      assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'priority')
    end

    should 'have a change frequency of weekly by default' do
      generate_one_sitemap_model_file
      assert_equal 'weekly', elements(first_sitemaps_model_file, 'changefreq').first.text
    end

    should 'have a change frequency of daily' do
      generate_one_sitemap_model_file(:change_frequency => 'daily')
      assert_equal 'daily', elements(first_sitemaps_model_file, 'changefreq').first.text
    end

    should 'be able to use a lambda to specify change frequency' do
      generate_one_sitemap_model_file(:change_frequency => lambda {|m| m.change_frequency})
      assert_equal TestModel.new.change_frequency, elements(first_sitemaps_model_file, 'changefreq').first.text
    end

    should 'have a priority of 0.2' do
      generate_one_sitemap_model_file(:priority => 0.2)
      assert_equal '0.2', elements(first_sitemaps_model_file, 'priority').first.text
    end

    should 'be able to use a lambda to specify priority' do
      generate_one_sitemap_model_file(:priority => lambda {|m| m.priority})
      assert_equal TestModel.new.priority.to_s, elements(first_sitemaps_model_file, 'priority').first.text
    end

    should 'be able to use a lambda to specify lastmod' do
      generate_one_sitemap_model_file(:last_modified => lambda {|m| m.updated_at})
      assert_equal TestModel.new.updated_at.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00'), elements(first_sitemaps_model_file, 'lastmod').first.text
    end

    should 'contain two loc element' do
      generate_two_model_sitemap_files
      assert_equal 2, num_elements(first_sitemaps_model_file, 'loc')
      assert_equal 2, num_elements(second_sitemaps_model_file, 'loc')
    end

    should 'contain two lastmod element' do
      generate_two_model_sitemap_files
      assert_equal 2, num_elements(first_sitemaps_model_file, 'lastmod')
      assert_equal 2, num_elements(second_sitemaps_model_file, 'lastmod')
    end

    should 'contain two changefreq elements' do
      generate_two_model_sitemap_files
      assert_equal 2, num_elements(first_sitemaps_model_file, 'changefreq')
      assert_equal 2, num_elements(second_sitemaps_model_file, 'changefreq')
    end

    should 'contain two priority element' do
      generate_two_model_sitemap_files(:priority => 0.2)
      assert_equal 2, num_elements(first_sitemaps_model_file, 'priority')
      assert_equal 2, num_elements(second_sitemaps_model_file, 'priority')
    end

    should 'strip leading slashes from controller paths' do
      create_sitemap
      add_model
      @sitemap.add(TestModel, :path => '/test_controller').generate
      assert(
        !elements(first_sitemaps_model_file, 'loc').first.text.match(/\/\/test_controller\//),
        'URL does not contain a double-slash before the controller path'
      )
    end

    should 'not be gzipped' do
      generate_one_sitemap_model_file(:gzip => false)
      assert File.exist?(unzipped_first_sitemaps_model_file)
    end
  end

  context 'add method' do
    should 'be chainable' do
      create_sitemap
      assert_equal BigSitemap, @sitemap.add(TestModel).class
    end
  end

  context 'add static method' do
    should 'should generate static content' do
      create_sitemap
      @sitemap.add_static('/', Time.now, 'weekly', 0.5)
      @sitemap.add_static('/about', Time.now, 'weekly', 0.5)
      @sitemap.generate_static
      elems = elements(static_sitemaps_file, 'loc')
      assert_equal "/", elems.first.text
      assert_equal "/about", elems.last.text
    end
  end

  context 'sanatize XML chars' do
    should 'should transform ampersands' do
      create_sitemap
      @sitemap.add_static('/something&else', Time.now, 'weekly', 0.5)
      @sitemap.generate_static
      elems = elements(static_sitemaps_file, 'loc')
      # The raw file must hold the escaped form; the parsed text the original.
      raw_xml = Zlib::GzipReader.open(static_sitemaps_file) { |gz| gz.read }
      assert raw_xml.include?("/something&amp;else")
      assert_equal "/something&else", elems.first.text
    end
  end

  context 'clean method' do
    should 'be chainable' do
      create_sitemap
      assert_equal BigSitemap, @sitemap.clean.class
    end

    should 'clean all sitemap files' do
      generate_sitemap_files
      assert Dir.entries(sitemaps_dir).size > 2, "#{sitemaps_dir} is not empty" # ['.', '..'].size == 2
      @sitemap.clean
      assert_equal 2, Dir.entries(sitemaps_dir).size, "#{sitemaps_dir} is empty"
    end
  end

  context 'generate method' do
    should 'be chainable' do
      create_sitemap
      assert_equal BigSitemap, @sitemap.generate.class
    end
  end

  context 'sitemap index' do
    should 'generate for all xml files in directory' do
      create_sitemap
      @sitemap.clean
      create_files(
        "#{sitemaps_dir}/sitemap_file1.xml",
        "#{sitemaps_dir}/sitemap_file2.xml.gz",
        "#{sitemaps_dir}/sitemap_file3.txt",
        "#{sitemaps_dir}/file4.xml",
        unzipped_sitemaps_index_file)
      @sitemap.send :generate_sitemap_index

      elem = elements(sitemaps_index_file, 'loc')
      assert_equal 2, elem.size #no index and file3 and file4 found
      assert_equal "http://example.com/sitemaps/sitemap_file1.xml", elem.first.text
      assert_equal "http://example.com/sitemaps/sitemap_file2.xml.gz", elem.last.text
    end

    should 'generate for all for given file' do
      create_sitemap
      @sitemap.clean
      files = ["#{sitemaps_dir}/sitemap_file1.xml", "#{sitemaps_dir}/sitemap_file2.xml.gz"]
      create_files(*files)
      @sitemap.send :generate_sitemap_index, files

      elem = elements(sitemaps_index_file, 'loc')
      assert_equal 2, elem.size
      assert_equal "http://example.com/sitemaps/sitemap_file1.xml", elem.first.text
      assert_equal "http://example.com/sitemaps/sitemap_file2.xml.gz", elem.last.text
    end
  end

  context 'get_last_id' do
    should 'return last id' do
      create_sitemap.clean
      filename = "#{sitemaps_dir}/sitemap_file"
      create_files("#{filename}_1.xml",
                   "#{filename}_23.xml",
                   "#{filename}_42.xml.gz",
                   "#{filename}_9.xml")
      assert_equal 42, @sitemap.send(:get_last_id, filename)
    end

    should 'return nil' do
      create_sitemap.clean
      filename = "#{sitemaps_dir}/sitemap_file"
      assert_nil @sitemap.send(:get_last_id, filename)
    end
  end

  context 'mobile' do
    should 'include mobile namespace' do
      create_sitemap(:gzip => false)
      add_model(:path => 'foo', :mobile => true)
      @sitemap.generate

      # Block form closes the file handle when the assertion is done.
      File.open(unzipped_first_sitemaps_model_file) do |f|
        f.first # skip the first line; the next read returns the second one
        assert f.first.include?(ns['mobile'])
      end
    end

    should 'include mobile tag' do
      create_sitemap(:gzip => false)
      add_model(:path => 'foo', :mobile => true)
      @sitemap.generate

      assert_equal 10, mobile_elements(unzipped_first_sitemaps_model_file, 'mobile').size
    end
  end

  context 'partial update' do

    context 'prepare_update' do
      should 'generate correct condition for partial update' do
        filename = "#{sitemaps_dir}/sitemap_test_models"

        create_sitemap(:partial_update => true).clean
        add_model(:num_items => 50) #TestModel

        create_files "#{filename}_23.xml"
        assert_equal "(id >= 23)", @sitemap.send(:prepare_update).first.last[:conditions]

        create_files "#{filename}_42.xml"
        assert_equal "(id >= 23) AND (id >= 42)", @sitemap.send(:prepare_update).first.last[:conditions]
      end

      should 'generate correct condition for partial update with custom column' do
        filename = "#{sitemaps_dir}/sitemap_test_models"

        create_sitemap(:partial_update => true).clean
        add_model(:num_items => 50, :primary_column => 'name') #TestModel

        create_files "#{filename}_666.xml"
        assert_equal "(name >= 666)", @sitemap.send(:prepare_update).first.last[:conditions]
      end
    end

    should 'generate for all xml files in directory and delete last file' do
      TestModel.current_id = last_id = 27
      filename = "#{sitemaps_dir}/sitemap_test_models"

      create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100).clean
      add_model(:num_items => 50 - last_id) #TestModel

      create_files("#{filename}.xml",
                   "#{filename}_5.xml",
                   "#{filename}_9.xml",
                   "#{filename}_23.xml",
                   "#{filename}_#{last_id}.xml")
      @sitemap.generate

      # Dir["#{sitemaps_dir}/*"].each do |d| puts d; end

      assert File.exist?("#{filename}_48.xml")
      assert File.exist?("#{filename}_#{last_id}.xml")
      elems = elements("#{filename}_#{last_id}.xml", 'loc').map(&:text)

      assert_equal 5, elems.size
      (28..32).each do |i|
        assert elems.include?("http://example.com/test_models/#{i}")
      end

      elems = elements(unzipped_sitemaps_index_file, 'loc').map(&:text)
      assert elems.include?("http://example.com/sitemaps/sitemap_test_models.xml")
      assert elems.include?("http://example.com/sitemaps/sitemap_test_models_9.xml")
      assert elems.include?("http://example.com/sitemaps/sitemap_test_models_#{last_id}.xml")
      assert elems.include?("http://example.com/sitemaps/sitemap_test_models_48.xml")
    end

    should 'generate sitemap, update should respect old files' do
      max_id = 23
      TestModel.current_id = 0
      filename = "#{sitemaps_dir}/sitemap_test_models"

      create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100).clean
      add_model(:num_items => max_id) #TestModel
      @sitemap.generate

      # Dir["#{sitemaps_dir}/*"].each do |d| puts d; end

      assert_equal 5, elements("#{filename}.xml", 'loc').size
      assert_equal 5, elements("#{filename}_6.xml", 'loc').size
      assert_equal 3, elements("#{filename}_21.xml", 'loc').size

      TestModel.current_id = 20 #last_id is 21, so start with one below
      create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100)
      add_model( :num_items => 48 - TestModel.current_id ) #TestModel
      @sitemap.generate

      assert_equal 5, elements("#{filename}_6.xml", 'loc').size
      assert_equal 5, elements("#{filename}_21.xml", 'loc').size

      # Dir["#{sitemaps_dir}/*"].each do |d| puts d; end

      elems = elements("#{filename}_26.xml", 'loc').map(&:text)
      (26..30).each do |i|
        assert elems.include?("http://example.com/test_models/#{i}")
      end

      #puts `cat /tmp/sitemaps/sitemap_test_models_41.xml`

      assert_equal 3, elements("#{filename}_46.xml", 'loc').size
    end

    context 'escape' do
      should 'add if not number' do
        create_sitemap
        data = {
          42 => 42,
          '23' => 23,
          "test" => "'test'",
          "test10" => "'test10'",
          "10test" => "'10test'",
          "10t' est" => "'10t\\' est'",
        }
        data.each do |key, value|
          assert_equal value, @sitemap.send(:escape_if_string, key)
        end

      end
    end

    context 'lockfile' do
      should 'create and delete lock file' do
        sitemap = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)

        sitemap.with_lock do
          assert File.exist?('/tmp/sitemaps/generator.lock')
        end

        assert !File.exist?('/tmp/sitemaps/generator.lock')
      end

      should 'not catch error not related to lock' do
        sitemap = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)

        assert_raise RuntimeError do
          sitemap.with_lock do
            raise "Wrong"
          end
        end

      end

      # NOTE(review): despite the name, this asserts that a second with_lock
      # call raises nothing and never runs its block while the lock is held.
      should 'throw error if lock exits' do
        sitemap = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)

        sitemap.with_lock do
          sitemap2 = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)

          assert_nothing_raised do
            sitemap2.with_lock do
              raise "Should not be called"
            end
          end

        end
      end
    end

  end

end