big_sitemap 0.8.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,35 +14,36 @@ class BigSitemap
14
14
  @gzip = options.delete(:gzip)
15
15
  @max_urls = options.delete(:max_urls) || MAX_URLS
16
16
  @type = options.delete(:type)
17
- @paths = []
17
+ @filepaths = []
18
18
  @parts = options.delete(:start_part_id) || 0
19
- @custom_part_nr = options.delete(:partial_update)
19
+ @partial_update = options.delete(:partial_update)
20
20
 
21
- @filename = options.delete(:filename)
21
+ @filename = options.delete(:filename)
22
22
  @current_filename = nil
23
23
  @tmp_filename = nil
24
- @target = _get_writer
24
+ @target = _get_writer
25
25
 
26
26
  @level = 0
27
27
  @opened_tags = []
28
28
  _init_document
29
29
  end
30
30
 
31
- def add_url!(url, time = nil, frequency = nil, priority = nil, part_nr = nil)
32
- _rotate(part_nr) if @max_urls == @urls
33
-
31
+ def add_url!(location, options={})
32
+ _rotate(options[:id]) if @max_urls == @urls
34
33
  _open_tag 'url'
35
- tag! 'loc', url
36
- tag! 'lastmod', time.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if time
37
- tag! 'changefreq', frequency if frequency
38
- tag! 'priority', priority if priority
34
+
35
+ tag! 'loc', location
36
+ tag! 'lastmod', options[:last_modified].utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if options[:last_modified]
37
+ tag! 'changefreq', options[:change_frequency] || 'weekly'
38
+ tag! 'priority', options[:priority] if options[:priority]
39
+
39
40
  _close_tag 'url'
40
41
 
41
42
  @urls += 1
42
43
  end
43
44
 
44
- def paths!
45
- @paths
45
+ def filepaths!
46
+ @filepaths
46
47
  end
47
48
 
48
49
  def close!
@@ -60,7 +61,7 @@ class BigSitemap
60
61
 
61
62
  def _get_writer
62
63
  filename = @filename.dup
63
- filename << "_#{@parts}" if @parts > 0
64
+ filename << "_#{@parts}" if @parts > 0 && @type != 'index'
64
65
  filename << '.xml'
65
66
  filename << '.gz' if @gzip
66
67
  _open_writer(filename)
@@ -69,28 +70,28 @@ class BigSitemap
69
70
  def _open_writer(filename)
70
71
  @current_filename = filename
71
72
  @tmp_filename = filename + ".tmp"
72
- @paths << filename
73
- file = ::File.open(@tmp_filename, 'w+')
73
+ @filepaths << filename
74
+ file = ::File.open(@tmp_filename, 'w+:ASCII-8BIT')
74
75
  @gzip ? ::Zlib::GzipWriter.new(file) : file
75
76
  end
76
77
 
77
- def _init_document( name = 'urlset', attrs = HEADER_ATTRIBUTES)
78
+ def _init_document(name='urlset', attrs=HEADER_ATTRIBUTES)
78
79
  @urls = 0
79
80
  target!.print '<?xml version="1.0" encoding="UTF-8"?>'
80
81
  _newline
81
82
  _open_tag name, attrs
82
83
  end
83
84
 
84
- def _rotate(part_nr = nil)
85
+ def _rotate(part_nr=nil)
85
86
  # write out the current document and start writing into a new file
86
87
  close!
87
- @parts = (part_nr && @custom_part_nr) ? part_nr : @parts + 1
88
+ @parts = part_nr || @parts + 1
88
89
  @target = _get_writer
89
90
  _init_document
90
91
  end
91
92
 
92
93
  # opens a tag, bumps up level but doesn't require a block
93
- def _open_tag(name, attrs = {})
94
+ def _open_tag(name, attrs={})
94
95
  _indent
95
96
  _start_tag(name, attrs)
96
97
  _newline
@@ -98,8 +99,8 @@ class BigSitemap
98
99
  @opened_tags << name
99
100
  end
100
101
 
101
- def _start_tag(name, attrs = {})
102
- attrs = attrs.map { |attr,value| %Q( #{attr}="#{value}") }.join('')
102
+ def _start_tag(name, attrs={})
103
+ attrs = attrs.map { |attr, value| %Q( #{attr}="#{value}") }.join('')
103
104
  target!.print "<#{name}#{attrs}>"
104
105
  end
105
106
 
@@ -131,12 +132,10 @@ class BigSitemap
131
132
  end
132
133
 
133
134
  def _indent
134
- return if @gzip
135
135
  target!.print " " * @level
136
136
  end
137
137
 
138
138
  def _newline
139
- return if @gzip
140
139
  target!.puts ''
141
140
  end
142
141
  end
@@ -147,10 +146,12 @@ class BigSitemap
147
146
  super(name, attrs)
148
147
  end
149
148
 
150
- def add_url!(url, time = nil)
149
+ def add_url!(location, options={})
151
150
  _open_tag 'sitemap'
152
- tag! 'loc', url
153
- tag! 'lastmod', time.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if time
151
+
152
+ tag! 'loc', location
153
+ tag! 'lastmod', options[:last_modified].utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if options[:last_modified]
154
+
154
155
  _close_tag 'sitemap'
155
156
  end
156
157
  end
@@ -31,379 +31,271 @@ class BigSitemapTest < Test::Unit::TestCase
31
31
  assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
32
32
  end
33
33
 
34
- should 'generate a sitemap index file' do
35
- generate_sitemap_files
36
- assert File.exists?(sitemaps_index_file)
34
+ should 'generate sitemap index file' do
35
+ generate_sitemap { add '/foo' }
36
+ assert File.exists? first_sitemap_file
37
37
  end
38
38
 
39
- should 'generate a single sitemap model file' do
40
- create_sitemap
41
- add_model
42
- @sitemap.generate
43
- assert File.exists?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
39
+ should 'generate static file' do
40
+ generate_sitemap { add '/foo' }
41
+ assert File.exists? first_sitemap_file
44
42
  end
45
43
 
46
- should 'generate two sitemap model files' do
47
- generate_two_model_sitemap_files
48
- assert File.exists?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
49
- assert File.exists?(second_sitemaps_model_file), "#{second_sitemaps_model_file} exists"
50
- assert !File.exists?(third_sitemaps_model_file), "#{third_sitemaps_model_file} does not exist"
51
- end
52
-
53
- should 'generate two sitemap model files for the same model with different options' do
54
- create_sitemap
55
- add_model(:path => 'foo')
56
- add_model(:path => 'bar')
57
- @sitemap.generate
44
+ should 'should add paths' do
45
+ generate_sitemap do
46
+ add '/', {:last_modified => Time.now, :change_frequency => 'weekly', :priority => 0.5}
47
+ add '/about', {:last_modified => Time.now, :change_frequency => 'weekly', :priority => 0.5}
48
+ end
58
49
 
59
- assert File.exists?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
60
- assert File.exists?(second_sitemaps_model_file), "#{second_sitemaps_model_file} exists"
50
+ elems = elements first_sitemap_file, 'loc'
51
+ assert_equal 'http://example.com/', elems.first.text
52
+ assert_equal 'http://example.com/about', elems.last.text
61
53
  end
62
54
 
63
55
  context 'Sitemap index file' do
64
56
  should 'contain one sitemapindex element' do
65
- generate_sitemap_files
57
+ generate_sitemap { add '/' }
66
58
  assert_equal 1, num_elements(sitemaps_index_file, 'sitemapindex')
67
59
  end
68
60
 
69
61
  should 'contain one sitemap element' do
70
- generate_sitemap_files
62
+ generate_sitemap { add '/' }
71
63
  assert_equal 1, num_elements(sitemaps_index_file, 'sitemap')
72
64
  end
73
65
 
74
66
  should 'contain one loc element' do
75
- generate_one_sitemap_model_file
67
+ generate_sitemap { add '/' }
76
68
  assert_equal 1, num_elements(sitemaps_index_file, 'loc')
77
69
  end
78
70
 
79
71
  should 'contain one lastmod element' do
80
- generate_one_sitemap_model_file
72
+ generate_sitemap { add '/' }
81
73
  assert_equal 1, num_elements(sitemaps_index_file, 'lastmod')
82
74
  end
83
75
 
84
76
  should 'contain two loc elements' do
85
- generate_two_model_sitemap_files
77
+ generate_sitemap(:max_per_sitemap => 2) do
78
+ 4.times { add '/' }
79
+ end
80
+
86
81
  assert_equal 2, num_elements(sitemaps_index_file, 'loc')
87
82
  end
88
83
 
89
84
  should 'contain two lastmod elements' do
90
- generate_two_model_sitemap_files
85
+ generate_sitemap(:max_per_sitemap => 2) do
86
+ 4.times { add '/' }
87
+ end
88
+
91
89
  assert_equal 2, num_elements(sitemaps_index_file, 'lastmod')
92
90
  end
93
91
 
94
92
  should 'not be gzipped' do
95
- generate_sitemap_files(:gzip => false)
93
+ generate_sitemap(:gzip => false) { add '/' }
96
94
  assert File.exists?(unzipped_sitemaps_index_file)
97
95
  end
98
96
  end
99
97
 
100
- context 'Sitemap model file' do
98
+ context 'Sitemap file' do
101
99
  should 'contain one urlset element' do
102
- generate_one_sitemap_model_file
103
- assert_equal 1, num_elements(first_sitemaps_model_file, 'urlset')
100
+ generate_sitemap { add '/' }
101
+ assert_equal 1, num_elements(first_sitemap_file, 'urlset')
104
102
  end
105
103
 
106
104
  should 'contain several loc elements' do
107
- generate_one_sitemap_model_file
108
- assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'loc')
105
+ generate_sitemap do
106
+ 3.times { add '/' }
107
+ end
108
+
109
+ assert_equal 3, num_elements(first_sitemap_file, 'loc')
109
110
  end
110
111
 
111
112
  should 'contain several lastmod elements' do
112
- generate_one_sitemap_model_file
113
- assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'lastmod')
113
+ generate_sitemap do
114
+ 3.times { add '/', :last_modified => Time.now }
115
+ end
116
+
117
+ assert_equal 3, num_elements(first_sitemap_file, 'lastmod')
114
118
  end
115
119
 
116
120
  should 'contain several changefreq elements' do
117
- generate_one_sitemap_model_file
118
- assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'changefreq')
121
+ generate_sitemap do
122
+ 3.times { add '/' }
123
+ end
124
+
125
+ assert_equal 3, num_elements(first_sitemap_file, 'changefreq')
119
126
  end
120
127
 
121
128
  should 'contain several priority elements' do
122
- generate_one_sitemap_model_file(:priority => 0.2)
123
- assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'priority')
129
+ generate_sitemap do
130
+ 3.times { add '/', :priority => 0.2 }
131
+ end
132
+
133
+ assert_equal 3, num_elements(first_sitemap_file, 'priority')
124
134
  end
125
135
 
126
136
  should 'have a change frequency of weekly by default' do
127
- generate_one_sitemap_model_file
128
- assert_equal 'weekly', elements(first_sitemaps_model_file, 'changefreq').first.text
129
- end
137
+ generate_sitemap do
138
+ 3.times { add '/' }
139
+ end
130
140
 
131
- should 'have a change frequency of daily' do
132
- generate_one_sitemap_model_file(:change_frequency => 'daily')
133
- assert_equal 'daily', elements(first_sitemaps_model_file, 'changefreq').first.text
141
+ assert_equal 'weekly', elements(first_sitemap_file, 'changefreq').first.text
134
142
  end
135
143
 
136
- should 'be able to use a lambda to specify change frequency' do
137
- generate_one_sitemap_model_file(:change_frequency => lambda {|m| m.change_frequency})
138
- assert_equal TestModel.new.change_frequency, elements(first_sitemaps_model_file, 'changefreq').first.text
144
+ should 'have a change frequency of daily' do
145
+ generate_sitemap { add '/', :change_frequency => 'daily' }
146
+ assert_equal 'daily', elements(first_sitemap_file, 'changefreq').first.text
139
147
  end
140
148
 
141
149
  should 'have a priority of 0.2' do
142
- generate_one_sitemap_model_file(:priority => 0.2)
143
- assert_equal '0.2', elements(first_sitemaps_model_file, 'priority').first.text
144
- end
145
-
146
- should 'be able to use a lambda to specify priority' do
147
- generate_one_sitemap_model_file(:priority => lambda {|m| m.priority})
148
- assert_equal TestModel.new.priority.to_s, elements(first_sitemaps_model_file, 'priority').first.text
149
- end
150
-
151
- should 'be able to use a lambda to specify lastmod' do
152
- generate_one_sitemap_model_file(:last_modified => lambda {|m| m.updated_at})
153
- assert_equal TestModel.new.updated_at.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00'), elements(first_sitemaps_model_file, 'lastmod').first.text
150
+ generate_sitemap { add '/', :priority => 0.2 }
151
+ assert_equal '0.2', elements(first_sitemap_file, 'priority').first.text
154
152
  end
155
153
 
156
154
  should 'contain two loc element' do
157
- generate_two_model_sitemap_files
158
- assert_equal 2, num_elements(first_sitemaps_model_file, 'loc')
159
- assert_equal 2, num_elements(second_sitemaps_model_file, 'loc')
160
- end
155
+ generate_sitemap(:max_per_sitemap => 2) do
156
+ 4.times { add '/' }
157
+ end
161
158
 
162
- should 'contain two lastmod element' do
163
- generate_two_model_sitemap_files
164
- assert_equal 2, num_elements(first_sitemaps_model_file, 'lastmod')
165
- assert_equal 2, num_elements(second_sitemaps_model_file, 'lastmod')
159
+ assert_equal 2, num_elements(first_sitemap_file, 'loc')
160
+ assert_equal 2, num_elements(second_sitemap_file, 'loc')
166
161
  end
167
162
 
168
163
  should 'contain two changefreq elements' do
169
- generate_two_model_sitemap_files
170
- assert_equal 2, num_elements(first_sitemaps_model_file, 'changefreq')
171
- assert_equal 2, num_elements(second_sitemaps_model_file, 'changefreq')
164
+ generate_sitemap(:max_per_sitemap => 2) do
165
+ 4.times { add '/' }
166
+ end
167
+
168
+ assert_equal 2, num_elements(first_sitemap_file, 'changefreq')
169
+ assert_equal 2, num_elements(second_sitemap_file, 'changefreq')
172
170
  end
173
171
 
174
172
  should 'contain two priority element' do
175
- generate_two_model_sitemap_files(:priority => 0.2)
176
- assert_equal 2, num_elements(first_sitemaps_model_file, 'priority')
177
- assert_equal 2, num_elements(second_sitemaps_model_file, 'priority')
178
- end
173
+ generate_sitemap(:max_per_sitemap => 2) do
174
+ 4.times { add '/', :priority => 0.2 }
175
+ end
179
176
 
180
- should 'strip leading slashes from controller paths' do
181
- create_sitemap
182
- add_model
183
- @sitemap.add(TestModel, :path => '/test_controller').generate
184
- assert(
185
- !elements(first_sitemaps_model_file, 'loc').first.text.match(/\/\/test_controller\//),
186
- 'URL does not contain a double-slash before the controller path'
187
- )
177
+ assert_equal 2, num_elements(first_sitemap_file, 'priority')
178
+ assert_equal 2, num_elements(second_sitemap_file, 'priority')
188
179
  end
189
180
 
190
181
  should 'not be gzipped' do
191
- generate_one_sitemap_model_file(:gzip => false)
192
- assert File.exists?(unzipped_first_sitemaps_model_file)
193
- end
194
- end
195
-
196
- context 'add method' do
197
- should 'be chainable' do
198
- create_sitemap
199
- assert_equal BigSitemap, @sitemap.add(TestModel).class
200
- end
201
- end
202
-
203
- context 'add static method' do
204
- should 'should generate static content' do
205
- create_sitemap
206
- @sitemap.add_static('/', Time.now, 'weekly', 0.5)
207
- @sitemap.add_static('/about', Time.now, 'weekly', 0.5)
208
- @sitemap.generate_static
209
- elems = elements(static_sitemaps_file, 'loc')
210
- assert_equal "/", elems.first.text
211
- assert_equal "/about", elems.last.text
182
+ generate_sitemap(:gzip => false) { add '/' }
183
+ assert File.exists?(unzipped_first_sitemap_file)
212
184
  end
213
185
  end
214
186
 
215
187
  context 'sanatize XML chars' do
216
188
  should 'should transform ampersands' do
217
- create_sitemap
218
- @sitemap.add_static('/something&else', Time.now, 'weekly', 0.5)
219
- @sitemap.generate_static
220
- elems = elements(static_sitemaps_file, 'loc')
221
- assert Zlib::GzipReader.open(static_sitemaps_file).read.include?("/something&amp;else")
222
- assert_equal "/something&else", elems.first.text
189
+ generate_sitemap { add '/something&else' }
190
+ elems = elements(first_sitemap_file, 'loc')
191
+
192
+ assert Zlib::GzipReader.open(first_sitemap_file).read.include?("/something&amp;else")
193
+ assert_equal 'http://example.com/something&else', elems.first.text
223
194
  end
224
195
  end
225
196
 
226
197
  context 'clean method' do
227
198
  should 'be chainable' do
228
- create_sitemap
229
- assert_equal BigSitemap, @sitemap.clean.class
199
+ sitemap = generate_sitemap { add '/' }
200
+ assert_equal BigSitemap, sitemap.clean.class
230
201
  end
231
202
 
232
203
  should 'clean all sitemap files' do
233
- generate_sitemap_files
234
- assert Dir.entries(sitemaps_dir).size > 2, "#{sitemaps_dir} is not empty" # ['.', '..'].size == 2
235
- @sitemap.clean
236
- assert_equal 2, Dir.entries(sitemaps_dir).size, "#{sitemaps_dir} is empty"
237
- end
238
- end
239
-
240
- context 'generate method' do
241
- should 'be chainable' do
242
- create_sitemap
243
- assert_equal BigSitemap, @sitemap.generate.class
204
+ sitemap = generate_sitemap { add '/' }
205
+ assert Dir["#{sitemaps_dir}/sitemap*"].size > 0, "#{sitemaps_dir} has sitemap files"
206
+ sitemap.clean
207
+ assert_equal 0, Dir["#{sitemaps_dir}/sitemap*"].size, "#{sitemaps_dir} is empty of sitemap files"
244
208
  end
245
209
  end
246
210
 
247
211
  context 'sitemap index' do
248
212
  should 'generate for all xml files in directory' do
249
- create_sitemap
250
- @sitemap.clean
213
+ sitemap = generate_sitemap {}
251
214
  File.open("#{sitemaps_dir}/sitemap_file1.xml", 'w')
252
215
  File.open("#{sitemaps_dir}/sitemap_file2.xml.gz", 'w')
253
216
  File.open("#{sitemaps_dir}/sitemap_file3.txt", 'w')
254
217
  File.open("#{sitemaps_dir}/file4.xml", 'w')
255
218
  File.open(unzipped_sitemaps_index_file, 'w')
256
- @sitemap.send :generate_sitemap_index
219
+ sitemap.send :generate_sitemap_index
257
220
 
258
221
  elem = elements(sitemaps_index_file, 'loc')
259
222
  assert_equal 2, elem.size #no index and file3 and file4 found
260
- assert_equal "http://example.com/sitemaps/sitemap_file1.xml", elem.first.text
261
- assert_equal "http://example.com/sitemaps/sitemap_file2.xml.gz", elem.last.text
223
+ assert_equal "http://example.com/sitemap_file1.xml", elem.first.text
224
+ assert_equal "http://example.com/sitemap_file2.xml.gz", elem.last.text
262
225
  end
263
226
 
264
227
  should 'generate for all for given file' do
265
- create_sitemap
266
- @sitemap.clean
228
+ sitemap = generate_sitemap {}
267
229
  File.open("#{sitemaps_dir}/sitemap_file1.xml", 'w')
268
230
  File.open("#{sitemaps_dir}/sitemap_file2.xml.gz", 'w')
269
231
  files = ["#{sitemaps_dir}/sitemap_file1.xml", "#{sitemaps_dir}/sitemap_file2.xml.gz"]
270
- @sitemap.send :generate_sitemap_index, files
232
+ sitemap.send :generate_sitemap_index, files
271
233
 
272
234
  elem = elements(sitemaps_index_file, 'loc')
273
235
  assert_equal 2, elem.size
274
- assert_equal "http://example.com/sitemaps/sitemap_file1.xml", elem.first.text
275
- assert_equal "http://example.com/sitemaps/sitemap_file2.xml.gz", elem.last.text
276
- end
277
- end
278
-
279
- context 'get_last_id' do
280
- should 'return last id' do
281
- create_sitemap.clean
282
- filename = "#{sitemaps_dir}/sitemap_file"
283
- File.open("#{filename}_1.xml", 'w')
284
- File.open("#{filename}_23.xml", 'w')
285
- File.open("#{filename}_42.xml.gz", 'w')
286
- File.open("#{filename}_9.xml", 'w')
287
- assert_equal 42, @sitemap.send(:get_last_id, filename)
288
- end
289
-
290
- should 'return nil' do
291
- create_sitemap.clean
292
- filename = "#{sitemaps_dir}/sitemap_file"
293
- assert_equal nil, @sitemap.send(:get_last_id, filename)
236
+ assert_equal "http://example.com/sitemap_file1.xml", elem.first.text
237
+ assert_equal "http://example.com/sitemap_file2.xml.gz", elem.last.text
294
238
  end
295
239
  end
296
240
 
297
241
  context 'partial update' do
298
-
299
- context 'prepare_update' do
300
- should 'generate correct condition for partial update' do
301
- filename = "#{sitemaps_dir}/sitemap_test_models"
302
-
303
- create_sitemap(:partial_update => true).clean
304
- add_model(:num_items => 50) #TestModel
305
-
306
- File.open("#{filename}_23.xml", 'w')
307
- assert_equal "(id >= 23)", @sitemap.send(:prepare_update).first.last[:conditions]
308
-
309
- File.open("#{filename}_42.xml", 'w')
310
- assert_equal "(id >= 23) AND (id >= 42)", @sitemap.send(:prepare_update).first.last[:conditions]
242
+ should 'not recreate old files' do
243
+ # The first run should generate all the files
244
+ generate_sitemap(:max_per_sitemap => 2, :partial_update => true, :gzip => false) do
245
+ [10, 20, 30, 40, 50].each do |i|
246
+ add "/#{i}", :id => i
247
+ end
311
248
  end
312
249
 
313
- should 'generate correct condition for partial update with custom column' do
314
- filename = "#{sitemaps_dir}/sitemap_test_models"
250
+ filename = "#{sitemaps_dir}/sitemap"
315
251
 
316
- create_sitemap(:partial_update => true).clean
317
- add_model(:num_items => 50, :primary_column => 'name') #TestModel
252
+ assert File.exists? "#{filename}.xml" # ids 10 and 20
253
+ assert File.exists? "#{filename}_30.xml" # ids 30 and 40
254
+ assert File.exists? "#{filename}_50.xml" # id 50
318
255
 
319
- File.open("#{filename}_666.xml", 'w')
320
- assert_equal "(name >= 666)", @sitemap.send(:prepare_update).first.last[:conditions]
321
- end
322
- end
256
+ # Move the files so we can test if they are re-created
257
+ FileUtils.mv "#{filename}.xml", "#{filename}.bak.xml"
258
+ FileUtils.mv "#{filename}_30.xml", "#{filename}_30.bak.xml"
323
259
 
324
- should 'generate for all xml files in directory and delete last file' do
325
- TestModel.current_id = last_id = 27
326
- filename = "#{sitemaps_dir}/sitemap_test_models"
260
+ # Store the original file size so we can compare it later
261
+ original_size = File.size "#{filename}_50.xml"
327
262
 
328
- create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100).clean
329
- add_model(:num_items => 50 - last_id) #TestModel
263
+ start_id = nil
330
264
 
331
- File.open("#{filename}.xml", 'w')
332
- File.open("#{filename}_5.xml", 'w')
333
- File.open("#{filename}_9.xml", 'w')
334
- File.open("#{filename}_23.xml", 'w')
335
- File.open("#{filename}_#{last_id}.xml", 'w')
336
- @sitemap.generate
265
+ # Run a new update starting from the first ID of the last sitemap
266
+ generate_sitemap(:max_per_sitemap => 2, :partial_update => true, :gzip => false) do
267
+ start_id = first_id_of_last_sitemap
337
268
 
338
- # Dir["#{sitemaps_dir}/*"].each do |d| puts d; end
339
-
340
- assert File.exists?("#{filename}_48.xml")
341
- assert File.exists?("#{filename}_#{last_id}.xml")
342
- elems = elements("#{filename}_#{last_id}.xml", 'loc').map(&:text)
343
-
344
- assert_equal 5, elems.size
345
- (28..32).each do |i|
346
- assert elems.include? "http://example.com/test_models/#{i}"
269
+ [50, 60, 70, 80].each do |i|
270
+ add "/#{i}", :id => i
271
+ end
347
272
  end
348
273
 
349
- elems = elements(unzipped_sitemaps_index_file, 'loc').map(&:text)
350
- assert elems.include? "http://example.com/sitemaps/sitemap_test_models.xml"
351
- assert elems.include? "http://example.com/sitemaps/sitemap_test_models_9.xml"
352
- assert elems.include? "http://example.com/sitemaps/sitemap_test_models_#{last_id}.xml"
353
- assert elems.include? "http://example.com/sitemaps/sitemap_test_models_48.xml"
354
- end
355
-
356
- should 'generate sitemap, update should respect old files' do
357
- max_id = 23
358
- TestModel.current_id = 0
359
- filename = "#{sitemaps_dir}/sitemap_test_models"
360
-
361
- create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100).clean
362
- add_model(:num_items => max_id) #TestModel
363
- @sitemap.generate
364
-
365
- # Dir["#{sitemaps_dir}/*"].each do |d| puts d; end
274
+ # Check the correct ID is returned for the beginning of the last sitemap
275
+ assert_equal 50, start_id
366
276
 
367
- assert_equal 5, elements("#{filename}.xml", 'loc').size
368
- assert_equal 5, elements("#{filename}_6.xml", 'loc').size
369
- assert_equal 3, elements("#{filename}_21.xml", 'loc').size
277
+ # Since we did a partial update, the earlier files shouldn't have been recreated
278
+ assert !File.exists?("#{filename}.xml") # ids 10 and 20
279
+ assert !File.exists?("#{filename}_30.xml") # ids 30 and 40
370
280
 
371
- TestModel.current_id = 20 #last_id is 21, so start with one below
372
- create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100)
373
- add_model( :num_items => 48 - TestModel.current_id ) #TestModel
374
- @sitemap.generate
281
+ # The last file of the first run should have been recreated with new records
282
+ # and a larger file size
283
+ assert (original_size < File.size?("#{filename}_50.xml"))
375
284
 
376
- assert_equal 5, elements("#{filename}_6.xml", 'loc').size
377
- assert_equal 5, elements("#{filename}_21.xml", 'loc').size
285
+ elems = elements("#{filename}_50.xml", 'loc').map(&:text)
378
286
 
379
- # Dir["#{sitemaps_dir}/*"].each do |d| puts d; end
287
+ assert_equal 2, elems.size
380
288
 
381
- elems = elements("#{filename}_26.xml", 'loc').map(&:text)
382
- (26..30).each do |i|
383
- assert elems.include? "http://example.com/test_models/#{i}"
289
+ [50, 60].each do |i|
290
+ assert elems.include? "http://example.com/#{i}"
384
291
  end
385
292
 
386
- #puts `cat /tmp/sitemaps/sitemap_test_models_41.xml`
387
-
388
- assert_equal 3, elements("#{filename}_46.xml", 'loc').size
389
- end
390
-
391
- context 'escape' do
392
- should 'add if not number' do
393
- create_sitemap
394
- data = {
395
- 42 => 42,
396
- '23' => 23,
397
- "test" => "'test'",
398
- "test10" => "'test10'",
399
- "10test" => "'10test'",
400
- "10t' est" => "'10t\\' est'",
401
- }
402
- data.each do |key, value|
403
- assert_equal value, @sitemap.send(:escape_if_string, key)
404
- end
293
+ elems = elements(unzipped_sitemaps_index_file, 'loc').map(&:text)
405
294
 
406
- end
295
+ assert elems.include? 'http://example.com/sitemap.bak.xml'
296
+ assert elems.include? 'http://example.com/sitemap_30.bak.xml'
297
+ assert elems.include? 'http://example.com/sitemap_50.xml'
298
+ assert elems.include? 'http://example.com/sitemap_70.xml'
407
299
  end
408
300
 
409
301
  context 'lockfile' do
@@ -411,10 +303,10 @@ class BigSitemapTest < Test::Unit::TestCase
411
303
  sitemap = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)
412
304
 
413
305
  sitemap.with_lock do
414
- assert File.exists?('/tmp/sitemaps/generator.lock')
306
+ assert File.exists?("#{sitemaps_dir}/generator.lock")
415
307
  end
416
308
 
417
- assert !File.exists?('/tmp/sitemaps/generator.lock')
309
+ assert !File.exists?("#{sitemaps_dir}/generator.lock")
418
310
  end
419
311
 
420
312
  should 'not catch error not related to lock' do
@@ -425,7 +317,6 @@ class BigSitemapTest < Test::Unit::TestCase
425
317
  raise "Wrong"
426
318
  end
427
319
  end
428
-
429
320
  end
430
321
 
431
322
  should 'throw error if lock exits' do
@@ -442,53 +333,18 @@ class BigSitemapTest < Test::Unit::TestCase
442
333
 
443
334
  end
444
335
  end
445
-
446
336
  end
447
337
  end
448
338
 
449
339
  private
450
- def delete_tmp_files
451
- FileUtils.rm_rf(sitemaps_dir)
452
- end
453
-
454
- def create_sitemap(options={})
455
- @sitemap = BigSitemap.new({
456
- :base_url => 'http://example.com',
457
- :document_root => tmp_dir,
458
- :ping_google => false
459
- }.update(options))
460
- end
461
-
462
- def generate_sitemap_files(options={})
463
- create_sitemap(options)
464
- add_model
465
- @sitemap.generate
340
+ def generate_sitemap(options={}, &block)
341
+ BigSitemap.generate(options.merge(:base_url => 'http://example.com', :document_root => tmp_dir), &block)
466
342
  end
467
343
 
468
- def generate_one_sitemap_model_file(options={})
469
- change_frequency = options.delete(:change_frequency)
470
- priority = options.delete(:priority)
471
- create_sitemap(options.merge(:max_per_sitemap => default_num_items, :batch_size => default_num_items))
472
- add_model(:change_frequency => change_frequency, :priority => priority)
473
- @sitemap.generate
474
- end
475
-
476
- def generate_two_model_sitemap_files(options={})
477
- change_frequency = options.delete(:change_frequency)
478
- priority = options.delete(:priority)
479
- create_sitemap(options.merge(:max_per_sitemap => 2, :batch_size => 1))
480
- add_model(:num_items => 4, :change_frequency => change_frequency, :priority => priority)
481
- @sitemap.generate
482
- end
483
-
484
- def add_model(options={})
485
- num_items = options.delete(:num_items) || default_num_items
486
- TestModel.stubs(:count_for_sitemap).returns(num_items)
487
- @sitemap.add(TestModel, options)
488
- end
489
-
490
- def default_num_items
491
- 10
344
+ def delete_tmp_files
345
+ Dir["#{sitemaps_dir}/sitemap*"].each do |f|
346
+ FileUtils.rm_rf f
347
+ end
492
348
  end
493
349
 
494
350
  def sitemaps_index_file
@@ -499,28 +355,24 @@ class BigSitemapTest < Test::Unit::TestCase
499
355
  "#{sitemaps_dir}/sitemap_index.xml"
500
356
  end
501
357
 
502
- def unzipped_first_sitemaps_model_file
503
- "#{sitemaps_dir}/sitemap_test_models.xml"
504
- end
505
-
506
- def first_sitemaps_model_file
507
- "#{sitemaps_dir}/sitemap_test_models.xml.gz"
358
+ def unzipped_first_sitemap_file
359
+ "#{sitemaps_dir}/sitemap.xml"
508
360
  end
509
361
 
510
- def static_sitemaps_file
511
- "#{sitemaps_dir}/sitemap_static.xml.gz"
362
+ def first_sitemap_file
363
+ "#{sitemaps_dir}/sitemap.xml.gz"
512
364
  end
513
365
 
514
- def second_sitemaps_model_file
515
- "#{sitemaps_dir}/sitemap_test_models_1.xml.gz"
366
+ def second_sitemap_file
367
+ "#{sitemaps_dir}/sitemap_1.xml.gz"
516
368
  end
517
369
 
518
- def third_sitemaps_model_file
519
- "#{sitemaps_dir}/sitemap_test_models_2.xml.gz"
370
+ def third_sitemap_file
371
+ "#{sitemaps_dir}/sitemap_2.xml.gz"
520
372
  end
521
373
 
522
374
  def sitemaps_dir
523
- "#{tmp_dir}/sitemaps"
375
+ tmp_dir
524
376
  end
525
377
 
526
378
  def tmp_dir