big_sitemap 0.8.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +8 -1
- data/Gemfile.lock +11 -0
- data/History.txt +21 -0
- data/README.rdoc +30 -94
- data/Rakefile +2 -2
- data/VERSION.yml +4 -4
- data/lib/big_sitemap.rb +181 -104
- data/lib/big_sitemap/builder.rb +28 -27
- data/test/big_sitemap_test.rb +152 -300
- data/test/fixtures/test_model.rb +1 -1
- metadata +69 -61
- data/.gitignore +0 -3
- data/big_sitemap.gemspec +0 -58
data/lib/big_sitemap/builder.rb
CHANGED
@@ -14,35 +14,36 @@ class BigSitemap
|
|
14
14
|
@gzip = options.delete(:gzip)
|
15
15
|
@max_urls = options.delete(:max_urls) || MAX_URLS
|
16
16
|
@type = options.delete(:type)
|
17
|
-
@
|
17
|
+
@filepaths = []
|
18
18
|
@parts = options.delete(:start_part_id) || 0
|
19
|
-
@
|
19
|
+
@partial_update = options.delete(:partial_update)
|
20
20
|
|
21
|
-
@filename
|
21
|
+
@filename = options.delete(:filename)
|
22
22
|
@current_filename = nil
|
23
23
|
@tmp_filename = nil
|
24
|
-
@target
|
24
|
+
@target = _get_writer
|
25
25
|
|
26
26
|
@level = 0
|
27
27
|
@opened_tags = []
|
28
28
|
_init_document
|
29
29
|
end
|
30
30
|
|
31
|
-
def add_url!(
|
32
|
-
_rotate(
|
33
|
-
|
31
|
+
def add_url!(location, options={})
|
32
|
+
_rotate(options[:id]) if @max_urls == @urls
|
34
33
|
_open_tag 'url'
|
35
|
-
|
36
|
-
tag! '
|
37
|
-
tag! '
|
38
|
-
tag! '
|
34
|
+
|
35
|
+
tag! 'loc', location
|
36
|
+
tag! 'lastmod', options[:last_modified].utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if options[:last_modified]
|
37
|
+
tag! 'changefreq', options[:change_frequency] || 'weekly'
|
38
|
+
tag! 'priority', options[:priority] if options[:priority]
|
39
|
+
|
39
40
|
_close_tag 'url'
|
40
41
|
|
41
42
|
@urls += 1
|
42
43
|
end
|
43
44
|
|
44
|
-
def
|
45
|
-
@
|
45
|
+
def filepaths!
|
46
|
+
@filepaths
|
46
47
|
end
|
47
48
|
|
48
49
|
def close!
|
@@ -60,7 +61,7 @@ class BigSitemap
|
|
60
61
|
|
61
62
|
def _get_writer
|
62
63
|
filename = @filename.dup
|
63
|
-
filename << "_#{@parts}" if @parts > 0
|
64
|
+
filename << "_#{@parts}" if @parts > 0 && @type != 'index'
|
64
65
|
filename << '.xml'
|
65
66
|
filename << '.gz' if @gzip
|
66
67
|
_open_writer(filename)
|
@@ -69,28 +70,28 @@ class BigSitemap
|
|
69
70
|
def _open_writer(filename)
|
70
71
|
@current_filename = filename
|
71
72
|
@tmp_filename = filename + ".tmp"
|
72
|
-
@
|
73
|
-
file = ::File.open(@tmp_filename, 'w
|
73
|
+
@filepaths << filename
|
74
|
+
file = ::File.open(@tmp_filename, 'w+:ASCII-8BIT')
|
74
75
|
@gzip ? ::Zlib::GzipWriter.new(file) : file
|
75
76
|
end
|
76
77
|
|
77
|
-
def _init_document(
|
78
|
+
def _init_document(name='urlset', attrs=HEADER_ATTRIBUTES)
|
78
79
|
@urls = 0
|
79
80
|
target!.print '<?xml version="1.0" encoding="UTF-8"?>'
|
80
81
|
_newline
|
81
82
|
_open_tag name, attrs
|
82
83
|
end
|
83
84
|
|
84
|
-
def _rotate(part_nr
|
85
|
+
def _rotate(part_nr=nil)
|
85
86
|
# write out the current document and start writing into a new file
|
86
87
|
close!
|
87
|
-
@parts =
|
88
|
+
@parts = part_nr || @parts + 1
|
88
89
|
@target = _get_writer
|
89
90
|
_init_document
|
90
91
|
end
|
91
92
|
|
92
93
|
# opens a tag, bumps up level but doesn't require a block
|
93
|
-
def _open_tag(name, attrs
|
94
|
+
def _open_tag(name, attrs={})
|
94
95
|
_indent
|
95
96
|
_start_tag(name, attrs)
|
96
97
|
_newline
|
@@ -98,8 +99,8 @@ class BigSitemap
|
|
98
99
|
@opened_tags << name
|
99
100
|
end
|
100
101
|
|
101
|
-
def _start_tag(name, attrs
|
102
|
-
attrs = attrs.map { |attr,value| %Q( #{attr}="#{value}") }.join('')
|
102
|
+
def _start_tag(name, attrs={})
|
103
|
+
attrs = attrs.map { |attr, value| %Q( #{attr}="#{value}") }.join('')
|
103
104
|
target!.print "<#{name}#{attrs}>"
|
104
105
|
end
|
105
106
|
|
@@ -131,12 +132,10 @@ class BigSitemap
|
|
131
132
|
end
|
132
133
|
|
133
134
|
def _indent
|
134
|
-
return if @gzip
|
135
135
|
target!.print " " * @level
|
136
136
|
end
|
137
137
|
|
138
138
|
def _newline
|
139
|
-
return if @gzip
|
140
139
|
target!.puts ''
|
141
140
|
end
|
142
141
|
end
|
@@ -147,10 +146,12 @@ class BigSitemap
|
|
147
146
|
super(name, attrs)
|
148
147
|
end
|
149
148
|
|
150
|
-
def add_url!(
|
149
|
+
def add_url!(location, options={})
|
151
150
|
_open_tag 'sitemap'
|
152
|
-
|
153
|
-
tag! '
|
151
|
+
|
152
|
+
tag! 'loc', location
|
153
|
+
tag! 'lastmod', options[:last_modified].utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if options[:last_modified]
|
154
|
+
|
154
155
|
_close_tag 'sitemap'
|
155
156
|
end
|
156
157
|
end
|
data/test/big_sitemap_test.rb
CHANGED
@@ -31,379 +31,271 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
31
31
|
assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
|
32
32
|
end
|
33
33
|
|
34
|
-
should 'generate
|
35
|
-
|
36
|
-
assert File.exists?
|
34
|
+
should 'generate sitemap index file' do
|
35
|
+
generate_sitemap { add '/foo' }
|
36
|
+
assert File.exists? first_sitemap_file
|
37
37
|
end
|
38
38
|
|
39
|
-
should 'generate
|
40
|
-
|
41
|
-
|
42
|
-
@sitemap.generate
|
43
|
-
assert File.exists?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
|
39
|
+
should 'generate static file' do
|
40
|
+
generate_sitemap { add '/foo' }
|
41
|
+
assert File.exists? first_sitemap_file
|
44
42
|
end
|
45
43
|
|
46
|
-
should '
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
should 'generate two sitemap model files for the same model with different options' do
|
54
|
-
create_sitemap
|
55
|
-
add_model(:path => 'foo')
|
56
|
-
add_model(:path => 'bar')
|
57
|
-
@sitemap.generate
|
44
|
+
should 'should add paths' do
|
45
|
+
generate_sitemap do
|
46
|
+
add '/', {:last_modified => Time.now, :change_frequency => 'weekly', :priority => 0.5}
|
47
|
+
add '/about', {:last_modified => Time.now, :change_frequency => 'weekly', :priority => 0.5}
|
48
|
+
end
|
58
49
|
|
59
|
-
|
60
|
-
|
50
|
+
elems = elements first_sitemap_file, 'loc'
|
51
|
+
assert_equal 'http://example.com/', elems.first.text
|
52
|
+
assert_equal 'http://example.com/about', elems.last.text
|
61
53
|
end
|
62
54
|
|
63
55
|
context 'Sitemap index file' do
|
64
56
|
should 'contain one sitemapindex element' do
|
65
|
-
|
57
|
+
generate_sitemap { add '/' }
|
66
58
|
assert_equal 1, num_elements(sitemaps_index_file, 'sitemapindex')
|
67
59
|
end
|
68
60
|
|
69
61
|
should 'contain one sitemap element' do
|
70
|
-
|
62
|
+
generate_sitemap { add '/' }
|
71
63
|
assert_equal 1, num_elements(sitemaps_index_file, 'sitemap')
|
72
64
|
end
|
73
65
|
|
74
66
|
should 'contain one loc element' do
|
75
|
-
|
67
|
+
generate_sitemap { add '/' }
|
76
68
|
assert_equal 1, num_elements(sitemaps_index_file, 'loc')
|
77
69
|
end
|
78
70
|
|
79
71
|
should 'contain one lastmod element' do
|
80
|
-
|
72
|
+
generate_sitemap { add '/' }
|
81
73
|
assert_equal 1, num_elements(sitemaps_index_file, 'lastmod')
|
82
74
|
end
|
83
75
|
|
84
76
|
should 'contain two loc elements' do
|
85
|
-
|
77
|
+
generate_sitemap(:max_per_sitemap => 2) do
|
78
|
+
4.times { add '/' }
|
79
|
+
end
|
80
|
+
|
86
81
|
assert_equal 2, num_elements(sitemaps_index_file, 'loc')
|
87
82
|
end
|
88
83
|
|
89
84
|
should 'contain two lastmod elements' do
|
90
|
-
|
85
|
+
generate_sitemap(:max_per_sitemap => 2) do
|
86
|
+
4.times { add '/' }
|
87
|
+
end
|
88
|
+
|
91
89
|
assert_equal 2, num_elements(sitemaps_index_file, 'lastmod')
|
92
90
|
end
|
93
91
|
|
94
92
|
should 'not be gzipped' do
|
95
|
-
|
93
|
+
generate_sitemap(:gzip => false) { add '/' }
|
96
94
|
assert File.exists?(unzipped_sitemaps_index_file)
|
97
95
|
end
|
98
96
|
end
|
99
97
|
|
100
|
-
context 'Sitemap
|
98
|
+
context 'Sitemap file' do
|
101
99
|
should 'contain one urlset element' do
|
102
|
-
|
103
|
-
assert_equal 1, num_elements(
|
100
|
+
generate_sitemap { add '/' }
|
101
|
+
assert_equal 1, num_elements(first_sitemap_file, 'urlset')
|
104
102
|
end
|
105
103
|
|
106
104
|
should 'contain several loc elements' do
|
107
|
-
|
108
|
-
|
105
|
+
generate_sitemap do
|
106
|
+
3.times { add '/' }
|
107
|
+
end
|
108
|
+
|
109
|
+
assert_equal 3, num_elements(first_sitemap_file, 'loc')
|
109
110
|
end
|
110
111
|
|
111
112
|
should 'contain several lastmod elements' do
|
112
|
-
|
113
|
-
|
113
|
+
generate_sitemap do
|
114
|
+
3.times { add '/', :last_modified => Time.now }
|
115
|
+
end
|
116
|
+
|
117
|
+
assert_equal 3, num_elements(first_sitemap_file, 'lastmod')
|
114
118
|
end
|
115
119
|
|
116
120
|
should 'contain several changefreq elements' do
|
117
|
-
|
118
|
-
|
121
|
+
generate_sitemap do
|
122
|
+
3.times { add '/' }
|
123
|
+
end
|
124
|
+
|
125
|
+
assert_equal 3, num_elements(first_sitemap_file, 'changefreq')
|
119
126
|
end
|
120
127
|
|
121
128
|
should 'contain several priority elements' do
|
122
|
-
|
123
|
-
|
129
|
+
generate_sitemap do
|
130
|
+
3.times { add '/', :priority => 0.2 }
|
131
|
+
end
|
132
|
+
|
133
|
+
assert_equal 3, num_elements(first_sitemap_file, 'priority')
|
124
134
|
end
|
125
135
|
|
126
136
|
should 'have a change frequency of weekly by default' do
|
127
|
-
|
128
|
-
|
129
|
-
|
137
|
+
generate_sitemap do
|
138
|
+
3.times { add '/' }
|
139
|
+
end
|
130
140
|
|
131
|
-
|
132
|
-
generate_one_sitemap_model_file(:change_frequency => 'daily')
|
133
|
-
assert_equal 'daily', elements(first_sitemaps_model_file, 'changefreq').first.text
|
141
|
+
assert_equal 'weekly', elements(first_sitemap_file, 'changefreq').first.text
|
134
142
|
end
|
135
143
|
|
136
|
-
should '
|
137
|
-
|
138
|
-
assert_equal
|
144
|
+
should 'have a change frequency of daily' do
|
145
|
+
generate_sitemap { add '/', :change_frequency => 'daily' }
|
146
|
+
assert_equal 'daily', elements(first_sitemap_file, 'changefreq').first.text
|
139
147
|
end
|
140
148
|
|
141
149
|
should 'have a priority of 0.2' do
|
142
|
-
|
143
|
-
assert_equal '0.2', elements(
|
144
|
-
end
|
145
|
-
|
146
|
-
should 'be able to use a lambda to specify priority' do
|
147
|
-
generate_one_sitemap_model_file(:priority => lambda {|m| m.priority})
|
148
|
-
assert_equal TestModel.new.priority.to_s, elements(first_sitemaps_model_file, 'priority').first.text
|
149
|
-
end
|
150
|
-
|
151
|
-
should 'be able to use a lambda to specify lastmod' do
|
152
|
-
generate_one_sitemap_model_file(:last_modified => lambda {|m| m.updated_at})
|
153
|
-
assert_equal TestModel.new.updated_at.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00'), elements(first_sitemaps_model_file, 'lastmod').first.text
|
150
|
+
generate_sitemap { add '/', :priority => 0.2 }
|
151
|
+
assert_equal '0.2', elements(first_sitemap_file, 'priority').first.text
|
154
152
|
end
|
155
153
|
|
156
154
|
should 'contain two loc element' do
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
end
|
155
|
+
generate_sitemap(:max_per_sitemap => 2) do
|
156
|
+
4.times { add '/' }
|
157
|
+
end
|
161
158
|
|
162
|
-
|
163
|
-
|
164
|
-
assert_equal 2, num_elements(first_sitemaps_model_file, 'lastmod')
|
165
|
-
assert_equal 2, num_elements(second_sitemaps_model_file, 'lastmod')
|
159
|
+
assert_equal 2, num_elements(first_sitemap_file, 'loc')
|
160
|
+
assert_equal 2, num_elements(second_sitemap_file, 'loc')
|
166
161
|
end
|
167
162
|
|
168
163
|
should 'contain two changefreq elements' do
|
169
|
-
|
170
|
-
|
171
|
-
|
164
|
+
generate_sitemap(:max_per_sitemap => 2) do
|
165
|
+
4.times { add '/' }
|
166
|
+
end
|
167
|
+
|
168
|
+
assert_equal 2, num_elements(first_sitemap_file, 'changefreq')
|
169
|
+
assert_equal 2, num_elements(second_sitemap_file, 'changefreq')
|
172
170
|
end
|
173
171
|
|
174
172
|
should 'contain two priority element' do
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
end
|
173
|
+
generate_sitemap(:max_per_sitemap => 2) do
|
174
|
+
4.times { add '/', :priority => 0.2 }
|
175
|
+
end
|
179
176
|
|
180
|
-
|
181
|
-
|
182
|
-
add_model
|
183
|
-
@sitemap.add(TestModel, :path => '/test_controller').generate
|
184
|
-
assert(
|
185
|
-
!elements(first_sitemaps_model_file, 'loc').first.text.match(/\/\/test_controller\//),
|
186
|
-
'URL does not contain a double-slash before the controller path'
|
187
|
-
)
|
177
|
+
assert_equal 2, num_elements(first_sitemap_file, 'priority')
|
178
|
+
assert_equal 2, num_elements(second_sitemap_file, 'priority')
|
188
179
|
end
|
189
180
|
|
190
181
|
should 'not be gzipped' do
|
191
|
-
|
192
|
-
assert File.exists?(
|
193
|
-
end
|
194
|
-
end
|
195
|
-
|
196
|
-
context 'add method' do
|
197
|
-
should 'be chainable' do
|
198
|
-
create_sitemap
|
199
|
-
assert_equal BigSitemap, @sitemap.add(TestModel).class
|
200
|
-
end
|
201
|
-
end
|
202
|
-
|
203
|
-
context 'add static method' do
|
204
|
-
should 'should generate static content' do
|
205
|
-
create_sitemap
|
206
|
-
@sitemap.add_static('/', Time.now, 'weekly', 0.5)
|
207
|
-
@sitemap.add_static('/about', Time.now, 'weekly', 0.5)
|
208
|
-
@sitemap.generate_static
|
209
|
-
elems = elements(static_sitemaps_file, 'loc')
|
210
|
-
assert_equal "/", elems.first.text
|
211
|
-
assert_equal "/about", elems.last.text
|
182
|
+
generate_sitemap(:gzip => false) { add '/' }
|
183
|
+
assert File.exists?(unzipped_first_sitemap_file)
|
212
184
|
end
|
213
185
|
end
|
214
186
|
|
215
187
|
context 'sanatize XML chars' do
|
216
188
|
should 'should transform ampersands' do
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
assert_equal "/something&else", elems.first.text
|
189
|
+
generate_sitemap { add '/something&else' }
|
190
|
+
elems = elements(first_sitemap_file, 'loc')
|
191
|
+
|
192
|
+
assert Zlib::GzipReader.open(first_sitemap_file).read.include?("/something&else")
|
193
|
+
assert_equal 'http://example.com/something&else', elems.first.text
|
223
194
|
end
|
224
195
|
end
|
225
196
|
|
226
197
|
context 'clean method' do
|
227
198
|
should 'be chainable' do
|
228
|
-
|
229
|
-
assert_equal BigSitemap,
|
199
|
+
sitemap = generate_sitemap { add '/' }
|
200
|
+
assert_equal BigSitemap, sitemap.clean.class
|
230
201
|
end
|
231
202
|
|
232
203
|
should 'clean all sitemap files' do
|
233
|
-
|
234
|
-
assert Dir
|
235
|
-
|
236
|
-
assert_equal
|
237
|
-
end
|
238
|
-
end
|
239
|
-
|
240
|
-
context 'generate method' do
|
241
|
-
should 'be chainable' do
|
242
|
-
create_sitemap
|
243
|
-
assert_equal BigSitemap, @sitemap.generate.class
|
204
|
+
sitemap = generate_sitemap { add '/' }
|
205
|
+
assert Dir["#{sitemaps_dir}/sitemap*"].size > 0, "#{sitemaps_dir} has sitemap files"
|
206
|
+
sitemap.clean
|
207
|
+
assert_equal 0, Dir["#{sitemaps_dir}/sitemap*"].size, "#{sitemaps_dir} is empty of sitemap files"
|
244
208
|
end
|
245
209
|
end
|
246
210
|
|
247
211
|
context 'sitemap index' do
|
248
212
|
should 'generate for all xml files in directory' do
|
249
|
-
|
250
|
-
@sitemap.clean
|
213
|
+
sitemap = generate_sitemap {}
|
251
214
|
File.open("#{sitemaps_dir}/sitemap_file1.xml", 'w')
|
252
215
|
File.open("#{sitemaps_dir}/sitemap_file2.xml.gz", 'w')
|
253
216
|
File.open("#{sitemaps_dir}/sitemap_file3.txt", 'w')
|
254
217
|
File.open("#{sitemaps_dir}/file4.xml", 'w')
|
255
218
|
File.open(unzipped_sitemaps_index_file, 'w')
|
256
|
-
|
219
|
+
sitemap.send :generate_sitemap_index
|
257
220
|
|
258
221
|
elem = elements(sitemaps_index_file, 'loc')
|
259
222
|
assert_equal 2, elem.size #no index and file3 and file4 found
|
260
|
-
assert_equal "http://example.com/
|
261
|
-
assert_equal "http://example.com/
|
223
|
+
assert_equal "http://example.com/sitemap_file1.xml", elem.first.text
|
224
|
+
assert_equal "http://example.com/sitemap_file2.xml.gz", elem.last.text
|
262
225
|
end
|
263
226
|
|
264
227
|
should 'generate for all for given file' do
|
265
|
-
|
266
|
-
@sitemap.clean
|
228
|
+
sitemap = generate_sitemap {}
|
267
229
|
File.open("#{sitemaps_dir}/sitemap_file1.xml", 'w')
|
268
230
|
File.open("#{sitemaps_dir}/sitemap_file2.xml.gz", 'w')
|
269
231
|
files = ["#{sitemaps_dir}/sitemap_file1.xml", "#{sitemaps_dir}/sitemap_file2.xml.gz"]
|
270
|
-
|
232
|
+
sitemap.send :generate_sitemap_index, files
|
271
233
|
|
272
234
|
elem = elements(sitemaps_index_file, 'loc')
|
273
235
|
assert_equal 2, elem.size
|
274
|
-
assert_equal "http://example.com/
|
275
|
-
assert_equal "http://example.com/
|
276
|
-
end
|
277
|
-
end
|
278
|
-
|
279
|
-
context 'get_last_id' do
|
280
|
-
should 'return last id' do
|
281
|
-
create_sitemap.clean
|
282
|
-
filename = "#{sitemaps_dir}/sitemap_file"
|
283
|
-
File.open("#{filename}_1.xml", 'w')
|
284
|
-
File.open("#{filename}_23.xml", 'w')
|
285
|
-
File.open("#{filename}_42.xml.gz", 'w')
|
286
|
-
File.open("#{filename}_9.xml", 'w')
|
287
|
-
assert_equal 42, @sitemap.send(:get_last_id, filename)
|
288
|
-
end
|
289
|
-
|
290
|
-
should 'return nil' do
|
291
|
-
create_sitemap.clean
|
292
|
-
filename = "#{sitemaps_dir}/sitemap_file"
|
293
|
-
assert_equal nil, @sitemap.send(:get_last_id, filename)
|
236
|
+
assert_equal "http://example.com/sitemap_file1.xml", elem.first.text
|
237
|
+
assert_equal "http://example.com/sitemap_file2.xml.gz", elem.last.text
|
294
238
|
end
|
295
239
|
end
|
296
240
|
|
297
241
|
context 'partial update' do
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
add_model(:num_items => 50) #TestModel
|
305
|
-
|
306
|
-
File.open("#{filename}_23.xml", 'w')
|
307
|
-
assert_equal "(id >= 23)", @sitemap.send(:prepare_update).first.last[:conditions]
|
308
|
-
|
309
|
-
File.open("#{filename}_42.xml", 'w')
|
310
|
-
assert_equal "(id >= 23) AND (id >= 42)", @sitemap.send(:prepare_update).first.last[:conditions]
|
242
|
+
should 'not recreate old files' do
|
243
|
+
# The first run should generate all the files
|
244
|
+
generate_sitemap(:max_per_sitemap => 2, :partial_update => true, :gzip => false) do
|
245
|
+
[10, 20, 30, 40, 50].each do |i|
|
246
|
+
add "/#{i}", :id => i
|
247
|
+
end
|
311
248
|
end
|
312
249
|
|
313
|
-
|
314
|
-
filename = "#{sitemaps_dir}/sitemap_test_models"
|
250
|
+
filename = "#{sitemaps_dir}/sitemap"
|
315
251
|
|
316
|
-
|
317
|
-
|
252
|
+
assert File.exists? "#{filename}.xml" # ids 10 and 20
|
253
|
+
assert File.exists? "#{filename}_30.xml" # ids 30 and 40
|
254
|
+
assert File.exists? "#{filename}_50.xml" # id 50
|
318
255
|
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
end
|
256
|
+
# Move the files so we can test if they are re-created
|
257
|
+
FileUtils.mv "#{filename}.xml", "#{filename}.bak.xml"
|
258
|
+
FileUtils.mv "#{filename}_30.xml", "#{filename}_30.bak.xml"
|
323
259
|
|
324
|
-
|
325
|
-
|
326
|
-
filename = "#{sitemaps_dir}/sitemap_test_models"
|
260
|
+
# Store the original file size so we can compare it later
|
261
|
+
original_size = File.size "#{filename}_50.xml"
|
327
262
|
|
328
|
-
|
329
|
-
add_model(:num_items => 50 - last_id) #TestModel
|
263
|
+
start_id = nil
|
330
264
|
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
File.open("#{filename}_23.xml", 'w')
|
335
|
-
File.open("#{filename}_#{last_id}.xml", 'w')
|
336
|
-
@sitemap.generate
|
265
|
+
# Run a new update starting from the first ID of the last sitemap
|
266
|
+
generate_sitemap(:max_per_sitemap => 2, :partial_update => true, :gzip => false) do
|
267
|
+
start_id = first_id_of_last_sitemap
|
337
268
|
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
assert File.exists?("#{filename}_#{last_id}.xml")
|
342
|
-
elems = elements("#{filename}_#{last_id}.xml", 'loc').map(&:text)
|
343
|
-
|
344
|
-
assert_equal 5, elems.size
|
345
|
-
(28..32).each do |i|
|
346
|
-
assert elems.include? "http://example.com/test_models/#{i}"
|
269
|
+
[50, 60, 70, 80].each do |i|
|
270
|
+
add "/#{i}", :id => i
|
271
|
+
end
|
347
272
|
end
|
348
273
|
|
349
|
-
|
350
|
-
|
351
|
-
assert elems.include? "http://example.com/sitemaps/sitemap_test_models_9.xml"
|
352
|
-
assert elems.include? "http://example.com/sitemaps/sitemap_test_models_#{last_id}.xml"
|
353
|
-
assert elems.include? "http://example.com/sitemaps/sitemap_test_models_48.xml"
|
354
|
-
end
|
355
|
-
|
356
|
-
should 'generate sitemap, update should respect old files' do
|
357
|
-
max_id = 23
|
358
|
-
TestModel.current_id = 0
|
359
|
-
filename = "#{sitemaps_dir}/sitemap_test_models"
|
360
|
-
|
361
|
-
create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100).clean
|
362
|
-
add_model(:num_items => max_id) #TestModel
|
363
|
-
@sitemap.generate
|
364
|
-
|
365
|
-
# Dir["#{sitemaps_dir}/*"].each do |d| puts d; end
|
274
|
+
# Check the correct ID is returned for the beginning of the last sitemap
|
275
|
+
assert_equal 50, start_id
|
366
276
|
|
367
|
-
|
368
|
-
|
369
|
-
|
277
|
+
# Since we did a partial update, the earlier files shouldn't have been recreated
|
278
|
+
assert !File.exists?("#{filename}.xml") # ids 10 and 20
|
279
|
+
assert !File.exists?("#{filename}_30.xml") # ids 30 and 40
|
370
280
|
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
@sitemap.generate
|
281
|
+
# The last file of the first run should have been recreated with new records
|
282
|
+
# and a larger file size
|
283
|
+
assert (original_size < File.size?("#{filename}_50.xml"))
|
375
284
|
|
376
|
-
|
377
|
-
assert_equal 5, elements("#{filename}_21.xml", 'loc').size
|
285
|
+
elems = elements("#{filename}_50.xml", 'loc').map(&:text)
|
378
286
|
|
379
|
-
|
287
|
+
assert_equal 2, elems.size
|
380
288
|
|
381
|
-
|
382
|
-
|
383
|
-
assert elems.include? "http://example.com/test_models/#{i}"
|
289
|
+
[50, 60].each do |i|
|
290
|
+
assert elems.include? "http://example.com/#{i}"
|
384
291
|
end
|
385
292
|
|
386
|
-
|
387
|
-
|
388
|
-
assert_equal 3, elements("#{filename}_46.xml", 'loc').size
|
389
|
-
end
|
390
|
-
|
391
|
-
context 'escape' do
|
392
|
-
should 'add if not number' do
|
393
|
-
create_sitemap
|
394
|
-
data = {
|
395
|
-
42 => 42,
|
396
|
-
'23' => 23,
|
397
|
-
"test" => "'test'",
|
398
|
-
"test10" => "'test10'",
|
399
|
-
"10test" => "'10test'",
|
400
|
-
"10t' est" => "'10t\\' est'",
|
401
|
-
}
|
402
|
-
data.each do |key, value|
|
403
|
-
assert_equal value, @sitemap.send(:escape_if_string, key)
|
404
|
-
end
|
293
|
+
elems = elements(unzipped_sitemaps_index_file, 'loc').map(&:text)
|
405
294
|
|
406
|
-
|
295
|
+
assert elems.include? 'http://example.com/sitemap.bak.xml'
|
296
|
+
assert elems.include? 'http://example.com/sitemap_30.bak.xml'
|
297
|
+
assert elems.include? 'http://example.com/sitemap_50.xml'
|
298
|
+
assert elems.include? 'http://example.com/sitemap_70.xml'
|
407
299
|
end
|
408
300
|
|
409
301
|
context 'lockfile' do
|
@@ -411,10 +303,10 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
411
303
|
sitemap = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)
|
412
304
|
|
413
305
|
sitemap.with_lock do
|
414
|
-
assert File.exists?(
|
306
|
+
assert File.exists?("#{sitemaps_dir}/generator.lock")
|
415
307
|
end
|
416
308
|
|
417
|
-
assert !File.exists?(
|
309
|
+
assert !File.exists?("#{sitemaps_dir}/generator.lock")
|
418
310
|
end
|
419
311
|
|
420
312
|
should 'not catch error not related to lock' do
|
@@ -425,7 +317,6 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
425
317
|
raise "Wrong"
|
426
318
|
end
|
427
319
|
end
|
428
|
-
|
429
320
|
end
|
430
321
|
|
431
322
|
should 'throw error if lock exits' do
|
@@ -442,53 +333,18 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
442
333
|
|
443
334
|
end
|
444
335
|
end
|
445
|
-
|
446
336
|
end
|
447
337
|
end
|
448
338
|
|
449
339
|
private
|
450
|
-
def
|
451
|
-
|
452
|
-
end
|
453
|
-
|
454
|
-
def create_sitemap(options={})
|
455
|
-
@sitemap = BigSitemap.new({
|
456
|
-
:base_url => 'http://example.com',
|
457
|
-
:document_root => tmp_dir,
|
458
|
-
:ping_google => false
|
459
|
-
}.update(options))
|
460
|
-
end
|
461
|
-
|
462
|
-
def generate_sitemap_files(options={})
|
463
|
-
create_sitemap(options)
|
464
|
-
add_model
|
465
|
-
@sitemap.generate
|
340
|
+
def generate_sitemap(options={}, &block)
|
341
|
+
BigSitemap.generate(options.merge(:base_url => 'http://example.com', :document_root => tmp_dir), &block)
|
466
342
|
end
|
467
343
|
|
468
|
-
def
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
add_model(:change_frequency => change_frequency, :priority => priority)
|
473
|
-
@sitemap.generate
|
474
|
-
end
|
475
|
-
|
476
|
-
def generate_two_model_sitemap_files(options={})
|
477
|
-
change_frequency = options.delete(:change_frequency)
|
478
|
-
priority = options.delete(:priority)
|
479
|
-
create_sitemap(options.merge(:max_per_sitemap => 2, :batch_size => 1))
|
480
|
-
add_model(:num_items => 4, :change_frequency => change_frequency, :priority => priority)
|
481
|
-
@sitemap.generate
|
482
|
-
end
|
483
|
-
|
484
|
-
def add_model(options={})
|
485
|
-
num_items = options.delete(:num_items) || default_num_items
|
486
|
-
TestModel.stubs(:count_for_sitemap).returns(num_items)
|
487
|
-
@sitemap.add(TestModel, options)
|
488
|
-
end
|
489
|
-
|
490
|
-
def default_num_items
|
491
|
-
10
|
344
|
+
def delete_tmp_files
|
345
|
+
Dir["#{sitemaps_dir}/sitemap*"].each do |f|
|
346
|
+
FileUtils.rm_rf f
|
347
|
+
end
|
492
348
|
end
|
493
349
|
|
494
350
|
def sitemaps_index_file
|
@@ -499,28 +355,24 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
499
355
|
"#{sitemaps_dir}/sitemap_index.xml"
|
500
356
|
end
|
501
357
|
|
502
|
-
def
|
503
|
-
"#{sitemaps_dir}/
|
504
|
-
end
|
505
|
-
|
506
|
-
def first_sitemaps_model_file
|
507
|
-
"#{sitemaps_dir}/sitemap_test_models.xml.gz"
|
358
|
+
def unzipped_first_sitemap_file
|
359
|
+
"#{sitemaps_dir}/sitemap.xml"
|
508
360
|
end
|
509
361
|
|
510
|
-
def
|
511
|
-
"#{sitemaps_dir}/
|
362
|
+
def first_sitemap_file
|
363
|
+
"#{sitemaps_dir}/sitemap.xml.gz"
|
512
364
|
end
|
513
365
|
|
514
|
-
def
|
515
|
-
"#{sitemaps_dir}/
|
366
|
+
def second_sitemap_file
|
367
|
+
"#{sitemaps_dir}/sitemap_1.xml.gz"
|
516
368
|
end
|
517
369
|
|
518
|
-
def
|
519
|
-
"#{sitemaps_dir}/
|
370
|
+
def third_sitemap_file
|
371
|
+
"#{sitemaps_dir}/sitemap_2.xml.gz"
|
520
372
|
end
|
521
373
|
|
522
374
|
def sitemaps_dir
|
523
|
-
|
375
|
+
tmp_dir
|
524
376
|
end
|
525
377
|
|
526
378
|
def tmp_dir
|