big_sitemap 0.8.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +8 -1
- data/Gemfile.lock +11 -0
- data/History.txt +21 -0
- data/README.rdoc +30 -94
- data/Rakefile +2 -2
- data/VERSION.yml +4 -4
- data/lib/big_sitemap.rb +181 -104
- data/lib/big_sitemap/builder.rb +28 -27
- data/test/big_sitemap_test.rb +152 -300
- data/test/fixtures/test_model.rb +1 -1
- metadata +69 -61
- data/.gitignore +0 -3
- data/big_sitemap.gemspec +0 -58
data/lib/big_sitemap/builder.rb
CHANGED
@@ -14,35 +14,36 @@ class BigSitemap
|
|
14
14
|
@gzip = options.delete(:gzip)
|
15
15
|
@max_urls = options.delete(:max_urls) || MAX_URLS
|
16
16
|
@type = options.delete(:type)
|
17
|
-
@
|
17
|
+
@filepaths = []
|
18
18
|
@parts = options.delete(:start_part_id) || 0
|
19
|
-
@
|
19
|
+
@partial_update = options.delete(:partial_update)
|
20
20
|
|
21
|
-
@filename
|
21
|
+
@filename = options.delete(:filename)
|
22
22
|
@current_filename = nil
|
23
23
|
@tmp_filename = nil
|
24
|
-
@target
|
24
|
+
@target = _get_writer
|
25
25
|
|
26
26
|
@level = 0
|
27
27
|
@opened_tags = []
|
28
28
|
_init_document
|
29
29
|
end
|
30
30
|
|
31
|
-
def add_url!(
|
32
|
-
_rotate(
|
33
|
-
|
31
|
+
def add_url!(location, options={})
|
32
|
+
_rotate(options[:id]) if @max_urls == @urls
|
34
33
|
_open_tag 'url'
|
35
|
-
|
36
|
-
tag! '
|
37
|
-
tag! '
|
38
|
-
tag! '
|
34
|
+
|
35
|
+
tag! 'loc', location
|
36
|
+
tag! 'lastmod', options[:last_modified].utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if options[:last_modified]
|
37
|
+
tag! 'changefreq', options[:change_frequency] || 'weekly'
|
38
|
+
tag! 'priority', options[:priority] if options[:priority]
|
39
|
+
|
39
40
|
_close_tag 'url'
|
40
41
|
|
41
42
|
@urls += 1
|
42
43
|
end
|
43
44
|
|
44
|
-
def
|
45
|
-
@
|
45
|
+
def filepaths!
|
46
|
+
@filepaths
|
46
47
|
end
|
47
48
|
|
48
49
|
def close!
|
@@ -60,7 +61,7 @@ class BigSitemap
|
|
60
61
|
|
61
62
|
def _get_writer
|
62
63
|
filename = @filename.dup
|
63
|
-
filename << "_#{@parts}" if @parts > 0
|
64
|
+
filename << "_#{@parts}" if @parts > 0 && @type != 'index'
|
64
65
|
filename << '.xml'
|
65
66
|
filename << '.gz' if @gzip
|
66
67
|
_open_writer(filename)
|
@@ -69,28 +70,28 @@ class BigSitemap
|
|
69
70
|
def _open_writer(filename)
|
70
71
|
@current_filename = filename
|
71
72
|
@tmp_filename = filename + ".tmp"
|
72
|
-
@
|
73
|
-
file = ::File.open(@tmp_filename, 'w
|
73
|
+
@filepaths << filename
|
74
|
+
file = ::File.open(@tmp_filename, 'w+:ASCII-8BIT')
|
74
75
|
@gzip ? ::Zlib::GzipWriter.new(file) : file
|
75
76
|
end
|
76
77
|
|
77
|
-
def _init_document(
|
78
|
+
def _init_document(name='urlset', attrs=HEADER_ATTRIBUTES)
|
78
79
|
@urls = 0
|
79
80
|
target!.print '<?xml version="1.0" encoding="UTF-8"?>'
|
80
81
|
_newline
|
81
82
|
_open_tag name, attrs
|
82
83
|
end
|
83
84
|
|
84
|
-
def _rotate(part_nr
|
85
|
+
def _rotate(part_nr=nil)
|
85
86
|
# write out the current document and start writing into a new file
|
86
87
|
close!
|
87
|
-
@parts =
|
88
|
+
@parts = part_nr || @parts + 1
|
88
89
|
@target = _get_writer
|
89
90
|
_init_document
|
90
91
|
end
|
91
92
|
|
92
93
|
# opens a tag, bumps up level but doesn't require a block
|
93
|
-
def _open_tag(name, attrs
|
94
|
+
def _open_tag(name, attrs={})
|
94
95
|
_indent
|
95
96
|
_start_tag(name, attrs)
|
96
97
|
_newline
|
@@ -98,8 +99,8 @@ class BigSitemap
|
|
98
99
|
@opened_tags << name
|
99
100
|
end
|
100
101
|
|
101
|
-
def _start_tag(name, attrs
|
102
|
-
attrs = attrs.map { |attr,value| %Q( #{attr}="#{value}") }.join('')
|
102
|
+
def _start_tag(name, attrs={})
|
103
|
+
attrs = attrs.map { |attr, value| %Q( #{attr}="#{value}") }.join('')
|
103
104
|
target!.print "<#{name}#{attrs}>"
|
104
105
|
end
|
105
106
|
|
@@ -131,12 +132,10 @@ class BigSitemap
|
|
131
132
|
end
|
132
133
|
|
133
134
|
def _indent
|
134
|
-
return if @gzip
|
135
135
|
target!.print " " * @level
|
136
136
|
end
|
137
137
|
|
138
138
|
def _newline
|
139
|
-
return if @gzip
|
140
139
|
target!.puts ''
|
141
140
|
end
|
142
141
|
end
|
@@ -147,10 +146,12 @@ class BigSitemap
|
|
147
146
|
super(name, attrs)
|
148
147
|
end
|
149
148
|
|
150
|
-
def add_url!(
|
149
|
+
def add_url!(location, options={})
|
151
150
|
_open_tag 'sitemap'
|
152
|
-
|
153
|
-
tag! '
|
151
|
+
|
152
|
+
tag! 'loc', location
|
153
|
+
tag! 'lastmod', options[:last_modified].utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if options[:last_modified]
|
154
|
+
|
154
155
|
_close_tag 'sitemap'
|
155
156
|
end
|
156
157
|
end
|
data/test/big_sitemap_test.rb
CHANGED
@@ -31,379 +31,271 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
31
31
|
assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
|
32
32
|
end
|
33
33
|
|
34
|
-
should 'generate
|
35
|
-
|
36
|
-
assert File.exists?
|
34
|
+
should 'generate sitemap index file' do
|
35
|
+
generate_sitemap { add '/foo' }
|
36
|
+
assert File.exists? first_sitemap_file
|
37
37
|
end
|
38
38
|
|
39
|
-
should 'generate
|
40
|
-
|
41
|
-
|
42
|
-
@sitemap.generate
|
43
|
-
assert File.exists?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
|
39
|
+
should 'generate static file' do
|
40
|
+
generate_sitemap { add '/foo' }
|
41
|
+
assert File.exists? first_sitemap_file
|
44
42
|
end
|
45
43
|
|
46
|
-
should '
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
should 'generate two sitemap model files for the same model with different options' do
|
54
|
-
create_sitemap
|
55
|
-
add_model(:path => 'foo')
|
56
|
-
add_model(:path => 'bar')
|
57
|
-
@sitemap.generate
|
44
|
+
should 'should add paths' do
|
45
|
+
generate_sitemap do
|
46
|
+
add '/', {:last_modified => Time.now, :change_frequency => 'weekly', :priority => 0.5}
|
47
|
+
add '/about', {:last_modified => Time.now, :change_frequency => 'weekly', :priority => 0.5}
|
48
|
+
end
|
58
49
|
|
59
|
-
|
60
|
-
|
50
|
+
elems = elements first_sitemap_file, 'loc'
|
51
|
+
assert_equal 'http://example.com/', elems.first.text
|
52
|
+
assert_equal 'http://example.com/about', elems.last.text
|
61
53
|
end
|
62
54
|
|
63
55
|
context 'Sitemap index file' do
|
64
56
|
should 'contain one sitemapindex element' do
|
65
|
-
|
57
|
+
generate_sitemap { add '/' }
|
66
58
|
assert_equal 1, num_elements(sitemaps_index_file, 'sitemapindex')
|
67
59
|
end
|
68
60
|
|
69
61
|
should 'contain one sitemap element' do
|
70
|
-
|
62
|
+
generate_sitemap { add '/' }
|
71
63
|
assert_equal 1, num_elements(sitemaps_index_file, 'sitemap')
|
72
64
|
end
|
73
65
|
|
74
66
|
should 'contain one loc element' do
|
75
|
-
|
67
|
+
generate_sitemap { add '/' }
|
76
68
|
assert_equal 1, num_elements(sitemaps_index_file, 'loc')
|
77
69
|
end
|
78
70
|
|
79
71
|
should 'contain one lastmod element' do
|
80
|
-
|
72
|
+
generate_sitemap { add '/' }
|
81
73
|
assert_equal 1, num_elements(sitemaps_index_file, 'lastmod')
|
82
74
|
end
|
83
75
|
|
84
76
|
should 'contain two loc elements' do
|
85
|
-
|
77
|
+
generate_sitemap(:max_per_sitemap => 2) do
|
78
|
+
4.times { add '/' }
|
79
|
+
end
|
80
|
+
|
86
81
|
assert_equal 2, num_elements(sitemaps_index_file, 'loc')
|
87
82
|
end
|
88
83
|
|
89
84
|
should 'contain two lastmod elements' do
|
90
|
-
|
85
|
+
generate_sitemap(:max_per_sitemap => 2) do
|
86
|
+
4.times { add '/' }
|
87
|
+
end
|
88
|
+
|
91
89
|
assert_equal 2, num_elements(sitemaps_index_file, 'lastmod')
|
92
90
|
end
|
93
91
|
|
94
92
|
should 'not be gzipped' do
|
95
|
-
|
93
|
+
generate_sitemap(:gzip => false) { add '/' }
|
96
94
|
assert File.exists?(unzipped_sitemaps_index_file)
|
97
95
|
end
|
98
96
|
end
|
99
97
|
|
100
|
-
context 'Sitemap
|
98
|
+
context 'Sitemap file' do
|
101
99
|
should 'contain one urlset element' do
|
102
|
-
|
103
|
-
assert_equal 1, num_elements(
|
100
|
+
generate_sitemap { add '/' }
|
101
|
+
assert_equal 1, num_elements(first_sitemap_file, 'urlset')
|
104
102
|
end
|
105
103
|
|
106
104
|
should 'contain several loc elements' do
|
107
|
-
|
108
|
-
|
105
|
+
generate_sitemap do
|
106
|
+
3.times { add '/' }
|
107
|
+
end
|
108
|
+
|
109
|
+
assert_equal 3, num_elements(first_sitemap_file, 'loc')
|
109
110
|
end
|
110
111
|
|
111
112
|
should 'contain several lastmod elements' do
|
112
|
-
|
113
|
-
|
113
|
+
generate_sitemap do
|
114
|
+
3.times { add '/', :last_modified => Time.now }
|
115
|
+
end
|
116
|
+
|
117
|
+
assert_equal 3, num_elements(first_sitemap_file, 'lastmod')
|
114
118
|
end
|
115
119
|
|
116
120
|
should 'contain several changefreq elements' do
|
117
|
-
|
118
|
-
|
121
|
+
generate_sitemap do
|
122
|
+
3.times { add '/' }
|
123
|
+
end
|
124
|
+
|
125
|
+
assert_equal 3, num_elements(first_sitemap_file, 'changefreq')
|
119
126
|
end
|
120
127
|
|
121
128
|
should 'contain several priority elements' do
|
122
|
-
|
123
|
-
|
129
|
+
generate_sitemap do
|
130
|
+
3.times { add '/', :priority => 0.2 }
|
131
|
+
end
|
132
|
+
|
133
|
+
assert_equal 3, num_elements(first_sitemap_file, 'priority')
|
124
134
|
end
|
125
135
|
|
126
136
|
should 'have a change frequency of weekly by default' do
|
127
|
-
|
128
|
-
|
129
|
-
|
137
|
+
generate_sitemap do
|
138
|
+
3.times { add '/' }
|
139
|
+
end
|
130
140
|
|
131
|
-
|
132
|
-
generate_one_sitemap_model_file(:change_frequency => 'daily')
|
133
|
-
assert_equal 'daily', elements(first_sitemaps_model_file, 'changefreq').first.text
|
141
|
+
assert_equal 'weekly', elements(first_sitemap_file, 'changefreq').first.text
|
134
142
|
end
|
135
143
|
|
136
|
-
should '
|
137
|
-
|
138
|
-
assert_equal
|
144
|
+
should 'have a change frequency of daily' do
|
145
|
+
generate_sitemap { add '/', :change_frequency => 'daily' }
|
146
|
+
assert_equal 'daily', elements(first_sitemap_file, 'changefreq').first.text
|
139
147
|
end
|
140
148
|
|
141
149
|
should 'have a priority of 0.2' do
|
142
|
-
|
143
|
-
assert_equal '0.2', elements(
|
144
|
-
end
|
145
|
-
|
146
|
-
should 'be able to use a lambda to specify priority' do
|
147
|
-
generate_one_sitemap_model_file(:priority => lambda {|m| m.priority})
|
148
|
-
assert_equal TestModel.new.priority.to_s, elements(first_sitemaps_model_file, 'priority').first.text
|
149
|
-
end
|
150
|
-
|
151
|
-
should 'be able to use a lambda to specify lastmod' do
|
152
|
-
generate_one_sitemap_model_file(:last_modified => lambda {|m| m.updated_at})
|
153
|
-
assert_equal TestModel.new.updated_at.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00'), elements(first_sitemaps_model_file, 'lastmod').first.text
|
150
|
+
generate_sitemap { add '/', :priority => 0.2 }
|
151
|
+
assert_equal '0.2', elements(first_sitemap_file, 'priority').first.text
|
154
152
|
end
|
155
153
|
|
156
154
|
should 'contain two loc element' do
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
end
|
155
|
+
generate_sitemap(:max_per_sitemap => 2) do
|
156
|
+
4.times { add '/' }
|
157
|
+
end
|
161
158
|
|
162
|
-
|
163
|
-
|
164
|
-
assert_equal 2, num_elements(first_sitemaps_model_file, 'lastmod')
|
165
|
-
assert_equal 2, num_elements(second_sitemaps_model_file, 'lastmod')
|
159
|
+
assert_equal 2, num_elements(first_sitemap_file, 'loc')
|
160
|
+
assert_equal 2, num_elements(second_sitemap_file, 'loc')
|
166
161
|
end
|
167
162
|
|
168
163
|
should 'contain two changefreq elements' do
|
169
|
-
|
170
|
-
|
171
|
-
|
164
|
+
generate_sitemap(:max_per_sitemap => 2) do
|
165
|
+
4.times { add '/' }
|
166
|
+
end
|
167
|
+
|
168
|
+
assert_equal 2, num_elements(first_sitemap_file, 'changefreq')
|
169
|
+
assert_equal 2, num_elements(second_sitemap_file, 'changefreq')
|
172
170
|
end
|
173
171
|
|
174
172
|
should 'contain two priority element' do
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
end
|
173
|
+
generate_sitemap(:max_per_sitemap => 2) do
|
174
|
+
4.times { add '/', :priority => 0.2 }
|
175
|
+
end
|
179
176
|
|
180
|
-
|
181
|
-
|
182
|
-
add_model
|
183
|
-
@sitemap.add(TestModel, :path => '/test_controller').generate
|
184
|
-
assert(
|
185
|
-
!elements(first_sitemaps_model_file, 'loc').first.text.match(/\/\/test_controller\//),
|
186
|
-
'URL does not contain a double-slash before the controller path'
|
187
|
-
)
|
177
|
+
assert_equal 2, num_elements(first_sitemap_file, 'priority')
|
178
|
+
assert_equal 2, num_elements(second_sitemap_file, 'priority')
|
188
179
|
end
|
189
180
|
|
190
181
|
should 'not be gzipped' do
|
191
|
-
|
192
|
-
assert File.exists?(
|
193
|
-
end
|
194
|
-
end
|
195
|
-
|
196
|
-
context 'add method' do
|
197
|
-
should 'be chainable' do
|
198
|
-
create_sitemap
|
199
|
-
assert_equal BigSitemap, @sitemap.add(TestModel).class
|
200
|
-
end
|
201
|
-
end
|
202
|
-
|
203
|
-
context 'add static method' do
|
204
|
-
should 'should generate static content' do
|
205
|
-
create_sitemap
|
206
|
-
@sitemap.add_static('/', Time.now, 'weekly', 0.5)
|
207
|
-
@sitemap.add_static('/about', Time.now, 'weekly', 0.5)
|
208
|
-
@sitemap.generate_static
|
209
|
-
elems = elements(static_sitemaps_file, 'loc')
|
210
|
-
assert_equal "/", elems.first.text
|
211
|
-
assert_equal "/about", elems.last.text
|
182
|
+
generate_sitemap(:gzip => false) { add '/' }
|
183
|
+
assert File.exists?(unzipped_first_sitemap_file)
|
212
184
|
end
|
213
185
|
end
|
214
186
|
|
215
187
|
context 'sanatize XML chars' do
|
216
188
|
should 'should transform ampersands' do
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
assert_equal "/something&else", elems.first.text
|
189
|
+
generate_sitemap { add '/something&else' }
|
190
|
+
elems = elements(first_sitemap_file, 'loc')
|
191
|
+
|
192
|
+
assert Zlib::GzipReader.open(first_sitemap_file).read.include?("/something&else")
|
193
|
+
assert_equal 'http://example.com/something&else', elems.first.text
|
223
194
|
end
|
224
195
|
end
|
225
196
|
|
226
197
|
context 'clean method' do
|
227
198
|
should 'be chainable' do
|
228
|
-
|
229
|
-
assert_equal BigSitemap,
|
199
|
+
sitemap = generate_sitemap { add '/' }
|
200
|
+
assert_equal BigSitemap, sitemap.clean.class
|
230
201
|
end
|
231
202
|
|
232
203
|
should 'clean all sitemap files' do
|
233
|
-
|
234
|
-
assert Dir
|
235
|
-
|
236
|
-
assert_equal
|
237
|
-
end
|
238
|
-
end
|
239
|
-
|
240
|
-
context 'generate method' do
|
241
|
-
should 'be chainable' do
|
242
|
-
create_sitemap
|
243
|
-
assert_equal BigSitemap, @sitemap.generate.class
|
204
|
+
sitemap = generate_sitemap { add '/' }
|
205
|
+
assert Dir["#{sitemaps_dir}/sitemap*"].size > 0, "#{sitemaps_dir} has sitemap files"
|
206
|
+
sitemap.clean
|
207
|
+
assert_equal 0, Dir["#{sitemaps_dir}/sitemap*"].size, "#{sitemaps_dir} is empty of sitemap files"
|
244
208
|
end
|
245
209
|
end
|
246
210
|
|
247
211
|
context 'sitemap index' do
|
248
212
|
should 'generate for all xml files in directory' do
|
249
|
-
|
250
|
-
@sitemap.clean
|
213
|
+
sitemap = generate_sitemap {}
|
251
214
|
File.open("#{sitemaps_dir}/sitemap_file1.xml", 'w')
|
252
215
|
File.open("#{sitemaps_dir}/sitemap_file2.xml.gz", 'w')
|
253
216
|
File.open("#{sitemaps_dir}/sitemap_file3.txt", 'w')
|
254
217
|
File.open("#{sitemaps_dir}/file4.xml", 'w')
|
255
218
|
File.open(unzipped_sitemaps_index_file, 'w')
|
256
|
-
|
219
|
+
sitemap.send :generate_sitemap_index
|
257
220
|
|
258
221
|
elem = elements(sitemaps_index_file, 'loc')
|
259
222
|
assert_equal 2, elem.size #no index and file3 and file4 found
|
260
|
-
assert_equal "http://example.com/
|
261
|
-
assert_equal "http://example.com/
|
223
|
+
assert_equal "http://example.com/sitemap_file1.xml", elem.first.text
|
224
|
+
assert_equal "http://example.com/sitemap_file2.xml.gz", elem.last.text
|
262
225
|
end
|
263
226
|
|
264
227
|
should 'generate for all for given file' do
|
265
|
-
|
266
|
-
@sitemap.clean
|
228
|
+
sitemap = generate_sitemap {}
|
267
229
|
File.open("#{sitemaps_dir}/sitemap_file1.xml", 'w')
|
268
230
|
File.open("#{sitemaps_dir}/sitemap_file2.xml.gz", 'w')
|
269
231
|
files = ["#{sitemaps_dir}/sitemap_file1.xml", "#{sitemaps_dir}/sitemap_file2.xml.gz"]
|
270
|
-
|
232
|
+
sitemap.send :generate_sitemap_index, files
|
271
233
|
|
272
234
|
elem = elements(sitemaps_index_file, 'loc')
|
273
235
|
assert_equal 2, elem.size
|
274
|
-
assert_equal "http://example.com/
|
275
|
-
assert_equal "http://example.com/
|
276
|
-
end
|
277
|
-
end
|
278
|
-
|
279
|
-
context 'get_last_id' do
|
280
|
-
should 'return last id' do
|
281
|
-
create_sitemap.clean
|
282
|
-
filename = "#{sitemaps_dir}/sitemap_file"
|
283
|
-
File.open("#{filename}_1.xml", 'w')
|
284
|
-
File.open("#{filename}_23.xml", 'w')
|
285
|
-
File.open("#{filename}_42.xml.gz", 'w')
|
286
|
-
File.open("#{filename}_9.xml", 'w')
|
287
|
-
assert_equal 42, @sitemap.send(:get_last_id, filename)
|
288
|
-
end
|
289
|
-
|
290
|
-
should 'return nil' do
|
291
|
-
create_sitemap.clean
|
292
|
-
filename = "#{sitemaps_dir}/sitemap_file"
|
293
|
-
assert_equal nil, @sitemap.send(:get_last_id, filename)
|
236
|
+
assert_equal "http://example.com/sitemap_file1.xml", elem.first.text
|
237
|
+
assert_equal "http://example.com/sitemap_file2.xml.gz", elem.last.text
|
294
238
|
end
|
295
239
|
end
|
296
240
|
|
297
241
|
context 'partial update' do
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
add_model(:num_items => 50) #TestModel
|
305
|
-
|
306
|
-
File.open("#{filename}_23.xml", 'w')
|
307
|
-
assert_equal "(id >= 23)", @sitemap.send(:prepare_update).first.last[:conditions]
|
308
|
-
|
309
|
-
File.open("#{filename}_42.xml", 'w')
|
310
|
-
assert_equal "(id >= 23) AND (id >= 42)", @sitemap.send(:prepare_update).first.last[:conditions]
|
242
|
+
should 'not recreate old files' do
|
243
|
+
# The first run should generate all the files
|
244
|
+
generate_sitemap(:max_per_sitemap => 2, :partial_update => true, :gzip => false) do
|
245
|
+
[10, 20, 30, 40, 50].each do |i|
|
246
|
+
add "/#{i}", :id => i
|
247
|
+
end
|
311
248
|
end
|
312
249
|
|
313
|
-
|
314
|
-
filename = "#{sitemaps_dir}/sitemap_test_models"
|
250
|
+
filename = "#{sitemaps_dir}/sitemap"
|
315
251
|
|
316
|
-
|
317
|
-
|
252
|
+
assert File.exists? "#{filename}.xml" # ids 10 and 20
|
253
|
+
assert File.exists? "#{filename}_30.xml" # ids 30 and 40
|
254
|
+
assert File.exists? "#{filename}_50.xml" # id 50
|
318
255
|
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
end
|
256
|
+
# Move the files so we can test if they are re-created
|
257
|
+
FileUtils.mv "#{filename}.xml", "#{filename}.bak.xml"
|
258
|
+
FileUtils.mv "#{filename}_30.xml", "#{filename}_30.bak.xml"
|
323
259
|
|
324
|
-
|
325
|
-
|
326
|
-
filename = "#{sitemaps_dir}/sitemap_test_models"
|
260
|
+
# Store the original file size so we can compare it later
|
261
|
+
original_size = File.size "#{filename}_50.xml"
|
327
262
|
|
328
|
-
|
329
|
-
add_model(:num_items => 50 - last_id) #TestModel
|
263
|
+
start_id = nil
|
330
264
|
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
File.open("#{filename}_23.xml", 'w')
|
335
|
-
File.open("#{filename}_#{last_id}.xml", 'w')
|
336
|
-
@sitemap.generate
|
265
|
+
# Run a new update starting from the first ID of the last sitemap
|
266
|
+
generate_sitemap(:max_per_sitemap => 2, :partial_update => true, :gzip => false) do
|
267
|
+
start_id = first_id_of_last_sitemap
|
337
268
|
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
assert File.exists?("#{filename}_#{last_id}.xml")
|
342
|
-
elems = elements("#{filename}_#{last_id}.xml", 'loc').map(&:text)
|
343
|
-
|
344
|
-
assert_equal 5, elems.size
|
345
|
-
(28..32).each do |i|
|
346
|
-
assert elems.include? "http://example.com/test_models/#{i}"
|
269
|
+
[50, 60, 70, 80].each do |i|
|
270
|
+
add "/#{i}", :id => i
|
271
|
+
end
|
347
272
|
end
|
348
273
|
|
349
|
-
|
350
|
-
|
351
|
-
assert elems.include? "http://example.com/sitemaps/sitemap_test_models_9.xml"
|
352
|
-
assert elems.include? "http://example.com/sitemaps/sitemap_test_models_#{last_id}.xml"
|
353
|
-
assert elems.include? "http://example.com/sitemaps/sitemap_test_models_48.xml"
|
354
|
-
end
|
355
|
-
|
356
|
-
should 'generate sitemap, update should respect old files' do
|
357
|
-
max_id = 23
|
358
|
-
TestModel.current_id = 0
|
359
|
-
filename = "#{sitemaps_dir}/sitemap_test_models"
|
360
|
-
|
361
|
-
create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100).clean
|
362
|
-
add_model(:num_items => max_id) #TestModel
|
363
|
-
@sitemap.generate
|
364
|
-
|
365
|
-
# Dir["#{sitemaps_dir}/*"].each do |d| puts d; end
|
274
|
+
# Check the correct ID is returned for the beginning of the last sitemap
|
275
|
+
assert_equal 50, start_id
|
366
276
|
|
367
|
-
|
368
|
-
|
369
|
-
|
277
|
+
# Since we did a partial update, the earlier files shouldn't have been recreated
|
278
|
+
assert !File.exists?("#{filename}.xml") # ids 10 and 20
|
279
|
+
assert !File.exists?("#{filename}_30.xml") # ids 30 and 40
|
370
280
|
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
@sitemap.generate
|
281
|
+
# The last file of the first run should have been recreated with new records
|
282
|
+
# and a larger file size
|
283
|
+
assert (original_size < File.size?("#{filename}_50.xml"))
|
375
284
|
|
376
|
-
|
377
|
-
assert_equal 5, elements("#{filename}_21.xml", 'loc').size
|
285
|
+
elems = elements("#{filename}_50.xml", 'loc').map(&:text)
|
378
286
|
|
379
|
-
|
287
|
+
assert_equal 2, elems.size
|
380
288
|
|
381
|
-
|
382
|
-
|
383
|
-
assert elems.include? "http://example.com/test_models/#{i}"
|
289
|
+
[50, 60].each do |i|
|
290
|
+
assert elems.include? "http://example.com/#{i}"
|
384
291
|
end
|
385
292
|
|
386
|
-
|
387
|
-
|
388
|
-
assert_equal 3, elements("#{filename}_46.xml", 'loc').size
|
389
|
-
end
|
390
|
-
|
391
|
-
context 'escape' do
|
392
|
-
should 'add if not number' do
|
393
|
-
create_sitemap
|
394
|
-
data = {
|
395
|
-
42 => 42,
|
396
|
-
'23' => 23,
|
397
|
-
"test" => "'test'",
|
398
|
-
"test10" => "'test10'",
|
399
|
-
"10test" => "'10test'",
|
400
|
-
"10t' est" => "'10t\\' est'",
|
401
|
-
}
|
402
|
-
data.each do |key, value|
|
403
|
-
assert_equal value, @sitemap.send(:escape_if_string, key)
|
404
|
-
end
|
293
|
+
elems = elements(unzipped_sitemaps_index_file, 'loc').map(&:text)
|
405
294
|
|
406
|
-
|
295
|
+
assert elems.include? 'http://example.com/sitemap.bak.xml'
|
296
|
+
assert elems.include? 'http://example.com/sitemap_30.bak.xml'
|
297
|
+
assert elems.include? 'http://example.com/sitemap_50.xml'
|
298
|
+
assert elems.include? 'http://example.com/sitemap_70.xml'
|
407
299
|
end
|
408
300
|
|
409
301
|
context 'lockfile' do
|
@@ -411,10 +303,10 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
411
303
|
sitemap = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)
|
412
304
|
|
413
305
|
sitemap.with_lock do
|
414
|
-
assert File.exists?(
|
306
|
+
assert File.exists?("#{sitemaps_dir}/generator.lock")
|
415
307
|
end
|
416
308
|
|
417
|
-
assert !File.exists?(
|
309
|
+
assert !File.exists?("#{sitemaps_dir}/generator.lock")
|
418
310
|
end
|
419
311
|
|
420
312
|
should 'not catch error not related to lock' do
|
@@ -425,7 +317,6 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
425
317
|
raise "Wrong"
|
426
318
|
end
|
427
319
|
end
|
428
|
-
|
429
320
|
end
|
430
321
|
|
431
322
|
should 'throw error if lock exits' do
|
@@ -442,53 +333,18 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
442
333
|
|
443
334
|
end
|
444
335
|
end
|
445
|
-
|
446
336
|
end
|
447
337
|
end
|
448
338
|
|
449
339
|
private
|
450
|
-
def
|
451
|
-
|
452
|
-
end
|
453
|
-
|
454
|
-
def create_sitemap(options={})
|
455
|
-
@sitemap = BigSitemap.new({
|
456
|
-
:base_url => 'http://example.com',
|
457
|
-
:document_root => tmp_dir,
|
458
|
-
:ping_google => false
|
459
|
-
}.update(options))
|
460
|
-
end
|
461
|
-
|
462
|
-
def generate_sitemap_files(options={})
|
463
|
-
create_sitemap(options)
|
464
|
-
add_model
|
465
|
-
@sitemap.generate
|
340
|
+
def generate_sitemap(options={}, &block)
|
341
|
+
BigSitemap.generate(options.merge(:base_url => 'http://example.com', :document_root => tmp_dir), &block)
|
466
342
|
end
|
467
343
|
|
468
|
-
def
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
add_model(:change_frequency => change_frequency, :priority => priority)
|
473
|
-
@sitemap.generate
|
474
|
-
end
|
475
|
-
|
476
|
-
def generate_two_model_sitemap_files(options={})
|
477
|
-
change_frequency = options.delete(:change_frequency)
|
478
|
-
priority = options.delete(:priority)
|
479
|
-
create_sitemap(options.merge(:max_per_sitemap => 2, :batch_size => 1))
|
480
|
-
add_model(:num_items => 4, :change_frequency => change_frequency, :priority => priority)
|
481
|
-
@sitemap.generate
|
482
|
-
end
|
483
|
-
|
484
|
-
def add_model(options={})
|
485
|
-
num_items = options.delete(:num_items) || default_num_items
|
486
|
-
TestModel.stubs(:count_for_sitemap).returns(num_items)
|
487
|
-
@sitemap.add(TestModel, options)
|
488
|
-
end
|
489
|
-
|
490
|
-
def default_num_items
|
491
|
-
10
|
344
|
+
def delete_tmp_files
|
345
|
+
Dir["#{sitemaps_dir}/sitemap*"].each do |f|
|
346
|
+
FileUtils.rm_rf f
|
347
|
+
end
|
492
348
|
end
|
493
349
|
|
494
350
|
def sitemaps_index_file
|
@@ -499,28 +355,24 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
499
355
|
"#{sitemaps_dir}/sitemap_index.xml"
|
500
356
|
end
|
501
357
|
|
502
|
-
def
|
503
|
-
"#{sitemaps_dir}/
|
504
|
-
end
|
505
|
-
|
506
|
-
def first_sitemaps_model_file
|
507
|
-
"#{sitemaps_dir}/sitemap_test_models.xml.gz"
|
358
|
+
def unzipped_first_sitemap_file
|
359
|
+
"#{sitemaps_dir}/sitemap.xml"
|
508
360
|
end
|
509
361
|
|
510
|
-
def
|
511
|
-
"#{sitemaps_dir}/
|
362
|
+
def first_sitemap_file
|
363
|
+
"#{sitemaps_dir}/sitemap.xml.gz"
|
512
364
|
end
|
513
365
|
|
514
|
-
def
|
515
|
-
"#{sitemaps_dir}/
|
366
|
+
def second_sitemap_file
|
367
|
+
"#{sitemaps_dir}/sitemap_1.xml.gz"
|
516
368
|
end
|
517
369
|
|
518
|
-
def
|
519
|
-
"#{sitemaps_dir}/
|
370
|
+
def third_sitemap_file
|
371
|
+
"#{sitemaps_dir}/sitemap_2.xml.gz"
|
520
372
|
end
|
521
373
|
|
522
374
|
def sitemaps_dir
|
523
|
-
|
375
|
+
tmp_dir
|
524
376
|
end
|
525
377
|
|
526
378
|
def tmp_dir
|