sc-big_sitemap 0.8.3.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -0
- data/Gemfile.lock +23 -0
- data/History.txt +88 -0
- data/LICENSE +22 -0
- data/README.rdoc +179 -0
- data/Rakefile +69 -0
- data/VERSION +1 -0
- data/lib/big_sitemap.rb +367 -0
- data/lib/big_sitemap/builder.rb +184 -0
- data/sc-big_sitemap.gemspec +22 -0
- data/test/big_sitemap_test.rb +485 -0
- data/test/fixtures/test_model.rb +48 -0
- data/test/test_helper.rb +117 -0
- metadata +114 -0
@@ -0,0 +1,184 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'zlib'
|
3
|
+
|
4
|
+
class BigSitemap
  # Streams one sitemap XML document at a time to disk, optionally gzipped.
  #
  # Output is written to "<file>.tmp" and only renamed to its final name by
  # #close!. Once +max_urls+ entries have been added, the current document is
  # closed and a new, numbered file ("<filename>_<part>.xml[.gz]") is started.
  class Builder
    # Default number of URLs written to a file before rotating to a new one.
    MAX_URLS = 50000

    # Attributes written on the root <urlset> element.
    HEADER_ATTRIBUTES = {
      'xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9',
      'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
      'xsi:schemaLocation' => "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
    }

    # Opens the first output file and writes the document header.
    #
    # NOTE: the recognised keys are removed from +options+ (Hash#delete).
    #
    # options:
    #   :filename       - base path without extension (it is dup'ed and extended)
    #   :gzip           - truthy: write through Zlib::GzipWriter, append ".gz",
    #                     and skip indentation/newlines
    #   :max_urls       - URLs per file, defaults to MAX_URLS
    #   :start_part_id  - initial part number, defaults to 0
    #   :partial_update - truthy: on rotation, use the part_nr given to #add_url!
    #   :type           - stored in @type; not read anywhere in this class
    def initialize(options)
      @gzip           = options.delete(:gzip)
      @max_urls       = options.delete(:max_urls) || MAX_URLS
      @type           = options.delete(:type)
      @paths          = []
      @parts          = options.delete(:start_part_id) || 0
      @custom_part_nr = options.delete(:partial_update)

      @filename         = options.delete(:filename)
      @current_filename = nil
      @tmp_filename     = nil
      @target           = _get_writer

      @level       = 0
      @opened_tags = []
      _init_document
    end

    # Appends one <url> entry; rotates to a new file first when the current
    # one already holds +max_urls+ entries.
    #
    # url       - value for <loc>
    # time      - optional Time; converted to UTC and written as <lastmod>
    # frequency - optional value for <changefreq>
    # priority  - optional value for <priority>
    # part_nr   - part number for the next file (only honoured when the builder
    #             was created with :partial_update)
    # extras    - additional tag => content pairs written before the standard tags
    #
    # Returns the number of URLs in the current file.
    def add_url!(url, time = nil, frequency = nil, priority = nil, part_nr = nil, extras = {})
      _rotate(part_nr) if @max_urls == @urls

      time = time.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if time
      _url_tag('url', extras.merge(:loc => url, :lastmod => time, :changefreq => frequency, :priority => priority))

      @urls += 1
    end

    # Final paths of every file this builder has opened so far.
    def paths!
      @paths
    end

    # Closes all open tags and the writer, then moves the temp file into place.
    def close!
      _close_document
      target!.close if target!.respond_to?(:close)
      # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
      File.delete(@current_filename) if File.exist?(@current_filename)
      File.rename(@tmp_filename, @current_filename)
    end

    # The IO-like object currently being written to.
    def target!
      @target
    end

    private

    # Builds the file name for the current part and opens a writer for it.
    def _get_writer
      filename = @filename.dup
      filename << "_#{@parts}" if @parts > 0
      filename << '.xml'
      filename << '.gz' if @gzip
      _open_writer(filename)
    end

    # Records +filename+ as the current target and opens "<filename>.tmp",
    # wrapped in a GzipWriter when gzip output is enabled.
    def _open_writer(filename)
      @current_filename = filename
      @tmp_filename     = filename + ".tmp"
      @paths << filename
      file = ::File.open(@tmp_filename, 'w+')
      @gzip ? ::Zlib::GzipWriter.new(file) : file
    end

    # Resets the URL counter and writes the XML declaration and root element.
    def _init_document( name = 'urlset', attrs = HEADER_ATTRIBUTES)
      @urls = 0
      target!.print '<?xml version="1.0" encoding="UTF-8"?>'
      _newline
      _open_tag name, attrs
    end

    # Writes <name> containing one child tag per non-nil/non-false value.
    def _url_tag(name, values = {})
      _open_tag name
      values.each do |key, value|
        tag! key, value if value
      end
      _close_tag name
    end

    # Writes out the current document and starts writing into a new file.
    def _rotate(part_nr = nil)
      close!
      @parts = (part_nr && @custom_part_nr) ? part_nr : @parts + 1
      @target = _get_writer
      _init_document
    end

    # Opens a tag, bumps up level but doesn't require a block.
    def _open_tag(name, attrs = {})
      _indent
      _start_tag(name, attrs)
      _newline
      @level += 1
      @opened_tags << name
    end

    # Prints "<name attr="value"...>" (self-closing when +single+ is true).
    def _start_tag(name, attrs = {}, single = false)
      attrs = attrs.map { |attr,value| %Q( #{attr}="#{value}") }.join('')
      target!.print "<#{name}#{attrs}#{'/' if single}>"
    end

    # Writes a leaf element. An empty-string +content+ yields a self-closing
    # tag; anything else is converted with to_s and XML-escaped.
    def tag!(name, content, attrs = {})
      _indent
      _start_tag(name, attrs, content == "")
      if content != ""
        target!.print _escape(content)
        _end_tag(name)
      end
      _newline
    end

    # Escapes the characters that may not appear literally in XML text.
    # '&' is replaced first so the entities produced for '<' and '>' are not
    # escaped a second time.
    def _escape(content)
      content.to_s.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
    end

    def _end_tag(name)
      target!.print "</#{name}>"
    end

    # Closes a tag block by decreasing the level and inserting a close tag.
    def _close_tag(name)
      @opened_tags.pop
      @level -= 1
      _indent
      _end_tag(name)
      _newline
    end

    # Closes every tag that is still open, innermost first.
    def _close_document
      # Iterate over a reversed copy: _close_tag pops from @opened_tags.
      @opened_tags.reverse.each do |name|
        _close_tag(name)
      end
    end

    # Indentation is skipped for gzipped output.
    def _indent
      return if @gzip
      target!.print " " * @level
    end

    # Newlines are skipped for gzipped output.
    def _newline
      return if @gzip
      target!.puts ''
    end
  end

  # Builds a <sitemapindex> document listing <sitemap> entries.
  class IndexBuilder < Builder
    def _init_document(name = 'sitemapindex', attrs = {'xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9'})
      #attrs.merge('xmlns:geo' => "http://www.google.com/geo/schemas/sitemap/1.0")
      super(name, attrs)
    end

    # Appends one <sitemap> entry. Unlike Builder#add_url! this neither counts
    # entries nor rotates files.
    def add_url!(url, time = nil)
      time = time.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if time
      _url_tag('sitemap', :loc => url, :lastmod => time)
    end
  end

  # Builds a <urlset> with the mobile namespace and tags every URL with an
  # empty <mobile:mobile/> element.
  class MobileBuilder < Builder

    def _init_document(name = 'urlset', attrs = HEADER_ATTRIBUTES)
      super(name, attrs.merge('xmlns:mobile' => "http://www.google.com/schemas/sitemap-mobile/1.0"))
    end

    def add_url!(url, time = nil, frequency = nil, priority = nil, part_nr = nil, extras = {})
      # The empty string makes tag! emit a self-closing <mobile:mobile/>.
      super(url, time, frequency, priority, part_nr, extras.merge('mobile:mobile' => ""))
    end

  end

  # Placeholder: no behaviour beyond Builder yet (geo support is commented out).
  class GeoBuilder < Builder
    #_build_geo if @geo

    # def _build_geo
    #   geo :geo do
    #     geo :format, 'kml'
    #   end
    # end
  end

end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for sc-big_sitemap. File lists are taken from git, so the
# gemspec is expected to be evaluated inside the repository checkout.
Gem::Specification.new do |s|
  s.name        = "sc-big_sitemap"
  # Resolve VERSION relative to this gemspec rather than the process's
  # working directory, so loading the spec from elsewhere does not fail.
  s.version     = File.read(File.expand_path('../VERSION', __FILE__)).strip
  s.authors     = ["Alex Rabarts", "Tobias Bielohlawek"]
  s.email       = ["alexrabarts@gmail.com", "tobi@soundcloud.com"]
  s.homepage    = %q{http://github.com/alexrabarts/big_sitemap}
  s.summary     = %q{A Sitemap generator specifically designed for large sites (although it works equally well with small sites)}
  s.description = %q{BigSitemap is a Sitemapgenerator suitable for applications with greater than 50,000 URLs. It splits large Sitemaps into multiple files, gzips the files to minimize bandwidth usage, batches database queries to minimize memory usage, supports increment updates, can be set up with just a few lines of code and is compatible with just about any framework.}

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Each entry may be "name" or "name version-requirement"; splitting on a
  # space turns it into the argument list for add_development_dependency.
  ["bundler", "shoulda", "mocha", "nokogiri"].each do |dependency|
    s.add_development_dependency(*dependency.split(' '))
  end

end
|
@@ -0,0 +1,485 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
# Tests for BigSitemap file generation. Helper methods such as tmp_dir,
# create_sitemap, add_model, elements and the *_file path helpers are not
# defined in this class (presumably provided by test_helper).
class BigSitemapTest < Test::Unit::TestCase
  def setup
    delete_tmp_files
  end

  def teardown
    delete_tmp_files
  end

  should 'raise an error if the :base_url option is not specified' do
    assert_nothing_raised { BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir) }
    assert_raise(ArgumentError) { BigSitemap.new(:document_root => tmp_dir) }
  end

  should 'generate the same base URL with :base_url option' do
    options = {:document_root => tmp_dir}
    url = 'http://example.com'
    sitemap = BigSitemap.new(options.merge(:base_url => url))

    assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
  end

  should 'generate the same base URL with :url_options option' do
    options = {:document_root => tmp_dir}
    url = 'http://example.com'
    sitemap = BigSitemap.new(options.merge(:url_options => {:host => 'example.com'}))

    assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
  end

  should 'generate a sitemap index file' do
    generate_sitemap_files
    assert File.exist?(sitemaps_index_file)
  end

  should 'generate a single sitemap model file' do
    create_sitemap
    add_model
    @sitemap.generate
    assert File.exist?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
  end

  should 'generate two sitemap model files' do
    generate_two_model_sitemap_files
    assert File.exist?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
    assert File.exist?(second_sitemaps_model_file), "#{second_sitemaps_model_file} exists"
    assert !File.exist?(third_sitemaps_model_file), "#{third_sitemaps_model_file} does not exist"
  end

  should 'generate two sitemap model files for the same model with different options' do
    create_sitemap
    add_model(:path => 'foo')
    add_model(:path => 'bar')
    @sitemap.generate

    assert File.exist?(first_sitemaps_model_file), "#{first_sitemaps_model_file} exists"
    assert File.exist?(second_sitemaps_model_file), "#{second_sitemaps_model_file} exists"
  end

  should 'generate with absolute url_path' do
    url = 'http://example.com'
    url_path = 'http://external.com'

    options = {:document_root => tmp_dir, :document_path => 'sitemaps', :url_path => url_path}
    @sitemap = BigSitemap.new(options.merge(:url_options => {:host => 'example.com'}))

    add_model(:path => 'foo')
    @sitemap.generate

    assert_equal 1, num_elements(sitemaps_index_file, 'sitemapindex')
    assert_equal 1, num_elements(sitemaps_index_file, 'lastmod')

    # Block form closes the gzip reader once the content has been read.
    index_xml = Zlib::GzipReader.open(sitemaps_index_file) { |gz| gz.read }
    assert index_xml.include?("http://external.com")
  end

  context 'Sitemap index file' do
    should 'contain one sitemapindex element' do
      generate_sitemap_files
      assert_equal 1, num_elements(sitemaps_index_file, 'sitemapindex')
    end

    should 'contain one sitemap element' do
      generate_sitemap_files
      assert_equal 1, num_elements(sitemaps_index_file, 'sitemap')
    end

    should 'contain one loc element' do
      generate_one_sitemap_model_file
      assert_equal 1, num_elements(sitemaps_index_file, 'loc')
    end

    should 'contain one lastmod element' do
      generate_one_sitemap_model_file
      assert_equal 1, num_elements(sitemaps_index_file, 'lastmod')
    end

    should 'contain two loc elements' do
      generate_two_model_sitemap_files
      assert_equal 2, num_elements(sitemaps_index_file, 'loc')
    end

    should 'contain two lastmod elements' do
      generate_two_model_sitemap_files
      assert_equal 2, num_elements(sitemaps_index_file, 'lastmod')
    end

    should 'not be gzipped' do
      generate_sitemap_files(:gzip => false)
      assert File.exist?(unzipped_sitemaps_index_file)
    end
  end

  context 'Sitemap model file' do
    should 'contain one urlset element' do
      generate_one_sitemap_model_file
      assert_equal 1, num_elements(first_sitemaps_model_file, 'urlset')
    end

    should 'contain several loc elements' do
      generate_one_sitemap_model_file
      assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'loc')
    end

    should 'contain several lastmod elements' do
      generate_one_sitemap_model_file
      assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'lastmod')
    end

    should 'contain several changefreq elements' do
      generate_one_sitemap_model_file
      assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'changefreq')
    end

    should 'contain several priority elements' do
      generate_one_sitemap_model_file(:priority => 0.2)
      assert_equal default_num_items, num_elements(first_sitemaps_model_file, 'priority')
    end

    should 'have a change frequency of weekly by default' do
      generate_one_sitemap_model_file
      assert_equal 'weekly', elements(first_sitemaps_model_file, 'changefreq').first.text
    end

    should 'have a change frequency of daily' do
      generate_one_sitemap_model_file(:change_frequency => 'daily')
      assert_equal 'daily', elements(first_sitemaps_model_file, 'changefreq').first.text
    end

    should 'be able to use a lambda to specify change frequency' do
      generate_one_sitemap_model_file(:change_frequency => lambda {|m| m.change_frequency})
      assert_equal TestModel.new.change_frequency, elements(first_sitemaps_model_file, 'changefreq').first.text
    end

    should 'have a priority of 0.2' do
      generate_one_sitemap_model_file(:priority => 0.2)
      assert_equal '0.2', elements(first_sitemaps_model_file, 'priority').first.text
    end

    should 'be able to use a lambda to specify priority' do
      generate_one_sitemap_model_file(:priority => lambda {|m| m.priority})
      assert_equal TestModel.new.priority.to_s, elements(first_sitemaps_model_file, 'priority').first.text
    end

    should 'be able to use a lambda to specify lastmod' do
      generate_one_sitemap_model_file(:last_modified => lambda {|m| m.updated_at})
      assert_equal TestModel.new.updated_at.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00'), elements(first_sitemaps_model_file, 'lastmod').first.text
    end

    should 'contain two loc element' do
      generate_two_model_sitemap_files
      assert_equal 2, num_elements(first_sitemaps_model_file, 'loc')
      assert_equal 2, num_elements(second_sitemaps_model_file, 'loc')
    end

    should 'contain two lastmod element' do
      generate_two_model_sitemap_files
      assert_equal 2, num_elements(first_sitemaps_model_file, 'lastmod')
      assert_equal 2, num_elements(second_sitemaps_model_file, 'lastmod')
    end

    should 'contain two changefreq elements' do
      generate_two_model_sitemap_files
      assert_equal 2, num_elements(first_sitemaps_model_file, 'changefreq')
      assert_equal 2, num_elements(second_sitemaps_model_file, 'changefreq')
    end

    should 'contain two priority element' do
      generate_two_model_sitemap_files(:priority => 0.2)
      assert_equal 2, num_elements(first_sitemaps_model_file, 'priority')
      assert_equal 2, num_elements(second_sitemaps_model_file, 'priority')
    end

    should 'strip leading slashes from controller paths' do
      create_sitemap
      add_model
      @sitemap.add(TestModel, :path => '/test_controller').generate
      assert(
        !elements(first_sitemaps_model_file, 'loc').first.text.match(/\/\/test_controller\//),
        'URL does not contain a double-slash before the controller path'
      )
    end

    should 'not be gzipped' do
      generate_one_sitemap_model_file(:gzip => false)
      assert File.exist?(unzipped_first_sitemaps_model_file)
    end
  end

  context 'add method' do
    should 'be chainable' do
      create_sitemap
      assert_equal BigSitemap, @sitemap.add(TestModel).class
    end
  end

  context 'add static method' do
    should 'should generate static content' do
      create_sitemap
      @sitemap.add_static('/', Time.now, 'weekly', 0.5)
      @sitemap.add_static('/about', Time.now, 'weekly', 0.5)
      @sitemap.generate_static
      elems = elements(static_sitemaps_file, 'loc')
      assert_equal "/", elems.first.text
      assert_equal "/about", elems.last.text
    end
  end

  context 'sanatize XML chars' do
    should 'should transform ampersands' do
      create_sitemap
      @sitemap.add_static('/something&else', Time.now, 'weekly', 0.5)
      @sitemap.generate_static
      elems = elements(static_sitemaps_file, 'loc')
      # The raw file must carry the entity; the parsed text is the plain '&'.
      raw_xml = Zlib::GzipReader.open(static_sitemaps_file) { |gz| gz.read }
      assert raw_xml.include?("/something&amp;else")
      assert_equal "/something&else", elems.first.text
    end
  end

  context 'clean method' do
    should 'be chainable' do
      create_sitemap
      assert_equal BigSitemap, @sitemap.clean.class
    end

    should 'clean all sitemap files' do
      generate_sitemap_files
      assert Dir.entries(sitemaps_dir).size > 2, "#{sitemaps_dir} is not empty" # ['.', '..'].size == 2
      @sitemap.clean
      assert_equal 2, Dir.entries(sitemaps_dir).size, "#{sitemaps_dir} is empty"
    end
  end

  context 'generate method' do
    should 'be chainable' do
      create_sitemap
      assert_equal BigSitemap, @sitemap.generate.class
    end
  end

  context 'sitemap index' do
    should 'generate for all xml files in directory' do
      create_sitemap
      @sitemap.clean
      create_files(
        "#{sitemaps_dir}/sitemap_file1.xml",
        "#{sitemaps_dir}/sitemap_file2.xml.gz",
        "#{sitemaps_dir}/sitemap_file3.txt",
        "#{sitemaps_dir}/file4.xml",
        unzipped_sitemaps_index_file)
      @sitemap.send :generate_sitemap_index

      elem = elements(sitemaps_index_file, 'loc')
      assert_equal 2, elem.size #no index and file3 and file4 found
      assert_equal "http://example.com/sitemaps/sitemap_file1.xml", elem.first.text
      assert_equal "http://example.com/sitemaps/sitemap_file2.xml.gz", elem.last.text
    end

    should 'generate for all for given file' do
      create_sitemap
      @sitemap.clean
      files = ["#{sitemaps_dir}/sitemap_file1.xml", "#{sitemaps_dir}/sitemap_file2.xml.gz"]
      create_files(*files)
      @sitemap.send :generate_sitemap_index, files

      elem = elements(sitemaps_index_file, 'loc')
      assert_equal 2, elem.size
      assert_equal "http://example.com/sitemaps/sitemap_file1.xml", elem.first.text
      assert_equal "http://example.com/sitemaps/sitemap_file2.xml.gz", elem.last.text
    end
  end

  context 'get_last_id' do
    should 'return last id' do
      create_sitemap.clean
      filename = "#{sitemaps_dir}/sitemap_file"
      create_files("#{filename}_1.xml",
                   "#{filename}_23.xml",
                   "#{filename}_42.xml.gz",
                   "#{filename}_9.xml")
      assert_equal 42, @sitemap.send(:get_last_id, filename)
    end

    should 'return nil' do
      create_sitemap.clean
      filename = "#{sitemaps_dir}/sitemap_file"
      assert_nil @sitemap.send(:get_last_id, filename)
    end
  end

  context 'mobile' do
    should 'include mobile namespace' do
      create_sitemap(:gzip => false)
      add_model(:path => 'foo', :mobile => true)
      @sitemap.generate

      # The second line of the file is inspected (the first is skipped).
      second_line = File.readlines(unzipped_first_sitemaps_model_file)[1]
      assert second_line.include?(ns['mobile'])
    end

    should 'include mobile tag' do
      create_sitemap(:gzip => false)
      add_model(:path => 'foo', :mobile => true)
      @sitemap.generate

      assert_equal 10, mobile_elements(unzipped_first_sitemaps_model_file, 'mobile').size
    end
  end

  context 'partial update' do

    context 'prepare_update' do
      should 'generate correct condition for partial update' do
        filename = "#{sitemaps_dir}/sitemap_test_models"

        create_sitemap(:partial_update => true).clean
        add_model(:num_items => 50) #TestModel

        create_files "#{filename}_23.xml"
        assert_equal "(id >= 23)", @sitemap.send(:prepare_update).first.last[:conditions]

        create_files "#{filename}_42.xml"
        assert_equal "(id >= 23) AND (id >= 42)", @sitemap.send(:prepare_update).first.last[:conditions]
      end

      should 'generate correct condition for partial update with custom column' do
        filename = "#{sitemaps_dir}/sitemap_test_models"

        create_sitemap(:partial_update => true).clean
        add_model(:num_items => 50, :primary_column => 'name') #TestModel

        create_files "#{filename}_666.xml"
        assert_equal "(name >= 666)", @sitemap.send(:prepare_update).first.last[:conditions]
      end
    end

    should 'generate for all xml files in directory and delete last file' do
      TestModel.current_id = last_id = 27
      filename = "#{sitemaps_dir}/sitemap_test_models"

      create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100).clean
      add_model(:num_items => 50 - last_id) #TestModel

      create_files("#{filename}.xml",
                   "#{filename}_5.xml",
                   "#{filename}_9.xml",
                   "#{filename}_23.xml",
                   "#{filename}_#{last_id}.xml")
      @sitemap.generate

      # Dir["#{sitemaps_dir}/*"].each do |d| puts d; end

      assert File.exist?("#{filename}_48.xml")
      assert File.exist?("#{filename}_#{last_id}.xml")
      elems = elements("#{filename}_#{last_id}.xml", 'loc').map(&:text)

      assert_equal 5, elems.size
      (28..32).each do |i|
        assert elems.include?("http://example.com/test_models/#{i}")
      end

      elems = elements(unzipped_sitemaps_index_file, 'loc').map(&:text)
      assert elems.include?("http://example.com/sitemaps/sitemap_test_models.xml")
      assert elems.include?("http://example.com/sitemaps/sitemap_test_models_9.xml")
      assert elems.include?("http://example.com/sitemaps/sitemap_test_models_#{last_id}.xml")
      assert elems.include?("http://example.com/sitemaps/sitemap_test_models_48.xml")
    end

    should 'generate sitemap, update should respect old files' do
      max_id = 23
      TestModel.current_id = 0
      filename = "#{sitemaps_dir}/sitemap_test_models"

      create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100).clean
      add_model(:num_items => max_id) #TestModel
      @sitemap.generate

      # Dir["#{sitemaps_dir}/*"].each do |d| puts d; end

      assert_equal 5, elements("#{filename}.xml", 'loc').size
      assert_equal 5, elements("#{filename}_6.xml", 'loc').size
      assert_equal 3, elements("#{filename}_21.xml", 'loc').size

      TestModel.current_id = 20 #last_id is 21, so start with one below
      create_sitemap(:partial_update => true, :gzip => false, :batch_size => 5, :max_per_sitemap => 5, :max_per_index => 100)
      add_model( :num_items => 48 - TestModel.current_id ) #TestModel
      @sitemap.generate

      assert_equal 5, elements("#{filename}_6.xml", 'loc').size
      assert_equal 5, elements("#{filename}_21.xml", 'loc').size

      # Dir["#{sitemaps_dir}/*"].each do |d| puts d; end

      elems = elements("#{filename}_26.xml", 'loc').map(&:text)
      (26..30).each do |i|
        assert elems.include?("http://example.com/test_models/#{i}")
      end

      #puts `cat /tmp/sitemaps/sitemap_test_models_41.xml`

      assert_equal 3, elements("#{filename}_46.xml", 'loc').size
    end

    context 'escape' do
      should 'add if not number' do
        create_sitemap
        data = {
          42 => 42,
          '23' => 23,
          "test" => "'test'",
          "test10" => "'test10'",
          "10test" => "'10test'",
          "10t' est" => "'10t\\' est'",
        }
        data.each do |key, value|
          assert_equal value, @sitemap.send(:escape_if_string, key)
        end

      end
    end

    context 'lockfile' do
      should 'create and delete lock file' do
        sitemap = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)

        sitemap.with_lock do
          assert File.exist?('/tmp/sitemaps/generator.lock')
        end

        assert !File.exist?('/tmp/sitemaps/generator.lock')
      end

      should 'not catch error not related to lock' do
        sitemap = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)

        assert_raise RuntimeError do
          sitemap.with_lock do
            raise "Wrong"
          end
        end

      end

      should 'throw error if lock exits' do
        sitemap = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)

        sitemap.with_lock do
          sitemap2 = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)

          # While the first lock is held, the inner block must never run.
          assert_nothing_raised do
            sitemap2.with_lock do
              raise "Should not be called"
            end
          end

        end
      end
    end

  end

end
|