big_sitemap 0.8.2 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +8 -2
- data/README.rdoc +4 -1
- data/Rakefile +0 -18
- data/VERSION.yml +2 -1
- data/big_sitemap.gemspec +2 -2
- data/lib/big_sitemap.rb +51 -62
- data/test/big_sitemap_test.rb +57 -5
- metadata +4 -4
data/History.txt
CHANGED
@@ -1,9 +1,15 @@
|
|
1
|
-
=== 0.8.
|
1
|
+
=== 0.8.3 / 2011-03-08
|
2
|
+
|
3
|
+
* Separate URL and file paths are now supported via the :document_path
|
4
|
+
and :url_path options
|
5
|
+
* Fixes an issue when initializing in Rails 3
|
6
|
+
|
7
|
+
=== 0.8.2 / 2011-01-25
|
2
8
|
|
3
9
|
* Fixes an issue where sitemap files were not being generated if the same model
|
4
10
|
was added more than once (fixes issue #5: https://github.com/alexrabarts/big_sitemap/issues/#issue/5)
|
5
11
|
|
6
|
-
=== 0.8.1 /
|
12
|
+
=== 0.8.1 / 2011-01-25
|
7
13
|
|
8
14
|
* API change: Rails/Merb are no longer automatically detected - use BigSitemapRails and BigSitemapMerb instead
|
9
15
|
* API change: Rails' polymorphic_url helper is no longer used to generate URLs (use a lambda with the new :location option instead)
|
data/README.rdoc
CHANGED
@@ -61,8 +61,11 @@ Via gem:
|
|
61
61
|
|
62
62
|
* <code>:url_options</code> -- hash with <code>:host</code>, optionally <code>:port</code> and <code>:protocol</code>
|
63
63
|
* <code>:base_url</code> -- string alternative to <code>:url_options</code>, e.g. <code>'https://example.com:8080/'</code>
|
64
|
+
* <code>:url_path</code> -- string path_name to sitemaps folder, defaults to <code>:document_path</code>
|
64
65
|
* <code>:document_root</code> -- string
|
65
|
-
* <code>:
|
66
|
+
* <code>:document_path</code> -- string document path to generation folder, relative to :document_root, defaults to <code>'sitemaps/'</code>
|
67
|
+
* <code>:path</code> -- string, alias for ":document_path" for legacy reasons
|
68
|
+
* <code>:document_full</code> -- string absolute document path to generation folder - defaults to <code>:document_root/:document_path</code>
|
66
69
|
* <code>:max_per_sitemap</code> -- <code>50000</code>, which is the limit dictated by Google but can be less
|
67
70
|
* <code>:batch_size</code> -- <code>1001</code> (not <code>1000</code> due to a bug in DataMapper)
|
68
71
|
* <code>:gzip</code> -- <code>true</code>
|
data/Rakefile
CHANGED
@@ -31,22 +31,4 @@ Rake::TestTask.new(:test) do |t|
|
|
31
31
|
t.verbose = false
|
32
32
|
end
|
33
33
|
|
34
|
-
begin
|
35
|
-
require 'rcov/rcovtask'
|
36
|
-
Rcov::RcovTask.new do |t|
|
37
|
-
t.libs << 'test'
|
38
|
-
t.test_files = FileList['test/**/*_test.rb']
|
39
|
-
t.verbose = true
|
40
|
-
end
|
41
|
-
rescue LoadError
|
42
|
-
puts "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
43
|
-
end
|
44
|
-
|
45
|
-
begin
|
46
|
-
require 'cucumber/rake/task'
|
47
|
-
Cucumber::Rake::Task.new(:features)
|
48
|
-
rescue LoadError
|
49
|
-
puts "Cucumber is not available. In order to run features, you must: sudo gem install cucumber"
|
50
|
-
end
|
51
|
-
|
52
34
|
task :default => :test
|
data/VERSION.yml
CHANGED
data/big_sitemap.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{big_sitemap}
|
8
|
-
s.version = "0.8.2"
|
8
|
+
s.version = "0.8.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Alex Rabarts", "Tobias Bielohlawek"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-03-14}
|
13
13
|
s.description = %q{A Sitemap generator specifically designed for large sites (although it works equally well with small sites)}
|
14
14
|
s.email = ["alexrabarts@gmail.com", "tobi@soundcloud.com"]
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/big_sitemap.rb
CHANGED
@@ -7,7 +7,7 @@ class BigSitemap
|
|
7
7
|
DEFAULTS = {
|
8
8
|
:max_per_sitemap => Builder::MAX_URLS,
|
9
9
|
:batch_size => 1001,
|
10
|
-
:
|
10
|
+
:document_path => 'sitemaps/',
|
11
11
|
:gzip => true,
|
12
12
|
|
13
13
|
# opinionated
|
@@ -24,36 +24,31 @@ class BigSitemap
|
|
24
24
|
|
25
25
|
def initialize(options={})
|
26
26
|
@options = DEFAULTS.merge options
|
27
|
-
|
28
|
-
@default_url_options = options.delete(:default_url_options) || {}
|
27
|
+
@options[:document_path] ||= @options[:path] #for legacy reasons
|
29
28
|
|
30
29
|
if @options[:max_per_sitemap] <= 1
|
31
30
|
raise ArgumentError, '":max_per_sitemap" must be greater than 1'
|
32
31
|
end
|
33
32
|
|
34
33
|
if @options[:url_options]
|
35
|
-
@
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
@default_url_options[:port] = uri.port
|
40
|
-
@default_url_options[:protocol] = uri.scheme
|
41
|
-
else
|
34
|
+
@options[:base_url] = URI::Generic.build( {:scheme => "http"}.merge(@options.delete(:url_options)) ).to_s
|
35
|
+
end
|
36
|
+
|
37
|
+
unless @options[:base_url]
|
42
38
|
raise ArgumentError, 'you must specify either ":url_options" hash or ":base_url" string'
|
43
39
|
end
|
40
|
+
@options[:url_path] ||= @options[:document_path]
|
44
41
|
|
45
42
|
if @options[:batch_size] > @options[:max_per_sitemap]
|
46
43
|
raise ArgumentError, '":batch_size" must be less than ":max_per_sitemap"'
|
47
44
|
end
|
48
45
|
|
49
|
-
@options[:
|
50
|
-
|
51
|
-
|
52
|
-
raise ArgumentError, 'Document root must be specified with the ":document_root" option'
|
46
|
+
@options[:document_full] ||= File.join(@options[:document_root], @options[:document_path])
|
47
|
+
unless @options[:document_full]
|
48
|
+
raise ArgumentError, 'Document root must be specified with the ":document_root" option, the full path with ":document_full"'
|
53
49
|
end
|
54
50
|
|
55
|
-
@
|
56
|
-
Dir.mkdir(@file_path) unless File.exists? @file_path
|
51
|
+
Dir.mkdir(@options[:document_full]) unless File.exists?(@options[:document_full])
|
57
52
|
|
58
53
|
@sources = []
|
59
54
|
@models = []
|
@@ -100,14 +95,15 @@ class BigSitemap
|
|
100
95
|
|
101
96
|
def file_name(name)
|
102
97
|
name = table_name(name) unless name.is_a? String
|
103
|
-
"
|
98
|
+
File.join(@options[:document_full], "sitemap_#{name}")
|
104
99
|
end
|
105
100
|
|
106
|
-
def
|
101
|
+
def dir_files
|
102
|
+
File.join(@options[:document_full], "sitemap_*.{xml,xml.gz}")
|
107
103
|
end
|
108
104
|
|
109
105
|
def clean
|
110
|
-
Dir[
|
106
|
+
Dir[dir_files].each do |file|
|
111
107
|
FileUtils.rm file
|
112
108
|
end
|
113
109
|
self
|
@@ -161,8 +157,8 @@ class BigSitemap
|
|
161
157
|
|
162
158
|
if last_id && primary_column
|
163
159
|
find_options.update(:limit => limit, :offset => nil)
|
164
|
-
primary_column_value = last_id
|
165
|
-
find_options.update(:conditions => [find_options[:conditions], "(#{primary_column} >
|
160
|
+
primary_column_value = escape_if_string last_id #escape '
|
161
|
+
find_options.update(:conditions => [find_options[:conditions], "(#{primary_column} > #{primary_column_value})"].compact.join(' AND '))
|
166
162
|
end
|
167
163
|
|
168
164
|
model.send(find_method, find_options).each do |record|
|
@@ -176,12 +172,12 @@ class BigSitemap
|
|
176
172
|
|
177
173
|
param_method = pick_method(record, PARAM_METHODS)
|
178
174
|
|
179
|
-
location =
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
175
|
+
location =
|
176
|
+
if options[:location].is_a?(Proc)
|
177
|
+
options[:location].call(record)
|
178
|
+
else
|
179
|
+
File.join @options[:base_url], options[:path], record.send(param_method).to_s
|
180
|
+
end
|
185
181
|
|
186
182
|
change_frequency = options[:change_frequency] || 'weekly'
|
187
183
|
freq = change_frequency.is_a?(Proc) ? change_frequency.call(record) : change_frequency
|
@@ -211,7 +207,7 @@ class BigSitemap
|
|
211
207
|
|
212
208
|
# Create a sitemap index document
|
213
209
|
def generate_sitemap_index(files = nil)
|
214
|
-
files ||= Dir[
|
210
|
+
files ||= Dir[dir_files]
|
215
211
|
with_sitemap 'index', :type => 'index' do |sitemap|
|
216
212
|
for path in files
|
217
213
|
next if path =~ /index/
|
@@ -238,7 +234,7 @@ class BigSitemap
|
|
238
234
|
"appid=#{@options[:yahoo_app_id]}&url=#{sitemap_uri}"
|
239
235
|
)
|
240
236
|
else
|
241
|
-
|
237
|
+
STDERR.puts 'unable to ping Yahoo: no ":yahoo_app_id" provided'
|
242
238
|
end
|
243
239
|
end
|
244
240
|
|
@@ -251,36 +247,27 @@ class BigSitemap
|
|
251
247
|
end
|
252
248
|
end
|
253
249
|
|
254
|
-
def root_url
|
255
|
-
@root_url ||= begin
|
256
|
-
url = ''
|
257
|
-
url << (@default_url_options[:protocol] || 'http')
|
258
|
-
url << '://' unless url.match('://')
|
259
|
-
url << @default_url_options[:host]
|
260
|
-
url << ":#{port}" if port = @default_url_options[:port] and port != 80
|
261
|
-
url
|
262
|
-
end
|
263
|
-
end
|
264
|
-
|
265
250
|
private
|
266
251
|
|
267
252
|
def prepare_update
|
268
253
|
@files_to_move = []
|
269
254
|
@sources.each do |model, options|
|
270
|
-
if options[:partial_update] && primary_column = options[:primary_column] && last_id = get_last_id(options[:filename])
|
271
|
-
primary_column_value = last_id
|
272
|
-
options[:conditions] = [options[:conditions], "(#{primary_column} >=
|
255
|
+
if options[:partial_update] && (primary_column = options[:primary_column]) && (last_id = get_last_id(options[:filename]))
|
256
|
+
primary_column_value = escape_if_string last_id #escape '
|
257
|
+
options[:conditions] = [options[:conditions], "(#{primary_column} >= #{primary_column_value})"].compact.join(' AND ')
|
273
258
|
options[:start_part_id] = last_id
|
274
259
|
end
|
275
260
|
end
|
276
261
|
end
|
277
262
|
|
278
263
|
def lock!(lock_file = 'generator.lock')
|
279
|
-
File.
|
264
|
+
lock_file = File.join(@options[:document_full], lock_file)
|
265
|
+
File.open(lock_file, 'w', File::EXCL)
|
280
266
|
end
|
281
267
|
|
282
268
|
def unlock!(lock_file = 'generator.lock')
|
283
|
-
|
269
|
+
lock_file = File.join(@options[:document_full], lock_file)
|
270
|
+
FileUtils.rm lock_file
|
284
271
|
end
|
285
272
|
|
286
273
|
def with_sitemap(name, options={})
|
@@ -307,10 +294,6 @@ class BigSitemap
|
|
307
294
|
end
|
308
295
|
end
|
309
296
|
|
310
|
-
def strip_leading_slash(str)
|
311
|
-
str.sub(/^\//, '')
|
312
|
-
end
|
313
|
-
|
314
297
|
def get_last_id(filename)
|
315
298
|
Dir["#{filename}*.{xml,xml.gz}"].map do |file|
|
316
299
|
file.to_s.scan(/#{filename}_(.+).xml/).flatten.last.to_i
|
@@ -328,44 +311,50 @@ class BigSitemap
|
|
328
311
|
method
|
329
312
|
end
|
330
313
|
|
314
|
+
def escape_if_string(value)
|
315
|
+
(value.to_i.to_s == value.to_s) ? value.to_i : "'#{value.gsub("'", %q(\\\'))}'"
|
316
|
+
end
|
317
|
+
|
331
318
|
def url_for_sitemap(path)
|
332
|
-
[
|
319
|
+
File.join @options[:base_url], @options[:url_path], File.basename(path)
|
333
320
|
end
|
334
321
|
|
335
322
|
end
|
336
323
|
|
337
324
|
|
338
|
-
|
339
325
|
class BigSitemapRails < BigSitemap
|
340
326
|
|
341
|
-
|
327
|
+
if defined?(Rails) && Rails.version < "3"
|
328
|
+
include ActionController::UrlWriter
|
329
|
+
end
|
342
330
|
|
343
331
|
def initialize(options={})
|
332
|
+
raise "No Rails Environment loaded" unless defined? Rails
|
344
333
|
require 'action_controller'
|
345
334
|
|
346
|
-
|
347
|
-
|
335
|
+
if Rails.version >= "3"
|
336
|
+
self.class.send(:include, Rails.application.routes.url_helpers)
|
337
|
+
end
|
348
338
|
|
349
|
-
|
350
|
-
|
339
|
+
DEFAULTS.merge!(:document_root => "#{Rails.root}/public", :url_options => default_url_options)
|
340
|
+
super(options)
|
351
341
|
end
|
352
|
-
end
|
353
342
|
|
343
|
+
end
|
354
344
|
|
355
345
|
|
356
346
|
class BigSitemapMerb < BigSitemap
|
357
347
|
|
358
348
|
def initialize(options={})
|
349
|
+
raise "No Merb Environment loaded" unless defined? Merb
|
359
350
|
require 'extlib'
|
360
|
-
super
|
361
|
-
end
|
362
351
|
|
363
|
-
|
364
|
-
|
352
|
+
DEFAULTS.merge!(:document_root => "#{Merb.root}/public")
|
353
|
+
super(options)
|
365
354
|
end
|
366
355
|
|
367
356
|
def table_name(model)
|
368
357
|
Extlib::Inflection.tableize(model.to_s)
|
369
358
|
end
|
370
359
|
|
371
|
-
end
|
360
|
+
end
|
data/test/big_sitemap_test.rb
CHANGED
@@ -15,12 +15,20 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
15
15
|
assert_raise(ArgumentError) { BigSitemap.new(:document_root => tmp_dir) }
|
16
16
|
end
|
17
17
|
|
18
|
-
should 'generate the same base URL' do
|
18
|
+
should 'generate the same base URL with :base_url option' do
|
19
19
|
options = {:document_root => tmp_dir}
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
)
|
20
|
+
url = 'http://example.com'
|
21
|
+
sitemap = BigSitemap.new(options.merge(:base_url => url))
|
22
|
+
|
23
|
+
assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
|
24
|
+
end
|
25
|
+
|
26
|
+
should 'generate the same base URL with :url_options option' do
|
27
|
+
options = {:document_root => tmp_dir}
|
28
|
+
url = 'http://example.com'
|
29
|
+
sitemap = BigSitemap.new(options.merge(:url_options => {:host => 'example.com'}))
|
30
|
+
|
31
|
+
assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
|
24
32
|
end
|
25
33
|
|
26
34
|
should 'generate a sitemap index file' do
|
@@ -287,6 +295,32 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
287
295
|
end
|
288
296
|
|
289
297
|
context 'partial update' do
|
298
|
+
|
299
|
+
context 'prepare_update' do
|
300
|
+
should 'generate correct condition for partial update' do
|
301
|
+
filename = "#{sitemaps_dir}/sitemap_test_models"
|
302
|
+
|
303
|
+
create_sitemap(:partial_update => true).clean
|
304
|
+
add_model(:num_items => 50) #TestModel
|
305
|
+
|
306
|
+
File.open("#{filename}_23.xml", 'w')
|
307
|
+
assert_equal "(id >= 23)", @sitemap.send(:prepare_update).first.last[:conditions]
|
308
|
+
|
309
|
+
File.open("#{filename}_42.xml", 'w')
|
310
|
+
assert_equal "(id >= 23) AND (id >= 42)", @sitemap.send(:prepare_update).first.last[:conditions]
|
311
|
+
end
|
312
|
+
|
313
|
+
should 'generate correct condition for partial update with custom column' do
|
314
|
+
filename = "#{sitemaps_dir}/sitemap_test_models"
|
315
|
+
|
316
|
+
create_sitemap(:partial_update => true).clean
|
317
|
+
add_model(:num_items => 50, :primary_column => 'name') #TestModel
|
318
|
+
|
319
|
+
File.open("#{filename}_666.xml", 'w')
|
320
|
+
assert_equal "(name >= 666)", @sitemap.send(:prepare_update).first.last[:conditions]
|
321
|
+
end
|
322
|
+
end
|
323
|
+
|
290
324
|
should 'generate for all xml files in directory and delete last file' do
|
291
325
|
TestModel.current_id = last_id = 27
|
292
326
|
filename = "#{sitemaps_dir}/sitemap_test_models"
|
@@ -354,6 +388,24 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
354
388
|
assert_equal 3, elements("#{filename}_46.xml", 'loc').size
|
355
389
|
end
|
356
390
|
|
391
|
+
context 'escape' do
|
392
|
+
should 'add if not number' do
|
393
|
+
create_sitemap
|
394
|
+
data = {
|
395
|
+
42 => 42,
|
396
|
+
'23' => 23,
|
397
|
+
"test" => "'test'",
|
398
|
+
"test10" => "'test10'",
|
399
|
+
"10test" => "'10test'",
|
400
|
+
"10t' est" => "'10t\\' est'",
|
401
|
+
}
|
402
|
+
data.each do |key, value|
|
403
|
+
assert_equal value, @sitemap.send(:escape_if_string, key)
|
404
|
+
end
|
405
|
+
|
406
|
+
end
|
407
|
+
end
|
408
|
+
|
357
409
|
context 'lockfile' do
|
358
410
|
should 'create and delete lock file' do
|
359
411
|
sitemap = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: big_sitemap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 57
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 8
|
9
|
-
- 2
|
10
|
-
version: 0.8.2
|
9
|
+
- 3
|
10
|
+
version: 0.8.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Alex Rabarts
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-
|
19
|
+
date: 2011-03-14 00:00:00 +00:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|