big_sitemap 0.8.2 → 0.8.3
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +8 -2
- data/README.rdoc +4 -1
- data/Rakefile +0 -18
- data/VERSION.yml +2 -1
- data/big_sitemap.gemspec +2 -2
- data/lib/big_sitemap.rb +51 -62
- data/test/big_sitemap_test.rb +57 -5
- metadata +4 -4
data/History.txt
CHANGED
@@ -1,9 +1,15 @@
|
|
1
|
-
=== 0.8.
|
1
|
+
=== 0.8.3 / 2011-03-08
|
2
|
+
|
3
|
+
* Separate URL and file paths are now supported via the :document_path
|
4
|
+
and :url_path options
|
5
|
+
* Fixes an issue when initializing in Rails 3
|
6
|
+
|
7
|
+
=== 0.8.2 / 2011-01-25
|
2
8
|
|
3
9
|
* Fixes an issue where sitemap files were not being generated if the same model
|
4
10
|
was added more than once (fixes issue #5: https://github.com/alexrabarts/big_sitemap/issues/#issue/5)
|
5
11
|
|
6
|
-
=== 0.8.1 /
|
12
|
+
=== 0.8.1 / 2011-01-25
|
7
13
|
|
8
14
|
* API change: Rails/Merb are no longer automatically detected - use BigSitemapRails and BigSitemapMerb instead
|
9
15
|
* API change: Rails' polymorphic_url helper is no longer used to generate URLs (use a lambda with the new :location option instead)
|
data/README.rdoc
CHANGED
@@ -61,8 +61,11 @@ Via gem:
|
|
61
61
|
|
62
62
|
* <code>:url_options</code> -- hash with <code>:host</code>, optionally <code>:port</code> and <code>:protocol</code>
|
63
63
|
* <code>:base_url</code> -- string alternative to <code>:url_options</code>, e.g. <code>'https://example.com:8080/'</code>
|
64
|
+
* <code>:url_path</code> -- string path_name to sitemaps folder, defaults to <code>:document_path</code>
|
64
65
|
* <code>:document_root</code> -- string
|
65
|
-
* <code>:
|
66
|
+
* <code>:document_path</code> -- string document path to generation folder, relative to :document_root, defaults to <code>'sitemaps/'</code>
|
67
|
+
* <code>:path</code> -- string, alias for ":document_path" for legacy reasons
|
68
|
+
* <code>:document_full</code> -- string absolute document path to generation folder - defaults to <code>:document_root/:document_path</code>
|
66
69
|
* <code>:max_per_sitemap</code> -- <code>50000</code>, which is the limit dictated by Google but can be less
|
67
70
|
* <code>:batch_size</code> -- <code>1001</code> (not <code>1000</code> due to a bug in DataMapper)
|
68
71
|
* <code>:gzip</code> -- <code>true</code>
|
data/Rakefile
CHANGED
@@ -31,22 +31,4 @@ Rake::TestTask.new(:test) do |t|
|
|
31
31
|
t.verbose = false
|
32
32
|
end
|
33
33
|
|
34
|
-
begin
|
35
|
-
require 'rcov/rcovtask'
|
36
|
-
Rcov::RcovTask.new do |t|
|
37
|
-
t.libs << 'test'
|
38
|
-
t.test_files = FileList['test/**/*_test.rb']
|
39
|
-
t.verbose = true
|
40
|
-
end
|
41
|
-
rescue LoadError
|
42
|
-
puts "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
43
|
-
end
|
44
|
-
|
45
|
-
begin
|
46
|
-
require 'cucumber/rake/task'
|
47
|
-
Cucumber::Rake::Task.new(:features)
|
48
|
-
rescue LoadError
|
49
|
-
puts "Cucumber is not available. In order to run features, you must: sudo gem install cucumber"
|
50
|
-
end
|
51
|
-
|
52
34
|
task :default => :test
|
data/VERSION.yml
CHANGED
data/big_sitemap.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{big_sitemap}
|
8
|
-
s.version = "0.8.2"
|
8
|
+
s.version = "0.8.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Alex Rabarts", "Tobias Bielohlawek"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-03-14}
|
13
13
|
s.description = %q{A Sitemap generator specifically designed for large sites (although it works equally well with small sites)}
|
14
14
|
s.email = ["alexrabarts@gmail.com", "tobi@soundcloud.com"]
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/big_sitemap.rb
CHANGED
@@ -7,7 +7,7 @@ class BigSitemap
|
|
7
7
|
DEFAULTS = {
|
8
8
|
:max_per_sitemap => Builder::MAX_URLS,
|
9
9
|
:batch_size => 1001,
|
10
|
-
:
|
10
|
+
:document_path => 'sitemaps/',
|
11
11
|
:gzip => true,
|
12
12
|
|
13
13
|
# opinionated
|
@@ -24,36 +24,31 @@ class BigSitemap
|
|
24
24
|
|
25
25
|
def initialize(options={})
|
26
26
|
@options = DEFAULTS.merge options
|
27
|
-
|
28
|
-
@default_url_options = options.delete(:default_url_options) || {}
|
27
|
+
@options[:document_path] ||= @options[:path] #for legacy reasons
|
29
28
|
|
30
29
|
if @options[:max_per_sitemap] <= 1
|
31
30
|
raise ArgumentError, '":max_per_sitemap" must be greater than 1'
|
32
31
|
end
|
33
32
|
|
34
33
|
if @options[:url_options]
|
35
|
-
@
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
@default_url_options[:port] = uri.port
|
40
|
-
@default_url_options[:protocol] = uri.scheme
|
41
|
-
else
|
34
|
+
@options[:base_url] = URI::Generic.build( {:scheme => "http"}.merge(@options.delete(:url_options)) ).to_s
|
35
|
+
end
|
36
|
+
|
37
|
+
unless @options[:base_url]
|
42
38
|
raise ArgumentError, 'you must specify either ":url_options" hash or ":base_url" string'
|
43
39
|
end
|
40
|
+
@options[:url_path] ||= @options[:document_path]
|
44
41
|
|
45
42
|
if @options[:batch_size] > @options[:max_per_sitemap]
|
46
43
|
raise ArgumentError, '":batch_size" must be less than ":max_per_sitemap"'
|
47
44
|
end
|
48
45
|
|
49
|
-
@options[:
|
50
|
-
|
51
|
-
|
52
|
-
raise ArgumentError, 'Document root must be specified with the ":document_root" option'
|
46
|
+
@options[:document_full] ||= File.join(@options[:document_root], @options[:document_path])
|
47
|
+
unless @options[:document_full]
|
48
|
+
raise ArgumentError, 'Document root must be specified with the ":document_root" option, the full path with ":document_full"'
|
53
49
|
end
|
54
50
|
|
55
|
-
@
|
56
|
-
Dir.mkdir(@file_path) unless File.exists? @file_path
|
51
|
+
Dir.mkdir(@options[:document_full]) unless File.exists?(@options[:document_full])
|
57
52
|
|
58
53
|
@sources = []
|
59
54
|
@models = []
|
@@ -100,14 +95,15 @@ class BigSitemap
|
|
100
95
|
|
101
96
|
def file_name(name)
|
102
97
|
name = table_name(name) unless name.is_a? String
|
103
|
-
"
|
98
|
+
File.join(@options[:document_full], "sitemap_#{name}")
|
104
99
|
end
|
105
100
|
|
106
|
-
def
|
101
|
+
def dir_files
|
102
|
+
File.join(@options[:document_full], "sitemap_*.{xml,xml.gz}")
|
107
103
|
end
|
108
104
|
|
109
105
|
def clean
|
110
|
-
Dir[
|
106
|
+
Dir[dir_files].each do |file|
|
111
107
|
FileUtils.rm file
|
112
108
|
end
|
113
109
|
self
|
@@ -161,8 +157,8 @@ class BigSitemap
|
|
161
157
|
|
162
158
|
if last_id && primary_column
|
163
159
|
find_options.update(:limit => limit, :offset => nil)
|
164
|
-
primary_column_value = last_id
|
165
|
-
find_options.update(:conditions => [find_options[:conditions], "(#{primary_column} >
|
160
|
+
primary_column_value = escape_if_string last_id #escape '
|
161
|
+
find_options.update(:conditions => [find_options[:conditions], "(#{primary_column} > #{primary_column_value})"].compact.join(' AND '))
|
166
162
|
end
|
167
163
|
|
168
164
|
model.send(find_method, find_options).each do |record|
|
@@ -176,12 +172,12 @@ class BigSitemap
|
|
176
172
|
|
177
173
|
param_method = pick_method(record, PARAM_METHODS)
|
178
174
|
|
179
|
-
location =
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
175
|
+
location =
|
176
|
+
if options[:location].is_a?(Proc)
|
177
|
+
options[:location].call(record)
|
178
|
+
else
|
179
|
+
File.join @options[:base_url], options[:path], record.send(param_method).to_s
|
180
|
+
end
|
185
181
|
|
186
182
|
change_frequency = options[:change_frequency] || 'weekly'
|
187
183
|
freq = change_frequency.is_a?(Proc) ? change_frequency.call(record) : change_frequency
|
@@ -211,7 +207,7 @@ class BigSitemap
|
|
211
207
|
|
212
208
|
# Create a sitemap index document
|
213
209
|
def generate_sitemap_index(files = nil)
|
214
|
-
files ||= Dir[
|
210
|
+
files ||= Dir[dir_files]
|
215
211
|
with_sitemap 'index', :type => 'index' do |sitemap|
|
216
212
|
for path in files
|
217
213
|
next if path =~ /index/
|
@@ -238,7 +234,7 @@ class BigSitemap
|
|
238
234
|
"appid=#{@options[:yahoo_app_id]}&url=#{sitemap_uri}"
|
239
235
|
)
|
240
236
|
else
|
241
|
-
|
237
|
+
STDERR.puts 'unable to ping Yahoo: no ":yahoo_app_id" provided'
|
242
238
|
end
|
243
239
|
end
|
244
240
|
|
@@ -251,36 +247,27 @@ class BigSitemap
|
|
251
247
|
end
|
252
248
|
end
|
253
249
|
|
254
|
-
def root_url
|
255
|
-
@root_url ||= begin
|
256
|
-
url = ''
|
257
|
-
url << (@default_url_options[:protocol] || 'http')
|
258
|
-
url << '://' unless url.match('://')
|
259
|
-
url << @default_url_options[:host]
|
260
|
-
url << ":#{port}" if port = @default_url_options[:port] and port != 80
|
261
|
-
url
|
262
|
-
end
|
263
|
-
end
|
264
|
-
|
265
250
|
private
|
266
251
|
|
267
252
|
def prepare_update
|
268
253
|
@files_to_move = []
|
269
254
|
@sources.each do |model, options|
|
270
|
-
if options[:partial_update] && primary_column = options[:primary_column] && last_id = get_last_id(options[:filename])
|
271
|
-
primary_column_value = last_id
|
272
|
-
options[:conditions] = [options[:conditions], "(#{primary_column} >=
|
255
|
+
if options[:partial_update] && (primary_column = options[:primary_column]) && (last_id = get_last_id(options[:filename]))
|
256
|
+
primary_column_value = escape_if_string last_id #escape '
|
257
|
+
options[:conditions] = [options[:conditions], "(#{primary_column} >= #{primary_column_value})"].compact.join(' AND ')
|
273
258
|
options[:start_part_id] = last_id
|
274
259
|
end
|
275
260
|
end
|
276
261
|
end
|
277
262
|
|
278
263
|
def lock!(lock_file = 'generator.lock')
|
279
|
-
File.
|
264
|
+
lock_file = File.join(@options[:document_full], lock_file)
|
265
|
+
File.open(lock_file, 'w', File::EXCL)
|
280
266
|
end
|
281
267
|
|
282
268
|
def unlock!(lock_file = 'generator.lock')
|
283
|
-
|
269
|
+
lock_file = File.join(@options[:document_full], lock_file)
|
270
|
+
FileUtils.rm lock_file
|
284
271
|
end
|
285
272
|
|
286
273
|
def with_sitemap(name, options={})
|
@@ -307,10 +294,6 @@ class BigSitemap
|
|
307
294
|
end
|
308
295
|
end
|
309
296
|
|
310
|
-
def strip_leading_slash(str)
|
311
|
-
str.sub(/^\//, '')
|
312
|
-
end
|
313
|
-
|
314
297
|
def get_last_id(filename)
|
315
298
|
Dir["#{filename}*.{xml,xml.gz}"].map do |file|
|
316
299
|
file.to_s.scan(/#{filename}_(.+).xml/).flatten.last.to_i
|
@@ -328,44 +311,50 @@ class BigSitemap
|
|
328
311
|
method
|
329
312
|
end
|
330
313
|
|
314
|
+
def escape_if_string(value)
|
315
|
+
(value.to_i.to_s == value.to_s) ? value.to_i : "'#{value.gsub("'", %q(\\\'))}'"
|
316
|
+
end
|
317
|
+
|
331
318
|
def url_for_sitemap(path)
|
332
|
-
[
|
319
|
+
File.join @options[:base_url], @options[:url_path], File.basename(path)
|
333
320
|
end
|
334
321
|
|
335
322
|
end
|
336
323
|
|
337
324
|
|
338
|
-
|
339
325
|
class BigSitemapRails < BigSitemap
|
340
326
|
|
341
|
-
|
327
|
+
if defined?(Rails) && Rails.version < "3"
|
328
|
+
include ActionController::UrlWriter
|
329
|
+
end
|
342
330
|
|
343
331
|
def initialize(options={})
|
332
|
+
raise "No Rails Environment loaded" unless defined? Rails
|
344
333
|
require 'action_controller'
|
345
334
|
|
346
|
-
|
347
|
-
|
335
|
+
if Rails.version >= "3"
|
336
|
+
self.class.send(:include, Rails.application.routes.url_helpers)
|
337
|
+
end
|
348
338
|
|
349
|
-
|
350
|
-
|
339
|
+
DEFAULTS.merge!(:document_root => "#{Rails.root}/public", :url_options => default_url_options)
|
340
|
+
super(options)
|
351
341
|
end
|
352
|
-
end
|
353
342
|
|
343
|
+
end
|
354
344
|
|
355
345
|
|
356
346
|
class BigSitemapMerb < BigSitemap
|
357
347
|
|
358
348
|
def initialize(options={})
|
349
|
+
raise "No Merb Environment loaded" unless defined? Merb
|
359
350
|
require 'extlib'
|
360
|
-
super
|
361
|
-
end
|
362
351
|
|
363
|
-
|
364
|
-
|
352
|
+
DEFAULTS.merge!(:document_root => "#{Merb.root}/public")
|
353
|
+
super(options)
|
365
354
|
end
|
366
355
|
|
367
356
|
def table_name(model)
|
368
357
|
Extlib::Inflection.tableize(model.to_s)
|
369
358
|
end
|
370
359
|
|
371
|
-
end
|
360
|
+
end
|
data/test/big_sitemap_test.rb
CHANGED
@@ -15,12 +15,20 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
15
15
|
assert_raise(ArgumentError) { BigSitemap.new(:document_root => tmp_dir) }
|
16
16
|
end
|
17
17
|
|
18
|
-
should 'generate the same base URL' do
|
18
|
+
should 'generate the same base URL with :base_url option' do
|
19
19
|
options = {:document_root => tmp_dir}
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
)
|
20
|
+
url = 'http://example.com'
|
21
|
+
sitemap = BigSitemap.new(options.merge(:base_url => url))
|
22
|
+
|
23
|
+
assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
|
24
|
+
end
|
25
|
+
|
26
|
+
should 'generate the same base URL with :url_options option' do
|
27
|
+
options = {:document_root => tmp_dir}
|
28
|
+
url = 'http://example.com'
|
29
|
+
sitemap = BigSitemap.new(options.merge(:url_options => {:host => 'example.com'}))
|
30
|
+
|
31
|
+
assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
|
24
32
|
end
|
25
33
|
|
26
34
|
should 'generate a sitemap index file' do
|
@@ -287,6 +295,32 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
287
295
|
end
|
288
296
|
|
289
297
|
context 'partial update' do
|
298
|
+
|
299
|
+
context 'prepare_update' do
|
300
|
+
should 'generate correct condition for partial update' do
|
301
|
+
filename = "#{sitemaps_dir}/sitemap_test_models"
|
302
|
+
|
303
|
+
create_sitemap(:partial_update => true).clean
|
304
|
+
add_model(:num_items => 50) #TestModel
|
305
|
+
|
306
|
+
File.open("#{filename}_23.xml", 'w')
|
307
|
+
assert_equal "(id >= 23)", @sitemap.send(:prepare_update).first.last[:conditions]
|
308
|
+
|
309
|
+
File.open("#{filename}_42.xml", 'w')
|
310
|
+
assert_equal "(id >= 23) AND (id >= 42)", @sitemap.send(:prepare_update).first.last[:conditions]
|
311
|
+
end
|
312
|
+
|
313
|
+
should 'generate correct condition for partial update with custom column' do
|
314
|
+
filename = "#{sitemaps_dir}/sitemap_test_models"
|
315
|
+
|
316
|
+
create_sitemap(:partial_update => true).clean
|
317
|
+
add_model(:num_items => 50, :primary_column => 'name') #TestModel
|
318
|
+
|
319
|
+
File.open("#{filename}_666.xml", 'w')
|
320
|
+
assert_equal "(name >= 666)", @sitemap.send(:prepare_update).first.last[:conditions]
|
321
|
+
end
|
322
|
+
end
|
323
|
+
|
290
324
|
should 'generate for all xml files in directory and delete last file' do
|
291
325
|
TestModel.current_id = last_id = 27
|
292
326
|
filename = "#{sitemaps_dir}/sitemap_test_models"
|
@@ -354,6 +388,24 @@ class BigSitemapTest < Test::Unit::TestCase
|
|
354
388
|
assert_equal 3, elements("#{filename}_46.xml", 'loc').size
|
355
389
|
end
|
356
390
|
|
391
|
+
context 'escape' do
|
392
|
+
should 'add if not number' do
|
393
|
+
create_sitemap
|
394
|
+
data = {
|
395
|
+
42 => 42,
|
396
|
+
'23' => 23,
|
397
|
+
"test" => "'test'",
|
398
|
+
"test10" => "'test10'",
|
399
|
+
"10test" => "'10test'",
|
400
|
+
"10t' est" => "'10t\\' est'",
|
401
|
+
}
|
402
|
+
data.each do |key, value|
|
403
|
+
assert_equal value, @sitemap.send(:escape_if_string, key)
|
404
|
+
end
|
405
|
+
|
406
|
+
end
|
407
|
+
end
|
408
|
+
|
357
409
|
context 'lockfile' do
|
358
410
|
should 'create and delete lock file' do
|
359
411
|
sitemap = BigSitemap.new(:base_url => 'http://example.com', :document_root => tmp_dir)
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: big_sitemap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 57
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 8
|
9
|
-
- 2
|
10
|
-
version: 0.8.2
|
9
|
+
- 3
|
10
|
+
version: 0.8.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Alex Rabarts
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-
|
19
|
+
date: 2011-03-14 00:00:00 +00:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|