big_sitemap 1.0.2 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/History.txt +5 -0
- data/README.rdoc +1 -0
- data/VERSION +1 -1
- data/lib/big_sitemap.rb +5 -146
- metadata +9 -9
data/Gemfile.lock
CHANGED
data/History.txt
CHANGED
data/README.rdoc
CHANGED
@@ -64,6 +64,7 @@ Via gem:
|
|
64
64
|
* <code>:ping_yahoo</code> -- <code>false</code>, needs <code>:yahoo_app_id</code>
|
65
65
|
* <code>:ping_bing</code> -- <code>false</code>
|
66
66
|
* <code>:ping_ask</code> -- <code>false</code>
|
67
|
+
* <code>:ping_yandex</code> -- <code>false</code>
|
67
68
|
* <code>:partial_update</code> -- <code>false</code>
|
68
69
|
|
69
70
|
=== Change Frequency, Priority and Last Modified
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0
|
1
|
+
1.1.0
|
data/lib/big_sitemap.rb
CHANGED
@@ -6,7 +6,6 @@ require 'big_sitemap/builder'
|
|
6
6
|
class BigSitemap
|
7
7
|
DEFAULTS = {
|
8
8
|
:max_per_sitemap => Builder::MAX_URLS,
|
9
|
-
:batch_size => 1001, # TODO: Deprecate
|
10
9
|
:document_path => '/',
|
11
10
|
:gzip => true,
|
12
11
|
|
@@ -14,15 +13,10 @@ class BigSitemap
|
|
14
13
|
:ping_google => true,
|
15
14
|
:ping_yahoo => false, # needs :yahoo_app_id
|
16
15
|
:ping_bing => false,
|
17
|
-
:ping_ask => false
|
16
|
+
:ping_ask => false,
|
17
|
+
:ping_yandex => false
|
18
18
|
}
|
19
19
|
|
20
|
-
# TODO: Deprecate
|
21
|
-
COUNT_METHODS = [:count_for_sitemap, :count]
|
22
|
-
FIND_METHODS = [:find_for_sitemap, :all]
|
23
|
-
TIMESTAMP_METHODS = [:updated_at, :updated_on, :updated, :created_at, :created_on, :created]
|
24
|
-
PARAM_METHODS = [:to_param, :id]
|
25
|
-
|
26
20
|
class << self
|
27
21
|
def generate(options={}, &block)
|
28
22
|
@sitemap = self.new(options)
|
@@ -158,15 +152,8 @@ class BigSitemap
|
|
158
152
|
def generate(options={})
|
159
153
|
clean unless options[:partial_update]
|
160
154
|
|
161
|
-
# TODO: Ddeprecate
|
162
|
-
prepare_update
|
163
|
-
|
164
155
|
add_urls
|
165
156
|
|
166
|
-
# TODO: Deprecate
|
167
|
-
generate_models
|
168
|
-
generate_static
|
169
|
-
|
170
157
|
generate_sitemap_index
|
171
158
|
|
172
159
|
ping_search_engines
|
@@ -230,136 +217,13 @@ class BigSitemap
|
|
230
217
|
if @options[:ping_ask]
|
231
218
|
Net::HTTP.get('submissions.ask.com', "/ping?sitemap=#{sitemap_uri}")
|
232
219
|
end
|
233
|
-
end
|
234
|
-
|
235
|
-
# TODO: Deprecate
|
236
|
-
def get_last_id(filename)
|
237
|
-
Dir["#{filename}*.{xml,xml.gz}"].map do |file|
|
238
|
-
file.to_s.scan(/#{filename}_(.+).xml/).flatten.last.to_i
|
239
|
-
end.sort.last
|
240
|
-
end
|
241
220
|
|
242
|
-
|
243
|
-
|
244
|
-
# TODO: Deprecate
|
245
|
-
def table_name(model)
|
246
|
-
model.table_name
|
247
|
-
end
|
248
|
-
|
249
|
-
# TODO: Deprecate
|
250
|
-
def generate_models
|
251
|
-
for model, options in @sources
|
252
|
-
with_sitemap(options.dup.merge({:name => model})) do |sitemap|
|
253
|
-
last_id = nil #id of last processed item
|
254
|
-
count_method = pick_method(model, COUNT_METHODS)
|
255
|
-
find_method = pick_method(model, FIND_METHODS)
|
256
|
-
raise ArgumentError, "#{model} must provide a count_for_sitemap class method" if count_method.nil?
|
257
|
-
raise ArgumentError, "#{model} must provide a find_for_sitemap class method" if find_method.nil?
|
258
|
-
|
259
|
-
find_options = {}
|
260
|
-
[:conditions, :limit, :joins, :select, :order, :include, :group].each do |key|
|
261
|
-
find_options[key] = options.delete(key)
|
262
|
-
end
|
263
|
-
|
264
|
-
# Keep the intial conditions for later user
|
265
|
-
conditions = find_options[:conditions]
|
266
|
-
|
267
|
-
primary_method = options.delete(:primary_column)
|
268
|
-
primary_column = "#{table_name(model)}.#{primary_method}"
|
269
|
-
|
270
|
-
count = model.send(count_method, find_options.merge(:select => (primary_column || '*'), :include => nil))
|
271
|
-
count = find_options[:limit].to_i if find_options[:limit] && find_options[:limit].to_i < count
|
272
|
-
num_sitemaps = 1
|
273
|
-
num_batches = 1
|
274
|
-
|
275
|
-
if count > @options[:batch_size]
|
276
|
-
num_batches = (count.to_f / @options[:batch_size].to_f).ceil
|
277
|
-
num_sitemaps = (count.to_f / @options[:max_per_sitemap].to_f).ceil
|
278
|
-
end
|
279
|
-
batches_per_sitemap = num_batches.to_f / num_sitemaps.to_f
|
280
|
-
|
281
|
-
for sitemap_num in 1..num_sitemaps
|
282
|
-
# Work out the start and end batch numbers for this sitemap
|
283
|
-
batch_num_start = sitemap_num == 1 ? 1 : ((sitemap_num * batches_per_sitemap).ceil - batches_per_sitemap + 1).to_i
|
284
|
-
batch_num_end = (batch_num_start + [batches_per_sitemap, num_batches].min).floor - 1
|
285
|
-
|
286
|
-
for batch_num in batch_num_start..batch_num_end
|
287
|
-
offset = (batch_num - 1) * @options[:batch_size]
|
288
|
-
limit = (count - offset) < @options[:batch_size] ? (count - offset) : @options[:batch_size]
|
289
|
-
find_options.update(:limit => limit, :offset => offset) if num_batches > 1
|
290
|
-
|
291
|
-
if last_id && primary_column
|
292
|
-
find_options.update(:limit => limit, :offset => nil)
|
293
|
-
primary_column_value = escape_if_string last_id #escape '
|
294
|
-
find_options[:conditions] = [conditions, "(#{primary_column} > #{primary_column_value})"].compact.join(' AND ')
|
295
|
-
end
|
296
|
-
|
297
|
-
model.send(find_method, find_options).each do |record|
|
298
|
-
last_mod = options[:last_modified]
|
299
|
-
if last_mod.is_a?(Proc)
|
300
|
-
last_mod = last_mod.call(record)
|
301
|
-
elsif last_mod.nil?
|
302
|
-
last_mod_method = pick_method(record, TIMESTAMP_METHODS)
|
303
|
-
last_mod = last_mod_method.nil? ? Time.now : record.send(last_mod_method)
|
304
|
-
end
|
305
|
-
|
306
|
-
param_method = pick_method(record, PARAM_METHODS)
|
307
|
-
|
308
|
-
location =
|
309
|
-
if options[:location].is_a?(Proc)
|
310
|
-
options[:location].call(record)
|
311
|
-
else
|
312
|
-
File.join @options[:base_url], options[:path], record.send(param_method).to_s
|
313
|
-
end
|
314
|
-
|
315
|
-
change_frequency = options[:change_frequency]
|
316
|
-
freq = change_frequency.is_a?(Proc) ? change_frequency.call(record) : change_frequency
|
317
|
-
|
318
|
-
priority = options[:priority]
|
319
|
-
pri = priority.is_a?(Proc) ? priority.call(record) : priority
|
320
|
-
|
321
|
-
last_id = primary_column ? record.send(primary_method) : nil
|
322
|
-
|
323
|
-
sitemap.add_url!(location, {
|
324
|
-
:last_modified => last_mod,
|
325
|
-
:change_frequency => freq,
|
326
|
-
:priority => pri,
|
327
|
-
:part_number => last_id
|
328
|
-
}) if location
|
329
|
-
end
|
330
|
-
end
|
331
|
-
end
|
332
|
-
end
|
221
|
+
if @options[:ping_yandex]
|
222
|
+
Net::HTTP.get('webmaster.yandex.ru', "/wmconsole/sitemap_list.xml?host=#{sitemap_uri}")
|
333
223
|
end
|
334
|
-
self
|
335
224
|
end
|
336
225
|
|
337
|
-
|
338
|
-
def generate_static
|
339
|
-
return self if Array(@static_pages).empty?
|
340
|
-
with_sitemap({:name => 'static', :type => 'static'}) do |sitemap|
|
341
|
-
@static_pages.each do |location, last_mod, freq, pri|
|
342
|
-
sitemap.add_url!(location, {
|
343
|
-
:last_modified => last_mod,
|
344
|
-
:change_frequency => freq,
|
345
|
-
:priority => pri
|
346
|
-
})
|
347
|
-
end
|
348
|
-
end
|
349
|
-
self
|
350
|
-
end
|
351
|
-
|
352
|
-
# TODO: Deprecate
|
353
|
-
def prepare_update
|
354
|
-
@files_to_move = []
|
355
|
-
@sources.each do |model, options|
|
356
|
-
if options[:partial_update] && (primary_column = options[:primary_column]) && (last_id = get_last_id(options[:filename]))
|
357
|
-
primary_column_value = escape_if_string last_id #escape '
|
358
|
-
options[:conditions] = [options[:conditions], "(#{table_name(model)}.#{primary_column} >= #{primary_column_value})"].compact.join(' AND ')
|
359
|
-
options[:start_part_id] = last_id
|
360
|
-
end
|
361
|
-
end
|
362
|
-
end
|
226
|
+
private
|
363
227
|
|
364
228
|
def lock!(lock_file = 'generator.lock')
|
365
229
|
lock_file = File.join(@options[:document_full], lock_file)
|
@@ -405,11 +269,6 @@ class BigSitemap
|
|
405
269
|
method
|
406
270
|
end
|
407
271
|
|
408
|
-
# TODO: Deprecate
|
409
|
-
def escape_if_string(value)
|
410
|
-
(value.to_i.to_s == value.to_s) ? value.to_i : "'#{value.gsub("'", %q(\\\'))}'"
|
411
|
-
end
|
412
|
-
|
413
272
|
def url_for_sitemap(path)
|
414
273
|
File.join @options[:base_url], @options[:url_path], File.basename(path)
|
415
274
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: big_sitemap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -14,7 +14,7 @@ date: 2013-02-06 00:00:00.000000000 Z
|
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|
17
|
-
requirement: &
|
17
|
+
requirement: &70266223468900 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: '0'
|
23
23
|
type: :development
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *70266223468900
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: shoulda
|
28
|
-
requirement: &
|
28
|
+
requirement: &70266223468380 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *70266223468380
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: mocha
|
39
|
-
requirement: &
|
39
|
+
requirement: &70266223467920 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: '0'
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *70266223467920
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: nokogiri
|
50
|
-
requirement: &
|
50
|
+
requirement: &70266223467400 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
@@ -55,7 +55,7 @@ dependencies:
|
|
55
55
|
version: '0'
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *70266223467400
|
59
59
|
description: BigSitemap is a Sitemapgenerator suitable for applications with greater
|
60
60
|
than 50,000 URLs. It splits large Sitemaps into multiple files, gzips the files
|
61
61
|
to minimize bandwidth usage, batches database queries to minimize memory usage,
|