big_sitemap 0.8.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +8 -1
- data/Gemfile.lock +11 -0
- data/History.txt +21 -0
- data/README.rdoc +30 -94
- data/Rakefile +2 -2
- data/VERSION.yml +4 -4
- data/lib/big_sitemap.rb +181 -104
- data/lib/big_sitemap/builder.rb +28 -27
- data/test/big_sitemap_test.rb +152 -300
- data/test/fixtures/test_model.rb +1 -1
- metadata +69 -61
- data/.gitignore +0 -3
- data/big_sitemap.gemspec +0 -58
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,16 +1,27 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
+
git (1.2.5)
|
5
|
+
jeweler (1.6.4)
|
6
|
+
bundler (~> 1.0)
|
7
|
+
git (>= 1.2.5)
|
8
|
+
rake
|
9
|
+
json (1.6.1)
|
4
10
|
mocha (0.9.10)
|
5
11
|
rake
|
6
12
|
nokogiri (1.4.4)
|
7
13
|
rake (0.8.7)
|
14
|
+
rdoc (3.11)
|
15
|
+
json (~> 1.4)
|
8
16
|
shoulda (2.11.3)
|
9
17
|
|
10
18
|
PLATFORMS
|
11
19
|
ruby
|
12
20
|
|
13
21
|
DEPENDENCIES
|
22
|
+
jeweler
|
14
23
|
mocha
|
15
24
|
nokogiri
|
25
|
+
rake
|
26
|
+
rdoc
|
16
27
|
shoulda
|
data/History.txt
CHANGED
@@ -1,3 +1,24 @@
|
|
1
|
+
=== 1.0.0 / 2011-10-24
|
2
|
+
|
3
|
+
* API Change: Sitemaps are now generated using a block syntax. Find methods are no longer the responsibility of BigSitemap. Instead, sitemaps are generated using a block, in which you call your own find methods, passing the results to BigSitemap with the 'add' method. See the README for details.
|
4
|
+
* BigSitemapRails and BigSitemapMerb are now BigSitemap::Rails and BigSitemap::Merb, respectively.
|
5
|
+
* Sitemap files are now placed in the document root by default
|
6
|
+
* Sitemaps are now automatically cleaned before generating the new set
|
7
|
+
* Search engines are now pinged automatically when the sitemap is generated
|
8
|
+
* Lock files are now generated automatically
|
9
|
+
* Sitemap files are no longer split amongst your models
|
10
|
+
|
11
|
+
=== 0.8.5 / 2011-10-20
|
12
|
+
|
13
|
+
* Gzipped files now include indents and newlines
|
14
|
+
|
15
|
+
=== 0.8.4 / 2011-10-20
|
16
|
+
|
17
|
+
* Fixes an issue where joins were causing an ambiguous "id" column
|
18
|
+
(https://github.com/alexrabarts/big_sitemap/pull/17)
|
19
|
+
* Fixes an issue with empty <loc> nodes
|
20
|
+
(https://github.com/alexrabarts/big_sitemap/pull/20)
|
21
|
+
|
1
22
|
=== 0.8.3 / 2011-03-08
|
2
23
|
|
3
24
|
* Separate URL and file paths are now supported via the :document_path
|
data/README.rdoc
CHANGED
@@ -1,41 +1,34 @@
|
|
1
1
|
= BigSitemap
|
2
2
|
|
3
|
-
BigSitemap is a {Sitemap}[http://sitemaps.org] generator suitable for applications with greater than 50,000 URLs. It splits large Sitemaps into multiple files, gzips the files to minimize bandwidth usage,
|
3
|
+
BigSitemap is a {Sitemap}[http://sitemaps.org] generator suitable for applications with greater than 50,000 URLs. It splits large Sitemaps into multiple files, gzips the files to minimize bandwidth usage, supports incremental updates, can be set up with just a few lines of code and is compatible with just about any framework.
|
4
4
|
|
5
5
|
BigSitemap is best run periodically through a Rake/Thor task.
|
6
6
|
|
7
7
|
require 'big_sitemap'
|
8
8
|
|
9
|
-
|
10
|
-
:url_options => {:host => 'example.com'},
|
11
|
-
:document_root => "#{APP_ROOT}/public"
|
12
|
-
)
|
9
|
+
include Rails.application.routes.url_helpers # Allows access to Rails routes
|
13
10
|
|
14
|
-
|
15
|
-
|
11
|
+
BigSitemap.generate(:url_options => {:host => 'example.com'}, :document_root => "#{APP_ROOT}/public") do
|
12
|
+
# Add a static page
|
13
|
+
add '/about'
|
16
14
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
:change_frequency => 'daily',
|
22
|
-
:priority => 0.5
|
23
|
-
)
|
24
|
-
|
25
|
-
# Add a static resource
|
26
|
-
sitemap.add_static('http://example.com/about', Time.now, 'monthly', 0.1)
|
15
|
+
# Add some URLs from your Rails application
|
16
|
+
Post.find(:all).each do |post|
|
17
|
+
add post_path(post)
|
18
|
+
end
|
27
19
|
|
28
|
-
|
29
|
-
|
20
|
+
# Add some URLs with additional options
|
21
|
+
Product.find(:all).each do |product|
|
22
|
+
add product_path(product), :change_frequency => 'daily', :priority => 0.5
|
23
|
+
end
|
24
|
+
end
|
30
25
|
|
31
|
-
The code above will create a minimum of
|
26
|
+
The code above will create a minimum of two files:
|
32
27
|
|
33
28
|
1. public/sitemaps/sitemap_index.xml.gz
|
34
|
-
2. public/sitemaps/
|
35
|
-
3. public/sitemaps/sitemap_posts.xml.gz
|
36
|
-
4. public/sitemaps/sitemap_static.xml.gz
|
29
|
+
2. public/sitemaps/sitemap.xml.gz
|
37
30
|
|
38
|
-
If your sitemaps grow beyond 50,000 URLs (this limit can be overridden with the <code>:max_per_sitemap</code> option), the sitemap files will be partitioned into multiple files (<code>
|
31
|
+
If your sitemaps grow beyond 50,000 URLs (this limit can be overridden with the <code>:max_per_sitemap</code> option), the sitemap files will be partitioned into multiple files (<code>sitemap_1.xml.gz</code>, <code>sitemap_2.xml.gz</code>, ...).
|
39
32
|
|
40
33
|
=== Framework-specific Classes
|
41
34
|
|
@@ -43,7 +36,7 @@ Use the framework-specific classes to take advantage of built-in shortcuts.
|
|
43
36
|
|
44
37
|
==== Rails
|
45
38
|
|
46
|
-
<code>BigSiteMapRails</code>
|
39
|
+
<code>BigSiteMapRails</code> deals with setting the <code>:document_root</code> and <code>:url_options</code> initialization options.
|
47
40
|
|
48
41
|
==== Merb
|
49
42
|
|
@@ -63,11 +56,9 @@ Via gem:
|
|
63
56
|
* <code>:base_url</code> -- string alternative to <code>:url_options</code>, e.g. <code>'https://example.com:8080/'</code>
|
64
57
|
* <code>:url_path</code> -- string path_name to sitemaps folder, defaults to <code>:document_path</code>
|
65
58
|
* <code>:document_root</code> -- string
|
66
|
-
* <code>:document_path</code> -- string document path
|
67
|
-
* <code>:path</code> -- string, alias for ":document_path" for legacy reasons
|
59
|
+
* <code>:document_path</code> -- string document path for sitemaps, relative to :document_root, defaults to empty string (putting sitemap files in the document root directory)
|
68
60
|
* <code>:document_full</code> -- string absolute document path to generation folder - defaults to <code>:document_root/:document_path</code>
|
69
61
|
* <code>:max_per_sitemap</code> -- <code>50000</code>, which is the limit dictated by Google but can be less
|
70
|
-
* <code>:batch_size</code> -- <code>1001</code> (not <code>1000</code> due to a bug in DataMapper)
|
71
62
|
* <code>:gzip</code> -- <code>true</code>
|
72
63
|
* <code>:ping_google</code> -- <code>true</code>
|
73
64
|
* <code>:ping_yahoo</code> -- <code>false</code>, needs <code>:yahoo_app_id</code>
|
@@ -75,80 +66,25 @@ Via gem:
|
|
75
66
|
* <code>:ping_ask</code> -- <code>false</code>
|
76
67
|
* <code>:partial_update</code> -- <code>false</code>
|
77
68
|
|
78
|
-
=== Chaining
|
79
|
-
|
80
|
-
You can chain methods together:
|
81
|
-
|
82
|
-
BigSitemap.new(:url_options => {:host => 'example.com'}).add(Post).generate
|
83
|
-
|
84
|
-
With the Rails-specific class, you could even get away with as little code as:
|
85
|
-
|
86
|
-
BigSitemapRails.new.add(Post).generate
|
87
|
-
|
88
|
-
=== Pinging Search Engines
|
89
|
-
|
90
|
-
To ping search engines, call <code>ping_search_engines</code> after you generate the sitemap:
|
91
|
-
|
92
|
-
sitemap.generate.ping_search_engines
|
93
|
-
|
94
|
-
=== Location URLs
|
95
|
-
|
96
|
-
By default, URLs for the "loc" values are generated in the form:
|
97
|
-
|
98
|
-
:base_url/:path|<table_name>/<to_param>|<id>
|
99
|
-
|
100
|
-
Alternatively, you can pass a lambda. For example, to make use of your Rails route helper:
|
101
|
-
|
102
|
-
sitemap.add(Post,
|
103
|
-
:location => lambda { |post| post_url(post) }
|
104
|
-
)
|
105
|
-
|
106
69
|
=== Change Frequency, Priority and Last Modified
|
107
70
|
|
108
|
-
You can control "changefreq", "priority" and "lastmod" values for each record individually by passing
|
109
|
-
|
110
|
-
sitemap.add(Post,
|
111
|
-
:change_frequency => lambda { |post| ... },
|
112
|
-
:priority => lambda { |post| ... },
|
113
|
-
:last_modified => lambda { |post| ... }
|
114
|
-
)
|
115
|
-
|
116
|
-
=== Find Methods
|
117
|
-
|
118
|
-
Your models must provide either a <code>find_for_sitemap</code> or <code>all</code> class method that returns the instances that are to be included in the sitemap.
|
119
|
-
|
120
|
-
Additionally, you models must provide a <code>count_for_sitemap</code> or <code>count</code> class method that returns a count of the instances to be included.
|
121
|
-
|
122
|
-
If you're using ActiveRecord (Rails) or DataMapper then <code>all</code> and <code>count</code> are already provided and you can make use of any supported parameter: (:conditions, :limit, :joins, :select, :order, :include, :group)
|
123
|
-
|
124
|
-
sitemap.add(Track,
|
125
|
-
:select => "id, permalink, user_id, updated_at",
|
126
|
-
:include => :user,
|
127
|
-
:conditions => "public = 1 AND state = 'finished' AND user_id IS NOT NULL",
|
128
|
-
:order => "id ASC"
|
129
|
-
)
|
71
|
+
You can control "changefreq", "priority" and "lastmod" values for each record individually by passing them as optional arguments when adding URLs:
|
130
72
|
|
131
|
-
|
73
|
+
add(product_path(product), {
|
74
|
+
:change_frequency => 'daily',
|
75
|
+
:priority => 0.5,
|
76
|
+
:last_modified => product.updated_at
|
77
|
+
})
|
132
78
|
|
133
79
|
=== Partial Update
|
134
80
|
|
135
|
-
If you enable <code>:partial_update</code>, the filename will include
|
136
|
-
|
137
|
-
=== Lock Generation Process
|
81
|
+
If you enable <code>:partial_update</code>, the filename will include the id of the first entry. This makes it possible to update just the last file with new entries without regenerating the files that already exist. You must pass the entry's id when adding the URL. For example:
|
138
82
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
sitemap.generate
|
83
|
+
BigSitemap.generate(:base_url => 'http://example.com', :partial_update => true) do
|
84
|
+
Widget.find_in_batches(:conditions => "id > #{get_last_id}").each do |widget|
|
85
|
+
add widget_path(widget), :id => widget.id
|
143
86
|
end
|
144
|
-
|
145
|
-
=== Cleaning the Sitemaps Directory
|
146
|
-
|
147
|
-
Calling the <code>clean</code> method will remove all files from the Sitemaps directory.
|
148
|
-
|
149
|
-
== Limitations
|
150
|
-
|
151
|
-
If your database is likely to shrink during the time it takes to create the sitemap then you might run into problems (the final, batched SQL select will overrun by setting a limit that is too large since it is calculated from the count, which is queried at the very beginning). In this case and your database uses incremental primary IDs then you might want to use the <code>:partial_update</code> option, which looks at the last ID instead of paginating.
|
87
|
+
end
|
152
88
|
|
153
89
|
== TODO
|
154
90
|
|
data/Rakefile
CHANGED
@@ -15,8 +15,8 @@ rescue LoadError
|
|
15
15
|
puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
|
16
16
|
end
|
17
17
|
|
18
|
-
require '
|
19
|
-
|
18
|
+
require 'rdoc/task'
|
19
|
+
RDoc::Task.new do |rdoc|
|
20
20
|
rdoc.rdoc_dir = 'rdoc'
|
21
21
|
rdoc.title = 'big_sitemap'
|
22
22
|
rdoc.options << '--line-numbers' << '--inline-source'
|
data/VERSION.yml
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
---
|
2
|
-
:major:
|
3
|
-
:minor:
|
4
|
-
:patch:
|
1
|
+
---
|
2
|
+
:major: 1
|
3
|
+
:minor: 0
|
4
|
+
:patch: 0
|
5
5
|
:build:
|
data/lib/big_sitemap.rb
CHANGED
@@ -6,31 +6,57 @@ require 'big_sitemap/builder'
|
|
6
6
|
class BigSitemap
|
7
7
|
DEFAULTS = {
|
8
8
|
:max_per_sitemap => Builder::MAX_URLS,
|
9
|
-
:batch_size => 1001,
|
10
|
-
:document_path => '
|
9
|
+
:batch_size => 1001, # TODO: Deprecate
|
10
|
+
:document_path => '/',
|
11
11
|
:gzip => true,
|
12
12
|
|
13
|
-
#
|
13
|
+
# Opinionated
|
14
14
|
:ping_google => true,
|
15
15
|
:ping_yahoo => false, # needs :yahoo_app_id
|
16
16
|
:ping_bing => false,
|
17
17
|
:ping_ask => false
|
18
18
|
}
|
19
19
|
|
20
|
+
# TODO: Deprecate
|
20
21
|
COUNT_METHODS = [:count_for_sitemap, :count]
|
21
22
|
FIND_METHODS = [:find_for_sitemap, :all]
|
22
23
|
TIMESTAMP_METHODS = [:updated_at, :updated_on, :updated, :created_at, :created_on, :created]
|
23
24
|
PARAM_METHODS = [:to_param, :id]
|
24
25
|
|
26
|
+
class << self
|
27
|
+
def generate(options={}, &block)
|
28
|
+
@sitemap = self.new(options)
|
29
|
+
|
30
|
+
@sitemap.first_id_of_last_sitemap = first_id_of_last_sitemap
|
31
|
+
|
32
|
+
instance_eval(&block)
|
33
|
+
|
34
|
+
@sitemap.with_lock do
|
35
|
+
@sitemap.generate(options)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
def first_id_of_last_sitemap
|
42
|
+
Dir["#{@sitemap.document_full}sitemap*.{xml,xml.gz}"].map do |file|
|
43
|
+
file.to_s.scan(/sitemap_(.+).xml/).flatten.last.to_i
|
44
|
+
end.sort.last
|
45
|
+
end
|
46
|
+
|
47
|
+
def add(path, options={})
|
48
|
+
@sitemap.add_path(path, options)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
25
52
|
def initialize(options={})
|
26
53
|
@options = DEFAULTS.merge options
|
27
|
-
@options[:document_path] ||= @options[:path] #for legacy reasons
|
28
54
|
|
29
55
|
if @options[:max_per_sitemap] <= 1
|
30
56
|
raise ArgumentError, '":max_per_sitemap" must be greater than 1'
|
31
57
|
end
|
32
58
|
|
33
|
-
if @options[:url_options]
|
59
|
+
if @options[:url_options] && !@options[:base_url]
|
34
60
|
@options[:base_url] = URI::Generic.build( {:scheme => "http"}.merge(@options.delete(:url_options)) ).to_s
|
35
61
|
end
|
36
62
|
|
@@ -39,8 +65,8 @@ class BigSitemap
|
|
39
65
|
end
|
40
66
|
@options[:url_path] ||= @options[:document_path]
|
41
67
|
|
42
|
-
|
43
|
-
raise ArgumentError, '
|
68
|
+
unless @options[:document_root]
|
69
|
+
raise ArgumentError, 'Document root must be specified with the ":document_root" option"'
|
44
70
|
end
|
45
71
|
|
46
72
|
@options[:document_full] ||= File.join(@options[:document_root], @options[:document_path])
|
@@ -55,7 +81,20 @@ class BigSitemap
|
|
55
81
|
@sitemap_files = []
|
56
82
|
end
|
57
83
|
|
84
|
+
def first_id_of_last_sitemap
|
85
|
+
@first_id_of_last_sitemap
|
86
|
+
end
|
87
|
+
|
88
|
+
def first_id_of_last_sitemap=(first_id)
|
89
|
+
@first_id_of_last_sitemap = first_id
|
90
|
+
end
|
91
|
+
|
92
|
+
def document_full
|
93
|
+
@options[:document_full]
|
94
|
+
end
|
95
|
+
|
58
96
|
def add(model, options={})
|
97
|
+
warn 'BigSitemap#add is deprecated. Please use BigSitemap.generate and call add inside the block (in BigSitemap 1.0.0+). You will have to perform the find and generate the path for each record yourself.'
|
59
98
|
@models << model
|
60
99
|
|
61
100
|
filename_suffix = @models.count(model) - 1
|
@@ -72,7 +111,14 @@ class BigSitemap
|
|
72
111
|
self
|
73
112
|
end
|
74
113
|
|
114
|
+
def add_path(path, options)
|
115
|
+
@paths ||= []
|
116
|
+
@paths << [path, options]
|
117
|
+
self
|
118
|
+
end
|
119
|
+
|
75
120
|
def add_static(url, time = nil, frequency = nil, priority = nil)
|
121
|
+
warn 'BigSitemap#add_static is deprecated. Please use BigSitemap#add_path instead'
|
76
122
|
@static_pages ||= []
|
77
123
|
@static_pages << [url, time, frequency, priority]
|
78
124
|
self
|
@@ -89,38 +135,120 @@ class BigSitemap
|
|
89
135
|
STDERR.puts 'Lockfile exists' if $VERBOSE
|
90
136
|
end
|
91
137
|
|
92
|
-
def
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
name = table_name(name) unless name.is_a? String
|
98
|
-
File.join(@options[:document_full], "sitemap_#{name}")
|
138
|
+
def file_name(name=nil)
|
139
|
+
name = table_name(name) unless (name.nil? || name.is_a?(String))
|
140
|
+
prefix = 'sitemap'
|
141
|
+
prefix << '_' unless name.nil?
|
142
|
+
File.join(@options[:document_full], "#{prefix}#{name}")
|
99
143
|
end
|
100
144
|
|
101
145
|
def dir_files
|
102
|
-
File.join(@options[:document_full], "
|
146
|
+
File.join(@options[:document_full], "sitemap*.{xml,xml.gz}")
|
103
147
|
end
|
104
148
|
|
105
149
|
def clean
|
106
150
|
Dir[dir_files].each do |file|
|
107
151
|
FileUtils.rm file
|
108
152
|
end
|
153
|
+
|
109
154
|
self
|
110
155
|
end
|
111
156
|
|
112
|
-
|
157
|
+
# TODO: Deprecate (move to private)
|
158
|
+
def generate(options={})
|
159
|
+
clean unless options[:partial_update]
|
160
|
+
|
161
|
+
# TODO: Deprecate
|
113
162
|
prepare_update
|
114
163
|
|
164
|
+
add_urls
|
165
|
+
|
166
|
+
# TODO: Deprecate
|
115
167
|
generate_models
|
116
168
|
generate_static
|
169
|
+
|
117
170
|
generate_sitemap_index
|
171
|
+
|
172
|
+
ping_search_engines
|
173
|
+
|
174
|
+
self
|
175
|
+
end
|
176
|
+
|
177
|
+
def add_urls
|
178
|
+
return self if Array(@paths).empty?
|
179
|
+
|
180
|
+
with_sitemap do |builder|
|
181
|
+
@paths.each do |path, options|
|
182
|
+
url = File.join @options[:base_url], File.basename(path)
|
183
|
+
builder.add_url! url, options
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
118
187
|
self
|
119
188
|
end
|
120
189
|
|
190
|
+
# Create a sitemap index document
|
191
|
+
def generate_sitemap_index(files=nil)
|
192
|
+
files ||= Dir[dir_files]
|
193
|
+
|
194
|
+
with_sitemap({:name => 'index', :type => 'index'}) do |sitemap|
|
195
|
+
for path in files
|
196
|
+
next if path =~ /index/
|
197
|
+
sitemap.add_url! url_for_sitemap(path), :last_modified => File.stat(path).mtime
|
198
|
+
end
|
199
|
+
end
|
200
|
+
|
201
|
+
self
|
202
|
+
end
|
203
|
+
|
204
|
+
def ping_search_engines
|
205
|
+
require 'net/http'
|
206
|
+
require 'cgi'
|
207
|
+
|
208
|
+
sitemap_uri = CGI::escape(url_for_sitemap(@sitemap_files.last))
|
209
|
+
|
210
|
+
if @options[:ping_google]
|
211
|
+
Net::HTTP.get('www.google.com', "/webmasters/tools/ping?sitemap=#{sitemap_uri}")
|
212
|
+
end
|
213
|
+
|
214
|
+
if @options[:ping_yahoo]
|
215
|
+
if @options[:yahoo_app_id]
|
216
|
+
Net::HTTP.get(
|
217
|
+
'search.yahooapis.com', "/SiteExplorerService/V1/updateNotification?" +
|
218
|
+
"appid=#{@options[:yahoo_app_id]}&url=#{sitemap_uri}"
|
219
|
+
)
|
220
|
+
else
|
221
|
+
STDERR.puts 'unable to ping Yahoo: no ":yahoo_app_id" provided'
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
if @options[:ping_bing]
|
226
|
+
Net::HTTP.get('www.bing.com', "/webmaster/ping.aspx?siteMap=#{sitemap_uri}")
|
227
|
+
end
|
228
|
+
|
229
|
+
if @options[:ping_ask]
|
230
|
+
Net::HTTP.get('submissions.ask.com', "/ping?sitemap=#{sitemap_uri}")
|
231
|
+
end
|
232
|
+
end
|
233
|
+
|
234
|
+
# TODO: Deprecate
|
235
|
+
def get_last_id(filename)
|
236
|
+
Dir["#{filename}*.{xml,xml.gz}"].map do |file|
|
237
|
+
file.to_s.scan(/#{filename}_(.+).xml/).flatten.last.to_i
|
238
|
+
end.sort.last
|
239
|
+
end
|
240
|
+
|
241
|
+
private
|
242
|
+
|
243
|
+
# TODO: Deprecate
|
244
|
+
def table_name(model)
|
245
|
+
model.table_name
|
246
|
+
end
|
247
|
+
|
248
|
+
# TODO: Deprecate
|
121
249
|
def generate_models
|
122
250
|
for model, options in @sources
|
123
|
-
with_sitemap(
|
251
|
+
with_sitemap(options.dup.merge({:name => model})) do |sitemap|
|
124
252
|
last_id = nil #id of last processed item
|
125
253
|
count_method = pick_method(model, COUNT_METHODS)
|
126
254
|
find_method = pick_method(model, FIND_METHODS)
|
@@ -132,7 +260,8 @@ class BigSitemap
|
|
132
260
|
find_options[key] = options.delete(key)
|
133
261
|
end
|
134
262
|
|
135
|
-
|
263
|
+
primary_method = options.delete(:primary_column)
|
264
|
+
primary_column = "#{table_name(model)}.#{primary_method}"
|
136
265
|
|
137
266
|
count = model.send(count_method, find_options.merge(:select => (primary_column || '*'), :include => nil))
|
138
267
|
count = find_options[:limit].to_i if find_options[:limit] && find_options[:limit].to_i < count
|
@@ -179,14 +308,20 @@ class BigSitemap
|
|
179
308
|
File.join @options[:base_url], options[:path], record.send(param_method).to_s
|
180
309
|
end
|
181
310
|
|
182
|
-
change_frequency = options[:change_frequency]
|
311
|
+
change_frequency = options[:change_frequency]
|
183
312
|
freq = change_frequency.is_a?(Proc) ? change_frequency.call(record) : change_frequency
|
184
313
|
|
185
314
|
priority = options[:priority]
|
186
315
|
pri = priority.is_a?(Proc) ? priority.call(record) : priority
|
187
316
|
|
188
|
-
last_id = primary_column ? record.send(
|
189
|
-
|
317
|
+
last_id = primary_column ? record.send(primary_method) : nil
|
318
|
+
|
319
|
+
sitemap.add_url!(location, {
|
320
|
+
:last_modified => last_mod,
|
321
|
+
:change_frequency => freq,
|
322
|
+
:priority => pri,
|
323
|
+
:part_number => last_id
|
324
|
+
}) if location
|
190
325
|
end
|
191
326
|
end
|
192
327
|
end
|
@@ -195,66 +330,28 @@ class BigSitemap
|
|
195
330
|
self
|
196
331
|
end
|
197
332
|
|
333
|
+
# TODO: Deprecate
|
198
334
|
def generate_static
|
199
335
|
return self if Array(@static_pages).empty?
|
200
|
-
with_sitemap('static', :type => 'static') do |sitemap|
|
336
|
+
with_sitemap({:name => 'static', :type => 'static'}) do |sitemap|
|
201
337
|
@static_pages.each do |location, last_mod, freq, pri|
|
202
|
-
sitemap.add_url!(location,
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
# Create a sitemap index document
|
209
|
-
def generate_sitemap_index(files = nil)
|
210
|
-
files ||= Dir[dir_files]
|
211
|
-
with_sitemap 'index', :type => 'index' do |sitemap|
|
212
|
-
for path in files
|
213
|
-
next if path =~ /index/
|
214
|
-
sitemap.add_url!(url_for_sitemap(path), File.stat(path).mtime)
|
338
|
+
sitemap.add_url!(location, {
|
339
|
+
:last_modified => last_mod,
|
340
|
+
:change_frequency => freq,
|
341
|
+
:priority => pri
|
342
|
+
})
|
215
343
|
end
|
216
344
|
end
|
217
345
|
self
|
218
346
|
end
|
219
347
|
|
220
|
-
|
221
|
-
require 'net/http'
|
222
|
-
require 'cgi'
|
223
|
-
|
224
|
-
sitemap_uri = CGI::escape(url_for_sitemap(@sitemap_files.last))
|
225
|
-
|
226
|
-
if @options[:ping_google]
|
227
|
-
Net::HTTP.get('www.google.com', "/webmasters/tools/ping?sitemap=#{sitemap_uri}")
|
228
|
-
end
|
229
|
-
|
230
|
-
if @options[:ping_yahoo]
|
231
|
-
if @options[:yahoo_app_id]
|
232
|
-
Net::HTTP.get(
|
233
|
-
'search.yahooapis.com', "/SiteExplorerService/V1/updateNotification?" +
|
234
|
-
"appid=#{@options[:yahoo_app_id]}&url=#{sitemap_uri}"
|
235
|
-
)
|
236
|
-
else
|
237
|
-
STDERR.puts 'unable to ping Yahoo: no ":yahoo_app_id" provided'
|
238
|
-
end
|
239
|
-
end
|
240
|
-
|
241
|
-
if @options[:ping_bing]
|
242
|
-
Net::HTTP.get('www.bing.com', "/webmaster/ping.aspx?siteMap=#{sitemap_uri}")
|
243
|
-
end
|
244
|
-
|
245
|
-
if @options[:ping_ask]
|
246
|
-
Net::HTTP.get('submissions.ask.com', "/ping?sitemap=#{sitemap_uri}")
|
247
|
-
end
|
248
|
-
end
|
249
|
-
|
250
|
-
private
|
251
|
-
|
348
|
+
# TODO: Deprecate
|
252
349
|
def prepare_update
|
253
350
|
@files_to_move = []
|
254
351
|
@sources.each do |model, options|
|
255
352
|
if options[:partial_update] && (primary_column = options[:primary_column]) && (last_id = get_last_id(options[:filename]))
|
256
353
|
primary_column_value = escape_if_string last_id #escape '
|
257
|
-
options[:conditions] = [options[:conditions], "(#{primary_column} >= #{primary_column_value})"].compact.join(' AND ')
|
354
|
+
options[:conditions] = [options[:conditions], "(#{table_name(model)}.#{primary_column} >= #{primary_column_value})"].compact.join(' AND ')
|
258
355
|
options[:start_part_id] = last_id
|
259
356
|
end
|
260
357
|
end
|
@@ -270,12 +367,14 @@ class BigSitemap
|
|
270
367
|
FileUtils.rm lock_file
|
271
368
|
end
|
272
369
|
|
273
|
-
def with_sitemap(
|
274
|
-
options[:filename]
|
275
|
-
options[:type]
|
276
|
-
options[:max_urls]
|
277
|
-
options[:gzip]
|
278
|
-
options[:indent]
|
370
|
+
def with_sitemap(options={})
|
371
|
+
options[:filename] ||= file_name(options[:name])
|
372
|
+
options[:type] ||= 'sitemap'
|
373
|
+
options[:max_urls] ||= @options["max_per_#{options[:type]}".to_sym]
|
374
|
+
options[:gzip] ||= @options[:gzip]
|
375
|
+
options[:indent] ||= 2
|
376
|
+
options[:partial_update] ||= @options[:partial_update]
|
377
|
+
options[:start_part_id] ||= first_id_of_last_sitemap
|
279
378
|
|
280
379
|
sitemap = if options[:type] == 'index'
|
281
380
|
IndexBuilder.new(options)
|
@@ -290,16 +389,10 @@ class BigSitemap
|
|
290
389
|
yield sitemap
|
291
390
|
ensure
|
292
391
|
sitemap.close!
|
293
|
-
@sitemap_files.concat sitemap.
|
392
|
+
@sitemap_files.concat sitemap.filepaths!
|
294
393
|
end
|
295
394
|
end
|
296
395
|
|
297
|
-
def get_last_id(filename)
|
298
|
-
Dir["#{filename}*.{xml,xml.gz}"].map do |file|
|
299
|
-
file.to_s.scan(/#{filename}_(.+).xml/).flatten.last.to_i
|
300
|
-
end.sort.last
|
301
|
-
end
|
302
|
-
|
303
396
|
def pick_method(model, candidates)
|
304
397
|
method = nil
|
305
398
|
candidates.each do |candidate|
|
@@ -311,6 +404,7 @@ class BigSitemap
|
|
311
404
|
method
|
312
405
|
end
|
313
406
|
|
407
|
+
# TODO: Deprecate
|
314
408
|
def escape_if_string(value)
|
315
409
|
(value.to_i.to_s == value.to_s) ? value.to_i : "'#{value.gsub("'", %q(\\\'))}'"
|
316
410
|
end
|
@@ -323,38 +417,21 @@ end
|
|
323
417
|
|
324
418
|
|
325
419
|
class BigSitemapRails < BigSitemap
|
326
|
-
|
327
|
-
|
328
|
-
include ActionController::UrlWriter
|
329
|
-
end
|
330
|
-
|
331
|
-
def initialize(options={})
|
332
|
-
raise "No Rails Environment loaded" unless defined? Rails
|
333
|
-
require 'action_controller'
|
334
|
-
|
335
|
-
if Rails.version >= "3"
|
336
|
-
self.class.send(:include, Rails.application.routes.url_helpers)
|
337
|
-
end
|
420
|
+
def self.generate(options={}, &block)
|
421
|
+
raise 'No Rails Environment loaded' unless defined? Rails
|
338
422
|
|
339
423
|
DEFAULTS.merge!(:document_root => "#{Rails.root}/public", :url_options => default_url_options)
|
340
|
-
super(options)
|
424
|
+
super(options, &block)
|
341
425
|
end
|
342
|
-
|
343
426
|
end
|
344
427
|
|
345
428
|
|
346
429
|
class BigSitemapMerb < BigSitemap
|
347
|
-
|
348
|
-
|
349
|
-
raise "No Merb Environment loaded" unless defined? Merb
|
430
|
+
def self.generate(options={}, &block)
|
431
|
+
raise 'No Merb Environment loaded' unless defined? ::Merb
|
350
432
|
require 'extlib'
|
351
433
|
|
352
434
|
DEFAULTS.merge!(:document_root => "#{Merb.root}/public")
|
353
|
-
super(options)
|
435
|
+
super(options, &block)
|
354
436
|
end
|
355
|
-
|
356
|
-
def table_name(model)
|
357
|
-
Extlib::Inflection.tableize(model.to_s)
|
358
|
-
end
|
359
|
-
|
360
437
|
end
|