sc-big_sitemap 0.8.3.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -0
- data/Gemfile.lock +23 -0
- data/History.txt +88 -0
- data/LICENSE +22 -0
- data/README.rdoc +179 -0
- data/Rakefile +69 -0
- data/VERSION +1 -0
- data/lib/big_sitemap.rb +367 -0
- data/lib/big_sitemap/builder.rb +184 -0
- data/sc-big_sitemap.gemspec +22 -0
- data/test/big_sitemap_test.rb +485 -0
- data/test/fixtures/test_model.rb +48 -0
- data/test/test_helper.rb +117 -0
- metadata +114 -0
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
big_sitemap (0.8.3.3)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
mocha (0.9.10)
|
10
|
+
rake
|
11
|
+
nokogiri (1.4.4)
|
12
|
+
rake (0.8.7)
|
13
|
+
shoulda (2.11.3)
|
14
|
+
|
15
|
+
PLATFORMS
|
16
|
+
ruby
|
17
|
+
|
18
|
+
DEPENDENCIES
|
19
|
+
big_sitemap!
|
20
|
+
bundler
|
21
|
+
mocha
|
22
|
+
nokogiri
|
23
|
+
shoulda
|
data/History.txt
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
=== 0.8.3.1 / 2011-05-26
|
2
|
+
|
3
|
+
* added support for absolute :url_path
|
4
|
+
* added first basic mobile sitemap support
|
5
|
+
|
6
|
+
=== 0.8.3 / 2011-03-08
|
7
|
+
|
8
|
+
* Separate URL and file paths are now supported via the :document_path
|
9
|
+
and :url_path options
|
10
|
+
* Fixes an issue when initializing in Rails 3
|
11
|
+
|
12
|
+
=== 0.8.2 / 2011-01-25
|
13
|
+
|
14
|
+
* Fixes an issue where sitemap files were not being generated if the same model
|
15
|
+
was added more than once (fixes issue #5: https://github.com/alexrabarts/big_sitemap/issues/#issue/5)
|
16
|
+
|
17
|
+
=== 0.8.1 / 2011-01-25
|
18
|
+
|
19
|
+
* API change: Rails/Merb are no longer automatically detected - use BigSitemapRails and BigSitemapMerb instead
|
20
|
+
* API change: Rails' polymorphic_url helper is no longer used to generate URLs (use a lambda with the new :location option instead)
|
21
|
+
* Static resources can now be added using the add_static method
|
22
|
+
* Incremental updates are now available via the :partial_update option
|
23
|
+
* "loc" URL values can now be generated with lambdas
|
24
|
+
* Sitemap files can now be locked while being generated using the with_lock method
|
25
|
+
* Several bug fixes
|
26
|
+
|
27
|
+
=== 0.5.1 / 2009-09-07
|
28
|
+
|
29
|
+
* Fixes an issue with the :last_modified key being passed into the find method options
|
30
|
+
|
31
|
+
=== 0.5.0 / 2009-09-07
|
32
|
+
|
33
|
+
* Add support for lambdas when specifying lastmod
|
34
|
+
|
35
|
+
=== 0.4.0 / 2009-08-09
|
36
|
+
|
37
|
+
* Use Bing instead of Live/MSN. Note, this breaks backwards compatibility as
|
38
|
+
the old :ping_msn option is now :ping_bing.
|
39
|
+
|
40
|
+
=== 0.3.5 / 2009-08-05
|
41
|
+
|
42
|
+
* Fixed bugs in root_url generation and url_for_sitemap generation
|
43
|
+
|
44
|
+
=== 0.3.4 / 2009-07-02
|
45
|
+
|
46
|
+
* BigSitemap-specific options are no longer passed through to the ORM's find method
|
47
|
+
|
48
|
+
=== 0.3.2 / 2009-06-09
|
49
|
+
|
50
|
+
* Better handling of URLs when Rails' polymorphic_url isn't available in the model
|
51
|
+
|
52
|
+
=== 0.3.2 / 2009-06-09
|
53
|
+
|
54
|
+
* Fixes "uninitialized constant ActionController" error
|
55
|
+
* Fixes "Unknown key(s): path" error
|
56
|
+
|
57
|
+
=== 0.3.1 / 2009-04-18
|
58
|
+
|
59
|
+
* Fixes broken gemspec
|
60
|
+
|
61
|
+
=== 0.3.0 / 2009-04-06
|
62
|
+
|
63
|
+
* API change: Pass model through as first argument to add method, e.g. sitemap.add(Posts, {:path => 'articles'})
|
64
|
+
* API change: Use Rails' polymorphic_url helper to generate URLs if Rails is being used
|
65
|
+
* API change: Only ping search engines when ping_search_engines is explicitly called
|
66
|
+
* Add support for passing options through to the model's find method, e.g. :conditions
|
67
|
+
* Allow base URL to be specified as a hash as well as a string
|
68
|
+
* Add support for changefreq and priority
|
69
|
+
* Pluralize sitemap model filenames
|
70
|
+
* GZipping may optionally be turned off
|
71
|
+
|
72
|
+
=== 0.2.1 / 2009-03-12
|
73
|
+
|
74
|
+
* Normalize path arguments so it no longer matters whether a leading slash is used or not
|
75
|
+
|
76
|
+
=== 0.2.0 / 2009-03-11
|
77
|
+
|
78
|
+
* Methods are now chainable
|
79
|
+
|
80
|
+
=== 0.1.4 / 2009-03-11
|
81
|
+
|
82
|
+
* Add clean method to clear out Sitemaps directory
|
83
|
+
* Make methods chainable
|
84
|
+
|
85
|
+
=== 0.1.3 / 2009-03-10
|
86
|
+
|
87
|
+
* Initial release
|
88
|
+
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
(The MIT License)
|
2
|
+
|
3
|
+
Copyright (c) 2009 Stateless Systems (http://statelesssystems.com)
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
'Software'), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
20
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
21
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
22
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,179 @@
|
|
1
|
+
= BigSitemap
|
2
|
+
|
3
|
+
BigSitemap is a {Sitemap}[http://sitemaps.org] generator suitable for applications with greater than 50,000 URLs. It splits large Sitemaps into multiple files, gzips the files to minimize bandwidth usage, batches database queries to minimize memory usage, supports incremental updates, can be set up with just a few lines of code and is compatible with just about any framework.
|
4
|
+
|
5
|
+
BigSitemap is best run periodically through a Rake/Thor task.
|
6
|
+
|
7
|
+
require 'big_sitemap'
|
8
|
+
|
9
|
+
sitemap = BigSitemap.new(
|
10
|
+
:url_options => {:host => 'example.com'},
|
11
|
+
:document_root => "#{APP_ROOT}/public"
|
12
|
+
)
|
13
|
+
|
14
|
+
# Add a model
|
15
|
+
sitemap.add Product
|
16
|
+
|
17
|
+
# Add another model with some options
|
18
|
+
sitemap.add(Post,
|
19
|
+
:conditions => {:published => true},
|
20
|
+
:path => 'articles',
|
21
|
+
:change_frequency => 'daily',
|
22
|
+
:priority => 0.5
|
23
|
+
)
|
24
|
+
|
25
|
+
# Add a static resource
|
26
|
+
sitemap.add_static('http://example.com/about', Time.now, 'monthly', 0.1)
|
27
|
+
|
28
|
+
# Generate the files
|
29
|
+
sitemap.generate
|
30
|
+
|
31
|
+
The code above will create a minimum of four files:
|
32
|
+
|
33
|
+
1. public/sitemaps/sitemap_index.xml.gz
|
34
|
+
2. public/sitemaps/sitemap_products.xml.gz
|
35
|
+
3. public/sitemaps/sitemap_posts.xml.gz
|
36
|
+
4. public/sitemaps/sitemap_static.xml.gz
|
37
|
+
|
38
|
+
If your sitemaps grow beyond 50,000 URLs (this limit can be overridden with the <code>:max_per_sitemap</code> option), the sitemap files will be partitioned into multiple files (<code>sitemap_products_1.xml.gz</code>, <code>sitemap_products_2.xml.gz</code>, ...).
|
39
|
+
|
40
|
+
=== Framework-specific Classes
|
41
|
+
|
42
|
+
Use the framework-specific classes to take advantage of built-in shortcuts.
|
43
|
+
|
44
|
+
==== Rails
|
45
|
+
|
46
|
+
<code>BigSitemapRails</code> includes <code>UrlWriter</code> (useful for making use of your Rails routes - see the Location URLs section) and deals with setting the <code>:document_root</code> and <code>:url_options</code> initialization options.
|
47
|
+
|
48
|
+
==== Merb
|
49
|
+
|
50
|
+
<code>BigSitemapMerb</code> deals with setting the <code>:document_root</code> initialization option.
|
51
|
+
|
52
|
+
== Install
|
53
|
+
|
54
|
+
Via gem:
|
55
|
+
|
56
|
+
sudo gem install big_sitemap
|
57
|
+
|
58
|
+
== Advanced
|
59
|
+
|
60
|
+
=== Initialization Options
|
61
|
+
|
62
|
+
* <code>:url_options</code> -- hash with <code>:host</code>, optionally <code>:port</code> and <code>:protocol</code>
|
63
|
+
* <code>:base_url</code> -- string alternative to <code>:url_options</code>, e.g. <code>'https://example.com:8080/'</code>
|
64
|
+
* <code>:url_path</code> -- string path_name to sitemaps folder, defaults to <code>:document_path</code>
|
65
|
+
* <code>:document_root</code> -- string
|
66
|
+
* <code>:document_path</code> -- string document path to generation folder, relative to :document_root, defaults to <code>'sitemaps/'</code>
|
67
|
+
* <code>:path</code> -- string, alias for ":document_path" for legacy reasons
|
68
|
+
* <code>:document_full</code> -- string absolute document path to generation folder - defaults to <code>:document_root/:document_path</code>
|
69
|
+
* <code>:max_per_sitemap</code> -- <code>50000</code>, which is the limit dictated by Google but can be less
|
70
|
+
* <code>:batch_size</code> -- <code>1001</code> (not <code>1000</code> due to a bug in DataMapper)
|
71
|
+
* <code>:gzip</code> -- <code>true</code>
|
72
|
+
* <code>:ping_google</code> -- <code>true</code>
|
73
|
+
* <code>:ping_yahoo</code> -- <code>false</code>, needs <code>:yahoo_app_id</code>
|
74
|
+
* <code>:ping_bing</code> -- <code>false</code>
|
75
|
+
* <code>:ping_ask</code> -- <code>false</code>
|
76
|
+
* <code>:partial_update</code> -- <code>false</code>
|
77
|
+
|
78
|
+
=== Chaining
|
79
|
+
|
80
|
+
You can chain methods together:
|
81
|
+
|
82
|
+
BigSitemap.new(:url_options => {:host => 'example.com'}).add(Post).generate
|
83
|
+
|
84
|
+
With the Rails-specific class, you could even get away with as little code as:
|
85
|
+
|
86
|
+
BigSitemapRails.new.add(Post).generate
|
87
|
+
|
88
|
+
=== Pinging Search Engines
|
89
|
+
|
90
|
+
To ping search engines, call <code>ping_search_engines</code> after you generate the sitemap:
|
91
|
+
|
92
|
+
sitemap.generate.ping_search_engines
|
93
|
+
|
94
|
+
=== Location URLs
|
95
|
+
|
96
|
+
By default, URLs for the "loc" values are generated in the form:
|
97
|
+
|
98
|
+
:base_url/:path|<table_name>/<to_param>|<id>
|
99
|
+
|
100
|
+
Alternatively, you can pass a lambda. For example, to make use of your Rails route helper:
|
101
|
+
|
102
|
+
sitemap.add(Post,
|
103
|
+
:location => lambda { |post| post_url(post) }
|
104
|
+
)
|
105
|
+
|
106
|
+
=== Mobile Sitemap support
|
107
|
+
|
108
|
+
To create a mobile Sitemap for a model, just pass <code>:mobile => true</code>
|
109
|
+
|
110
|
+
sitemap.add(Post,
|
111
|
+
:location => lambda { |post| post_url(post) },
|
112
|
+
:mobile => true
|
113
|
+
)
|
114
|
+
|
115
|
+
=== Change Frequency, Priority and Last Modified
|
116
|
+
|
117
|
+
You can control "changefreq", "priority" and "lastmod" values for each record individually by passing lambdas instead of fixed values:
|
118
|
+
|
119
|
+
sitemap.add(Post,
|
120
|
+
:change_frequency => lambda { |post| ... },
|
121
|
+
:priority => lambda { |post| ... },
|
122
|
+
:last_modified => lambda { |post| ... }
|
123
|
+
)
|
124
|
+
|
125
|
+
=== Find Methods
|
126
|
+
|
127
|
+
Your models must provide either a <code>find_for_sitemap</code> or <code>all</code> class method that returns the instances that are to be included in the sitemap.
|
128
|
+
|
129
|
+
Additionally, your models must provide a <code>count_for_sitemap</code> or <code>count</code> class method that returns a count of the instances to be included.
|
130
|
+
|
131
|
+
If you're using ActiveRecord (Rails) or DataMapper then <code>all</code> and <code>count</code> are already provided and you can make use of any supported parameter: (:conditions, :limit, :joins, :select, :order, :include, :group)
|
132
|
+
|
133
|
+
sitemap.add(Track,
|
134
|
+
:select => "id, permalink, user_id, updated_at",
|
135
|
+
:include => :user,
|
136
|
+
:conditions => "public = 1 AND state = 'finished' AND user_id IS NOT NULL",
|
137
|
+
:order => "id ASC"
|
138
|
+
)
|
139
|
+
|
140
|
+
If you provide your own <code>find_for_sitemap</code> or <code>all</code> method then it should be able to handle the <code>:offset</code> and <code>:limit</code> options, in the same way that ActiveRecord and DataMapper handle them. This is especially important if you have more than 50,000 URLs.
|
141
|
+
|
142
|
+
=== Partial Update
|
143
|
+
|
144
|
+
If you enable <code>:partial_update</code>, the filename will include an id smaller than the id of the first entry. This makes it possible to update just the last file with new entries without re-generating the files that already exist.
|
145
|
+
|
146
|
+
=== Lock Generation Process
|
147
|
+
|
148
|
+
To prevent another process from overwriting the generated files, use the <code>with_lock</code> method:
|
149
|
+
|
150
|
+
sitemap.with_lock do
|
151
|
+
sitemap.generate
|
152
|
+
end
|
153
|
+
|
154
|
+
=== Cleaning the Sitemaps Directory
|
155
|
+
|
156
|
+
Calling the <code>clean</code> method will remove all files from the Sitemaps directory.
|
157
|
+
|
158
|
+
== Limitations
|
159
|
+
|
160
|
+
If your database is likely to shrink during the time it takes to create the sitemap then you might run into problems (the final, batched SQL select will overrun by setting a limit that is too large since it is calculated from the count, which is queried at the very beginning). In this case, if your database uses incremental primary IDs, you might want to use the <code>:partial_update</code> option, which looks at the last ID instead of paginating.
|
161
|
+
|
162
|
+
== TODO
|
163
|
+
|
164
|
+
Tests for framework-specific components.
|
165
|
+
|
166
|
+
== Credits
|
167
|
+
|
168
|
+
Thanks to Alastair Brunton and Harry Love, whose work provided a starting point for this library.
|
169
|
+
|
170
|
+
Thanks also to those who have contributed patches:
|
171
|
+
|
172
|
+
* Mislav Marohnić
|
173
|
+
* Jeff Schoolcraft
|
174
|
+
* Dalibor Nasevic
|
175
|
+
* Tobias Bielohlawek, SoundCloud Ltd. {http://www.rngtng.com}[http://www.rngtng.com]
|
176
|
+
|
177
|
+
== Copyright
|
178
|
+
|
179
|
+
Copyright (c) 2010 Stateless Systems (http://statelesssystems.com). See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
|
2
|
+
#############################################################################
|
3
|
+
#
|
4
|
+
# Helper functions
|
5
|
+
#
|
6
|
+
#############################################################################
|
7
|
+
|
8
|
+
# Returns the gem name, derived from the first *.gemspec file found in the
# current working directory (memoized in @name).
#
# Only the trailing ".gemspec" extension is stripped, so gem names that
# themselves contain a dot are returned intact.
#
# Raises RuntimeError when the directory contains no gemspec.
def name
  @name ||= begin
    spec_path = Dir['*.gemspec'].first
    raise "No .gemspec file found in #{Dir.pwd}" unless spec_path
    File.basename(spec_path, '.gemspec')
  end
end
|
11
|
+
|
12
|
+
# File name of the gemspec: the gem name followed by ".gemspec".
def gemspec_file
  [name, 'gemspec'].join('.')
end
|
15
|
+
|
16
|
+
# Evaluates the gemspec file and returns (and memoizes) whatever it
# evaluates to.
#
# The file name is handed to eval so that __FILE__ and backtraces inside the
# gemspec point at the real file instead of "(eval)".
#
# NOTE(review): this evaluates the gemspec as arbitrary Ruby code; only run
# it against a trusted working copy.
def gemspec
  @gemspec ||= eval(File.read(gemspec_file), binding, gemspec_file)
end
|
19
|
+
|
20
|
+
# Name of the packaged gem file, as reported by the gemspec's file_name.
def gem_file
  spec = gemspec
  spec.file_name
end
|
23
|
+
|
24
|
+
#############################################################################
|
25
|
+
#
|
26
|
+
# Standard tasks
|
27
|
+
#
|
28
|
+
#############################################################################
|
29
|
+
|
30
|
+
desc "Deploys the built gem to the soundcloud gem repository: gems.soundcloud.com"
# Tags the current master, pushes it, copies the packaged gem to the gem
# host and regenerates the remote gem index. Depends on the :build task.
task :release => :build do
  remote_gem_host = 'soundcloud@gems.int.s-cloud.net'
  remote_gem_path = '/srv/www/gems'
  Dir.chdir File.dirname(__FILE__)

  # abort prints to STDERR and exits non-zero while still running at_exit hooks
  unless `git branch` =~ /^\* master$/
    abort "You must be on the master branch to release!"
  end

  # The gem file name doubles as the git tag of a release
  if `git fetch --tags && git tag`.split(/\n/).include?(gem_file)
    raise "Version #{gem_file} already deployed"
  end

  # One sh call per command: sh raises on a non-zero exit status, so the
  # release stops at the first failing step instead of carrying on.
  [
    "git commit -a --allow-empty -m 'Release #{gem_file}'",
    "git tag -a #{gem_file} -m 'Version #{gem_file}'",
    "git push origin master",
    "git push origin --tags",
    "scp pkg/#{gem_file} #{remote_gem_host}:#{remote_gem_path}/gems",
    "ssh #{remote_gem_host} 'gem generate_index -d #{remote_gem_path}'"
  ].each { |command| sh command }
end
|
51
|
+
|
52
|
+
require 'bundler/gem_tasks'

# API documentation built from the README and everything under lib/
require 'rdoc/task'
RDoc::Task.new do |rdoc|
  rdoc.main  = "README.rdoc"
  rdoc.title = 'big_sitemap'
  rdoc.options.push('--line-numbers', '--inline-source')
  rdoc.rdoc_files.include("README.rdoc", "lib/**/*.rb")
end

# Runs every test/**/*_test.rb file
require 'rake/testtask'
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('lib', 'test', Rake.original_dir)
  test_task.pattern = 'test/**/*_test.rb'
  test_task.verbose = false
end

task :default => :test
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.8.3.4
|
data/lib/big_sitemap.rb
ADDED
@@ -0,0 +1,367 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
require 'big_sitemap/builder'
|
5
|
+
|
6
|
+
class BigSitemap
|
7
|
+
DEFAULTS = {
|
8
|
+
:max_per_sitemap => Builder::MAX_URLS,
|
9
|
+
:batch_size => 1001,
|
10
|
+
:document_path => 'sitemaps/',
|
11
|
+
:gzip => true,
|
12
|
+
|
13
|
+
# opinionated
|
14
|
+
:ping_google => true,
|
15
|
+
:ping_yahoo => false, # needs :yahoo_app_id
|
16
|
+
:ping_bing => false,
|
17
|
+
:ping_ask => false
|
18
|
+
}
|
19
|
+
|
20
|
+
COUNT_METHODS = [:count_for_sitemap, :count]
|
21
|
+
FIND_METHODS = [:find_for_sitemap, :all]
|
22
|
+
TIMESTAMP_METHODS = [:updated_at, :updated_on, :updated, :created_at, :created_on, :created]
|
23
|
+
PARAM_METHODS = [:to_param, :id]
|
24
|
+
|
25
|
+
# Builds a generator from +options+ merged over DEFAULTS and makes sure the
# output directory exists.
#
# options - Hash of initialization options. Either :base_url or :url_options
#           must be given, and either :document_full or :document_root.
#
# Raises ArgumentError when :max_per_sitemap is <= 1, when no base URL can be
# determined, when :batch_size exceeds :max_per_sitemap, or when no output
# directory can be determined.
def initialize(options={})
  @options = DEFAULTS.merge options

  # :path is the legacy alias for :document_path. DEFAULTS always supplies a
  # :document_path, so the alias has to be compared against what the caller
  # actually passed, otherwise it could never take effect.
  if options[:path] && !options[:document_path]
    @options[:document_path] = options[:path]
  end

  if @options[:max_per_sitemap] <= 1
    raise ArgumentError, '":max_per_sitemap" must be greater than 1'
  end

  if @options[:url_options]
    @options[:base_url] = URI::Generic.build( {:scheme => "http"}.merge(@options.delete(:url_options)) ).to_s
  end

  unless @options[:base_url]
    raise ArgumentError, 'you must specify either ":url_options" hash or ":base_url" string'
  end

  # A :url_path is only treated as absolute when it starts with a scheme;
  # relative paths that merely contain "http" are still joined to the base URL.
  @options[:url_path] ||= @options[:document_path]
  unless @options[:url_path] =~ %r{\Ahttps?://}
    @options[:url_path] = File.join(@options[:base_url], @options[:url_path])
  end

  if @options[:batch_size] > @options[:max_per_sitemap]
    raise ArgumentError, '":batch_size" must be less than ":max_per_sitemap"'
  end

  # Validate before joining: File.join(nil, ...) would raise a TypeError and
  # hide the helpful message below.
  unless @options[:document_full] || @options[:document_root]
    raise ArgumentError, 'Document root must be specified with the ":document_root" option, the full path with ":document_full"'
  end
  @options[:document_full] ||= File.join(@options[:document_root], @options[:document_path])

  FileUtils.mkdir_p(@options[:document_full]) unless File.exist?(@options[:document_full])

  @sources       = []
  @models        = []
  @sitemap_files = []
end
|
58
|
+
|
59
|
+
# Registers +model+ as a source for sitemap generation.
#
# model   - class whose records are listed; it is instantiated once to find
#           out whether records respond to "id".
# options - per-model Hash. :path and :filename default to values derived
#           from the model; :primary_column defaults to 'id' when records
#           respond to it. The caller's hash is left untouched.
#
# Adding the same model again gives its file name a numeric suffix
# (_1, _2, ...). Returns self so calls can be chained.
def add(model, options={})
  # Work on a copy: writing defaults into the caller's hash would leak one
  # model's path/filename into the next add() call that reuses the hash.
  options = options.dup
  @models << model

  filename_suffix = @models.count(model) - 1

  options[:path]           ||= table_name(model)
  options[:filename]       ||= file_name(model)
  options[:primary_column] ||= 'id' if model.new.respond_to?('id')
  # Partial updates need the global switch on and no per-model opt-out
  options[:partial_update]   = @options[:partial_update] && options[:partial_update] != false

  # Build a new string rather than appending in place, so a :filename passed
  # by the caller (possibly frozen) is never modified.
  options[:filename] = "#{options[:filename]}_#{filename_suffix}" unless filename_suffix == 0

  @sources << [model, options]

  self
end
|
75
|
+
|
76
|
+
# Queues a static URL, together with its optional last-modified time, change
# frequency and priority, for the static sitemap. Returns self for chaining.
def add_static(url, time = nil, frequency = nil, priority = nil)
  (@static_pages ||= []) << [url, time, frequency, priority]
  self
end
|
81
|
+
|
82
|
+
# Runs the given block while holding the generator lock file.
#
# When the lock cannot be taken the block is skipped, a notice is printed to
# STDERR in verbose mode and nil is returned. Otherwise the block's value is
# returned and the lock is released even if the block raises.
def with_lock
  begin
    lock!
  rescue Errno::EACCES, Errno::EEXIST
    # EEXIST is what an exclusive create reports for an existing lock file
    STDERR.puts 'Lockfile exists' if $VERBOSE
    return
  end

  # Only lock! is guarded above: errors raised by the block itself must
  # propagate instead of being reported as an existing lock file.
  begin
    yield
  ensure
    unlock!
  end
end
|
92
|
+
|
93
|
+
# Asks the model for its table name.
def table_name(model)
  resolved = model.table_name
  resolved
end
|
96
|
+
|
97
|
+
# Path (without extension) of the sitemap file for +name+ inside the output
# directory. Strings are used verbatim; anything else is treated as a model
# and resolved through table_name.
def file_name(name)
  base = name.is_a?(String) ? name : table_name(name)
  File.join(@options[:document_full], "sitemap_#{base}")
end
|
101
|
+
|
102
|
+
# Glob pattern matching every sitemap file (plain or gzipped) in the output
# directory.
def dir_files
  output_dir = @options[:document_full]
  File.join(output_dir, "sitemap_*.{xml,xml.gz}")
end
|
105
|
+
|
106
|
+
# Deletes every file matched by dir_files. Returns self for chaining.
def clean
  stale_files = Dir[dir_files]
  FileUtils.rm(stale_files)
  self
end
|
112
|
+
|
113
|
+
# Runs the whole generation pipeline in order: prepare the partial update,
# write the model sitemaps, write the static sitemap, then write the index.
# Returns self for chaining.
def generate
  [:prepare_update, :generate_models, :generate_static, :generate_sitemap_index].each do |step|
    send(step)
  end
  self
end
|
121
|
+
|
122
|
+
# Writes the sitemap files for every model registered through add.
#
# For each source the records are counted, split into batches of
# @options[:batch_size] and fetched batch by batch; each record becomes one
# add_url! call on the sitemap builder. When a primary column is known, every
# batch after the first is selected by "primary column > last seen id"
# instead of by offset.
#
# Raises ArgumentError when a model offers neither a count nor a find method.
# Returns self for chaining.
def generate_models
  @sources.each do |model, source_options|
    # The block below consumes (deletes) the finder keys and :primary_column.
    # It works on a copy so the hashes stored in @sources stay intact and
    # generate can be called more than once.
    options = source_options.dup

    with_sitemap(model, source_options.dup) do |sitemap|
      last_id = nil #id of last processed item
      count_method = pick_method(model, COUNT_METHODS)
      find_method  = pick_method(model, FIND_METHODS)
      raise ArgumentError, "#{model} must provide a count_for_sitemap class method" if count_method.nil?
      raise ArgumentError, "#{model} must provide a find_for_sitemap class method" if find_method.nil?

      # Split the finder options off from the BigSitemap-specific ones
      find_options = {}
      [:conditions, :limit, :joins, :select, :order, :include, :group].each do |key|
        find_options[key] = options.delete(key)
      end

      # Keep the initial conditions for later use
      conditions = find_options[:conditions]

      primary_column = options.delete(:primary_column)

      count = model.send(count_method, find_options.merge(:select => (primary_column || '*'), :include => nil))
      # A caller-supplied :limit caps the number of records that are listed
      count = find_options[:limit].to_i if find_options[:limit] && find_options[:limit].to_i < count
      num_sitemaps = 1
      num_batches  = 1

      if count > @options[:batch_size]
        num_batches  = (count.to_f / @options[:batch_size].to_f).ceil
        num_sitemaps = (count.to_f / @options[:max_per_sitemap].to_f).ceil
      end
      batches_per_sitemap = num_batches.to_f / num_sitemaps.to_f

      (1..num_sitemaps).each do |sitemap_num|
        # Work out the start and end batch numbers for this sitemap
        batch_num_start = sitemap_num == 1 ? 1 : ((sitemap_num * batches_per_sitemap).ceil - batches_per_sitemap + 1).to_i
        batch_num_end   = (batch_num_start + [batches_per_sitemap, num_batches].min).floor - 1

        (batch_num_start..batch_num_end).each do |batch_num|
          offset = (batch_num - 1) * @options[:batch_size]
          limit  = (count - offset) < @options[:batch_size] ? (count - offset) : @options[:batch_size]
          find_options.update(:limit => limit, :offset => offset) if num_batches > 1

          # Once an id has been seen, page by id instead of by offset
          if last_id && primary_column
            find_options.update(:limit => limit, :offset => nil)
            primary_column_value = escape_if_string last_id #escape '
            find_options[:conditions] = [conditions, "(#{primary_column} > #{primary_column_value})"].compact.join(' AND ')
          end

          model.send(find_method, find_options).each do |record|
            # lastmod: explicit value, lambda, or the first timestamp method
            # the record responds to (falling back to the current time)
            last_mod = options[:last_modified]
            if last_mod.is_a?(Proc)
              last_mod = last_mod.call(record)
            elsif last_mod.nil?
              last_mod_method = pick_method(record, TIMESTAMP_METHODS)
              last_mod = last_mod_method.nil? ? Time.now : record.send(last_mod_method)
            end

            param_method = pick_method(record, PARAM_METHODS)

            location =
              if options[:location].is_a?(Proc)
                options[:location].call(record)
              else
                File.join @options[:base_url], options[:path], record.send(param_method).to_s
              end

            change_frequency = options[:change_frequency] || 'weekly'
            freq = change_frequency.is_a?(Proc) ? change_frequency.call(record) : change_frequency

            priority = options[:priority]
            pri = priority.is_a?(Proc) ? priority.call(record) : priority

            last_id = primary_column ? record.send(primary_column) : nil
            sitemap.add_url!(location, last_mod, freq, pri, last_id)
          end
        end
      end
    end
  end
  self
end
|
201
|
+
|
202
|
+
# Writes the sitemap for the URLs queued through add_static; does nothing
# when no static pages were added. Returns self for chaining.
def generate_static
  unless Array(@static_pages).empty?
    with_sitemap('static', :type => 'static') do |sitemap|
      @static_pages.each do |page|
        location, last_mod, freq, pri = page
        sitemap.add_url!(location, last_mod, freq, pri)
      end
    end
  end
  self
end
|
211
|
+
|
212
|
+
# Create a sitemap index document
#
# files - Array of sitemap file paths to list; defaults to every file matched
#         by dir_files.
#
# Each listed file is added with its public URL and its modification time.
# Returns self for chaining.
def generate_sitemap_index(files = nil)
  files ||= Dir[dir_files]
  with_sitemap 'index', :type => 'index' do |sitemap|
    files.each do |path|
      # Skip only the index file itself. Matching "index" anywhere in the
      # full path would also drop sitemaps that live below a directory, or
      # belong to a table, whose name happens to contain "index".
      next if File.basename(path) =~ /\Asitemap_index(_\d+)?\.xml/
      sitemap.add_url!(url_for_sitemap(path), File.stat(path).mtime)
    end
  end
  self
end
|
223
|
+
|
224
|
+
# Notifies the enabled search engines (:ping_google, :ping_yahoo, :ping_bing,
# :ping_ask options) about the sitemap index.
#
# index_file - path of the index file to announce; defaults to the most
#              recently written sitemap file, so the method can be called
#              without arguments right after generate.
#
# Pinging Yahoo additionally needs the :yahoo_app_id option; without it a
# notice is printed to STDERR instead.
def ping_search_engines(index_file = nil)
  # loaded lazily: only needed when pinging
  require 'net/http'
  require 'cgi'

  index_file ||= @sitemap_files.last
  sitemap_uri = CGI::escape(url_for_sitemap(index_file))

  if @options[:ping_google]
    Net::HTTP.get('www.google.com', "/webmasters/tools/ping?sitemap=#{sitemap_uri}")
  end

  if @options[:ping_yahoo]
    if @options[:yahoo_app_id]
      Net::HTTP.get(
        'search.yahooapis.com', "/SiteExplorerService/V1/updateNotification?" +
          "appid=#{@options[:yahoo_app_id]}&url=#{sitemap_uri}"
      )
    else
      STDERR.puts 'unable to ping Yahoo: no ":yahoo_app_id" provided'
    end
  end

  if @options[:ping_bing]
    Net::HTTP.get('www.bing.com', "/webmaster/ping.aspx?siteMap=#{sitemap_uri}")
  end

  if @options[:ping_ask]
    Net::HTTP.get('submissions.ask.com', "/ping?sitemap=#{sitemap_uri}")
  end
end
|
254
|
+
|
255
|
+
private
|
256
|
+
|
257
|
+
# Prepares partial updates: for every source with :partial_update enabled, a
# primary column and an id found in its existing file names, the source's
# :conditions are narrowed to "primary column >= that id" and the id is
# stored as :start_part_id.
def prepare_update
  @files_to_move = []
  @sources.each do |_model, source_options|
    next unless source_options[:partial_update]

    column = source_options[:primary_column]
    next unless column

    newest_id = get_last_id(source_options[:filename])
    next unless newest_id

    quoted_id = escape_if_string(newest_id) #escape '
    source_options[:conditions] = [source_options[:conditions], "(#{column} >= #{quoted_id})"].compact.join(' AND ')
    source_options[:start_part_id] = newest_id
  end
end
|
267
|
+
|
268
|
+
# Creates the lock file inside the output directory.
#
# The file is created with CREAT | EXCL, so creation fails when it already
# exists. That failure is re-raised as Errno::EACCES, the error with_lock
# rescues to report an existing lock file.
#
# Raises Errno::EACCES when the lock file already exists.
def lock!(lock_file = 'generator.lock')
  lock_file = File.join(@options[:document_full], lock_file)
  # The flags must be the second argument; as third argument File::EXCL
  # would be interpreted as the permission bits. The empty block closes the
  # handle straight away so it does not leak.
  File.open(lock_file, File::WRONLY | File::CREAT | File::EXCL) { }
rescue Errno::EEXIST
  raise Errno::EACCES, lock_file
end
|
272
|
+
|
273
|
+
# Removes the lock file from the output directory.
def unlock!(lock_file = 'generator.lock')
  FileUtils.rm File.join(@options[:document_full], lock_file)
end
|
277
|
+
|
278
|
+
# Yields a sitemap builder configured from +options+ and always closes it
# afterwards, recording the paths it reports in @sitemap_files.
#
# name    - String or model used to derive the default file name.
# options - Hash; :type ('index' picks the IndexBuilder), :geo and :mobile
#           select the builder class; :filename, :max_urls and :gzip fall
#           back to values derived from the generator options.
def with_sitemap(name, options={})
  options[:filename] ||= file_name(name)
  options[:type]     ||= 'sitemap'
  options[:max_urls] ||= @options["max_per_#{options[:type]}".to_sym]
  # Not ||=, which would throw away an explicit :gzip => false
  options[:gzip]       = @options[:gzip] if options[:gzip].nil?
  options[:indent]     = options[:gzip] ? 0 : 2

  sitemap = if options[:type] == 'index'
    IndexBuilder.new(options)
  elsif options[:geo]
    # New string instead of <<: the filename may be shared with the hash the
    # caller keeps, and appending in place would add another "_kml" on
    # every run.
    options[:filename] = "#{options[:filename]}_kml"
    GeoBuilder.new(options)
  elsif options[:mobile]
    MobileBuilder.new(options)
  else
    Builder.new(options)
  end

  begin
    yield sitemap
  ensure
    sitemap.close!
    @sitemap_files.concat sitemap.paths!
  end
end
|
303
|
+
|
304
|
+
# Returns the highest numeric id embedded in the names of the existing
# sitemap files that start with +filename+ ("<filename>_<id>.xml[.gz]").
#
# Files without a numeric id count as 0; nil is returned when no file
# matches at all.
def get_last_id(filename)
  # Escape the path: characters such as "+", "(" or "." in a directory name
  # must match literally and not act as regex operators.
  id_pattern = /#{Regexp.escape(filename)}_(.+)\.xml/
  Dir["#{filename}*.{xml,xml.gz}"].map do |file|
    file.to_s.scan(id_pattern).flatten.last.to_i
  end.max
end
|
309
|
+
|
310
|
+
# Returns the first of +candidates+ that +model+ responds to, or nil when it
# responds to none of them.
def pick_method(model, candidates)
  candidates.find { |candidate| model.respond_to?(candidate) }
end
|
320
|
+
|
321
|
+
# Prepares +value+ for interpolation into an SQL condition.
#
# Values whose text is a plain integer are returned as Integer; everything
# else is converted to a string, its single quotes are backslash-escaped and
# the result is wrapped in single quotes.
#
# NOTE(review): backslash escaping of quotes is not understood by every
# database -- confirm against the adapters in use.
def escape_if_string(value)
  # to_s first, so Symbols, Floats and other non-String ids do not raise
  text = value.to_s
  return text.to_i if text.to_i.to_s == text

  # block form: the replacement is taken literally, no backreference parsing
  "'#{text.gsub("'") { "\\'" }}'"
end
|
324
|
+
|
325
|
+
# Public URL of the sitemap file at +path+: its base name appended to the
# :url_path option.
def url_for_sitemap(path)
  basename = File.basename(path)
  File.join(@options[:url_path], basename)
end
|
328
|
+
|
329
|
+
end
|
330
|
+
|
331
|
+
|
332
|
+
# BigSitemap preconfigured for Rails: mixes in the Rails URL helpers and
# defaults :document_root to "<Rails.root>/public" and :url_options to
# default_url_options. Options passed by the caller take precedence.
class BigSitemapRails < BigSitemap

  # Versions are compared as Gem::Version: a plain string comparison would
  # sort "10" before "3".
  if defined?(Rails) && Gem::Version.new(Rails.version) < Gem::Version.new("3")
    include ActionController::UrlWriter
  end

  # options - same Hash as BigSitemap#initialize.
  #
  # Raises RuntimeError when Rails is not loaded.
  def initialize(options={})
    raise "No Rails Environment loaded" unless defined? Rails
    require 'action_controller'

    if Gem::Version.new(Rails.version) >= Gem::Version.new("3")
      self.class.send(:include, Rails.application.routes.url_helpers)
    end

    # Merge into a fresh hash: writing into DEFAULTS would change the shared
    # constant and with it every BigSitemap created afterwards.
    rails_defaults = {:document_root => "#{Rails.root}/public", :url_options => default_url_options}
    super(rails_defaults.merge(options))
  end

end
|
351
|
+
|
352
|
+
|
353
|
+
# BigSitemap preconfigured for Merb: defaults :document_root to
# "<Merb.root>/public" and derives table names with Extlib's inflector.
# Options passed by the caller take precedence.
class BigSitemapMerb < BigSitemap

  # options - same Hash as BigSitemap#initialize.
  #
  # Raises RuntimeError when Merb is not loaded.
  def initialize(options={})
    raise "No Merb Environment loaded" unless defined? Merb
    require 'extlib'

    # Merge into a fresh hash: writing into DEFAULTS would change the shared
    # constant and with it every BigSitemap created afterwards.
    merb_defaults = {:document_root => "#{Merb.root}/public"}
    super(merb_defaults.merge(options))
  end

  # Table name derived from the model's class name instead of asking the model.
  def table_name(model)
    Extlib::Inflection.tableize(model.to_s)
  end

end
|