sitemap_generator_ftbpro 5.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +35 -0
- data/MIT-LICENSE +20 -0
- data/README.md +1139 -0
- data/Rakefile +43 -0
- data/VERSION +1 -0
- data/lib/capistrano/sitemap_generator.rb +1 -0
- data/lib/capistrano/tasks/sitemap_generator.cap +36 -0
- data/lib/sitemap_generator/adapters/file_adapter.rb +43 -0
- data/lib/sitemap_generator/adapters/fog_adapter.rb +28 -0
- data/lib/sitemap_generator/adapters/s3_adapter.rb +41 -0
- data/lib/sitemap_generator/adapters/wave_adapter.rb +21 -0
- data/lib/sitemap_generator/adapters.rb +0 -0
- data/lib/sitemap_generator/application.rb +49 -0
- data/lib/sitemap_generator/builder/sitemap_file.rb +171 -0
- data/lib/sitemap_generator/builder/sitemap_index_file.rb +149 -0
- data/lib/sitemap_generator/builder/sitemap_index_url.rb +28 -0
- data/lib/sitemap_generator/builder/sitemap_url.rb +250 -0
- data/lib/sitemap_generator/builder.rb +8 -0
- data/lib/sitemap_generator/core_ext/big_decimal.rb +45 -0
- data/lib/sitemap_generator/core_ext/numeric.rb +48 -0
- data/lib/sitemap_generator/core_ext.rb +3 -0
- data/lib/sitemap_generator/helpers/number_helper.rb +237 -0
- data/lib/sitemap_generator/interpreter.rb +80 -0
- data/lib/sitemap_generator/link_set.rb +665 -0
- data/lib/sitemap_generator/railtie.rb +7 -0
- data/lib/sitemap_generator/sitemap_location.rb +192 -0
- data/lib/sitemap_generator/sitemap_namer.rb +75 -0
- data/lib/sitemap_generator/tasks.rb +53 -0
- data/lib/sitemap_generator/templates.rb +41 -0
- data/lib/sitemap_generator/utilities.rb +181 -0
- data/lib/sitemap_generator.rb +82 -0
- data/lib/tasks/sitemap_generator_tasks.rake +1 -0
- data/rails/install.rb +2 -0
- data/rails/uninstall.rb +2 -0
- data/spec/blueprint.rb +15 -0
- data/spec/files/sitemap.create.rb +12 -0
- data/spec/files/sitemap.groups.rb +49 -0
- data/spec/sitemap_generator/adapters/s3_adapter_spec.rb +23 -0
- data/spec/sitemap_generator/alternate_sitemap_spec.rb +79 -0
- data/spec/sitemap_generator/application_spec.rb +69 -0
- data/spec/sitemap_generator/builder/sitemap_file_spec.rb +110 -0
- data/spec/sitemap_generator/builder/sitemap_index_file_spec.rb +124 -0
- data/spec/sitemap_generator/builder/sitemap_index_url_spec.rb +28 -0
- data/spec/sitemap_generator/builder/sitemap_url_spec.rb +186 -0
- data/spec/sitemap_generator/core_ext/bigdecimal_spec.rb +20 -0
- data/spec/sitemap_generator/core_ext/numeric_spec.rb +43 -0
- data/spec/sitemap_generator/file_adaptor_spec.rb +20 -0
- data/spec/sitemap_generator/geo_sitemap_spec.rb +30 -0
- data/spec/sitemap_generator/helpers/number_helper_spec.rb +196 -0
- data/spec/sitemap_generator/interpreter_spec.rb +90 -0
- data/spec/sitemap_generator/link_set_spec.rb +864 -0
- data/spec/sitemap_generator/mobile_sitemap_spec.rb +27 -0
- data/spec/sitemap_generator/news_sitemap_spec.rb +42 -0
- data/spec/sitemap_generator/pagemap_sitemap_spec.rb +57 -0
- data/spec/sitemap_generator/sitemap_generator_spec.rb +582 -0
- data/spec/sitemap_generator/sitemap_groups_spec.rb +144 -0
- data/spec/sitemap_generator/sitemap_location_spec.rb +210 -0
- data/spec/sitemap_generator/sitemap_namer_spec.rb +96 -0
- data/spec/sitemap_generator/templates_spec.rb +24 -0
- data/spec/sitemap_generator/utilities/existence_spec.rb +26 -0
- data/spec/sitemap_generator/utilities/hash_spec.rb +57 -0
- data/spec/sitemap_generator/utilities/rounding_spec.rb +31 -0
- data/spec/sitemap_generator/utilities_spec.rb +101 -0
- data/spec/sitemap_generator/video_sitemap_spec.rb +117 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/file_macros.rb +39 -0
- data/spec/support/schemas/siteindex.xsd +73 -0
- data/spec/support/schemas/sitemap-geo.xsd +41 -0
- data/spec/support/schemas/sitemap-mobile.xsd +32 -0
- data/spec/support/schemas/sitemap-news.xsd +159 -0
- data/spec/support/schemas/sitemap-pagemap.xsd +97 -0
- data/spec/support/schemas/sitemap-video.xsd +643 -0
- data/spec/support/schemas/sitemap.xsd +115 -0
- data/spec/support/xml_macros.rb +67 -0
- data/templates/sitemap.rb +27 -0
- metadata +226 -0
@@ -0,0 +1,665 @@
|
|
1
|
+
require 'builder'
|
2
|
+
|
3
|
+
# A LinkSet provisions a bunch of links to sitemap files. It also writes the index file
|
4
|
+
# which lists all the sitemap files written.
|
5
|
+
module SitemapGenerator
|
6
|
+
class LinkSet
|
7
|
+
# Option keys checked by +group+: when none of them is passed, the group
# reuses the current sitemap file instead of starting a new one.
@@requires_finalization_opts = [:filename, :sitemaps_path, :sitemaps_host, :namer]
|
8
|
+
# Option keys checked by +group+ together with <tt>:sitemaps_host</tt>: when
# <tt>:sitemaps_host</tt> is passed without any of these, the current sitemap
# is finalized and the group inherits this LinkSet's namer.
@@new_location_opts = [:filename, :sitemaps_path, :namer]
|
9
|
+
|
10
|
+
# Read-only here; the matching writers are defined as explicit setter methods.
attr_reader :default_host, :sitemaps_path, :filename, :create_index
|
11
|
+
attr_accessor :include_root, :include_index, :adapter, :yield_sitemap
|
12
|
+
# The reader for +verbose+ is defined explicitly further down.
attr_writer :verbose
|
13
|
+
|
14
|
+
# Create a new sitemap index and sitemap files. Pass a block with calls to the following
|
15
|
+
# methods:
|
16
|
+
# * +add+ - Add a link to the current sitemap
|
17
|
+
# * +group+ - Start a new group of sitemaps
|
18
|
+
#
|
19
|
+
# == Options
|
20
|
+
#
|
21
|
+
# Any option supported by +new+ can be passed. The options will be
|
22
|
+
# set on the instance using the accessor methods. This is provided mostly
|
23
|
+
# as a convenience.
|
24
|
+
#
|
25
|
+
# In addition to the options to +new+, the following options are supported:
|
26
|
+
# * <tt>:finalize</tt> - The sitemaps are written as they get full and at the end
|
27
|
+
# of the block. Pass +false+ as the value to prevent the sitemap or sitemap index
|
28
|
+
# from being finalized. Default is +true+.
|
29
|
+
#
|
30
|
+
# If you are calling +create+ more than once in your sitemap configuration file,
|
31
|
+
# make sure that you set a different +sitemaps_path+ or +filename+ for each call otherwise
|
32
|
+
# the sitemaps may be overwritten.
|
33
|
+
# Reset this LinkSet, apply +opts+, evaluate +block+ through the interpreter
# and finalize the sitemap and the sitemap index.
#
# opts  - Hash of options. <tt>:exclude_keys</tt> lists keys to drop from
#         SitemapGenerator::SCHEMAS; every other key is applied through its
#         setter by +set_options+. The caller's hash is not modified.
# block - evaluated by the interpreter.
#
# Returns self.
def create(opts={}, &block)
  reset!
  # Work on a copy: deleting :exclude_keys must not alter the caller's hash,
  # otherwise a hash reused for a second call silently loses the option.
  opts = opts.dup
  exclude_keys = opts.delete(:exclude_keys) || []
  @schemas = SitemapGenerator::SCHEMAS.reject { |key, _value| exclude_keys.include?(key) }
  set_options(opts)
  if verbose
    start_time = Time.now
    puts "In '#{sitemap_index.location.public_path}':"
  end
  interpreter.eval(:yield_sitemap => yield_sitemap?, &block)
  finalize!
  # Only report the elapsed time when a start time was actually recorded.
  # If verbosity was switched on while the block ran there is no start time,
  # and subtracting nil from a Time would raise.
  if verbose && start_time
    output(sitemap_index.stats_summary(:time_taken => Time.now - start_time))
  end
  self
end
|
48
|
+
|
49
|
+
# Constructor
|
50
|
+
#
|
51
|
+
# == Options:
|
52
|
+
# * <tt>:adapter</tt> - instance of a class with a write method which takes a SitemapGenerator::Location
|
53
|
+
# and raw XML data and persists it. The default adapter is a SitemapGenerator::FileAdapter
|
54
|
+
# which simply writes files to the filesystem. You can use a SitemapGenerator::WaveAdapter
|
55
|
+
# for uploading sitemaps to remote servers - useful for read-only hosts such as Heroku. Or
|
56
|
+
# you can provide an instance of your own class to provide custom behavior.
|
57
|
+
#
|
58
|
+
# * <tt>:default_host</tt> - host including protocol to use in all sitemap links
|
59
|
+
# e.g. http://en.google.ca
|
60
|
+
#
|
61
|
+
# * <tt>:public_path</tt> - Full or relative path to the directory to write sitemaps into.
|
62
|
+
# Defaults to the <tt>public/</tt> directory in your application root directory or
|
63
|
+
# the current working directory.
|
64
|
+
#
|
65
|
+
# * <tt>:sitemaps_host</tt> - String. <b>Host including protocol</b> to use when generating
|
66
|
+
# a link to a sitemap file i.e. the hostname of the server where the sitemaps are hosted.
|
67
|
+
# The value will differ from the hostname in your sitemap links.
|
68
|
+
# For example: `'http://amazon.aws.com/'`.
|
69
|
+
#
|
70
|
+
# Note that `include_index` is automatically turned off when the `sitemaps_host` does
|
71
|
+
# not match `default_host`. Because the link to the sitemap index file that would
|
72
|
+
# otherwise be added would point to a different host than the rest of the links in
|
73
|
+
# the sitemap. Something that the sitemap rules forbid.
|
74
|
+
#
|
75
|
+
# * <tt>:sitemaps_path</tt> - path fragment within public to write sitemaps
|
76
|
+
# to e.g. 'en/'. Sitemaps are written to <tt>public_path</tt> + <tt>sitemaps_path</tt>
|
77
|
+
#
|
78
|
+
# * <tt>:filename</tt> - symbol giving the base name for files (default <tt>:sitemap</tt>).
|
79
|
+
# The names are generated like "#{filename}.xml.gz", "#{filename}1.xml.gz", "#{filename}2.xml.gz"
|
80
|
+
# with the first file being the index if you have more than one sitemap file.
|
81
|
+
#
|
82
|
+
# * <tt>:include_index</tt> - Boolean. Whether to <b>add a link pointing to the sitemap index</b>
|
83
|
+
# to the current sitemap. This points search engines to your Sitemap Index to
|
84
|
+
# include it in the indexing of your site. Default is `false`. Turned off when
|
85
|
+
# `sitemaps_host` is set or within a `group()` block. Turned off because Google can complain
|
86
|
+
# about nested indexing and because if a robot is already reading your sitemap, they
|
87
|
+
# probably know about the index.
|
88
|
+
#
|
89
|
+
# * <tt>:include_root</tt> - Boolean. Whether to **add the root** url i.e. '/' to the
|
90
|
+
# current sitemap. Default is `true`. Turned off within a `group()` block.
|
91
|
+
#
|
92
|
+
# * <tt>:search_engines</tt> - Hash. A hash of search engine names mapped to
|
93
|
+
# ping URLs. See ping_search_engines.
|
94
|
+
#
|
95
|
+
# * <tt>:verbose</tt> - If +true+, output a summary line for each sitemap and sitemap
|
96
|
+
# index that is created. Default is +false+.
|
97
|
+
#
|
98
|
+
# * <tt>:create_index</tt> - Supported values: `true`, `false`, `:auto`. Default: `:auto`.
|
99
|
+
# Whether to create a sitemap index file. If `true` an index file is always created,
|
100
|
+
# regardless of how many links are in your sitemap. If `false` an index file is never
|
101
|
+
# created. If `:auto` an index file is created only if your sitemap has more than
|
102
|
+
# one sitemap file.
|
103
|
+
#
|
104
|
+
# * <tt>:namer</tt> - A <tt>SitemapGenerator::SimpleNamer</tt> instance for generating the sitemap
|
105
|
+
# and index file names. See <tt>:filename</tt> if you don't need to do anything fancy, and can
|
106
|
+
# accept the default naming conventions.
|
107
|
+
#
|
108
|
+
# * <tt>:compress</tt> - Specifies which files to compress with gzip. Default is `true`. Accepted values:
|
109
|
+
# * `true` - Boolean; compress all files.
|
110
|
+
# * `false` - Boolean; write out only uncompressed files.
|
111
|
+
# * `:all_but_first` - Symbol; leave the first file uncompressed but compress any remaining files.
|
112
|
+
#
|
113
|
+
# The compression setting applies to groups too. So :all_but_first will have the same effect (the first
|
114
|
+
# file in the group will not be compressed, the rest will). So if you require different behaviour for your
|
115
|
+
# groups, pass in a `:compress` option e.g. <tt>group(:compress => false) { add('/link') }</tt>
|
116
|
+
#
|
117
|
+
# KJV: When adding a new option be sure to include it in `options_for_group()` if
|
118
|
+
# the option should be inherited by groups.
|
119
|
+
# Build a LinkSet. Every key of +options+ (after the defaults below are
# merged in underneath) is stored directly in the instance variable of the
# same name; the setter methods are not invoked.
def initialize(options={})
  defaults = {
    :include_root => true,
    :include_index => false,
    :filename => :sitemap,
    :search_engines => {
      :google => "http://www.google.com/webmasters/tools/ping?sitemap=%s",
      :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=%s"
    },
    :create_index => :auto,
    :compress => true
  }
  options = SitemapGenerator::Utilities.reverse_merge(options, defaults)
  options.each_pair do |name, value|
    instance_variable_set(:"@#{name}", value)
  end

  # An index handed in from outside is protected from modification:
  # sitemaps can still be added to it but nothing else may be changed.
  @protect_index = true if options[:sitemap_index]
end
|
139
|
+
|
140
|
+
# Add a link to a Sitemap. If a new Sitemap is required, one will be created for
|
141
|
+
# you.
|
142
|
+
#
|
143
|
+
# link - string link e.g. '/merchant', '/article/1' or whatever.
|
144
|
+
# options - see README.
|
145
|
+
# host - host for the link, defaults to your <tt>default_host</tt>.
|
146
|
+
# Add +link+ to the current sitemap, adding the default links first if that
# has not happened yet. +options+ is passed on with <tt>:host</tt> defaulting
# to the default host.
def add(link, options={})
  begin
    add_default_links unless @added_default_links
    current = sitemap
    current.add(link, SitemapGenerator::Utilities.reverse_merge(options, :host => @default_host))
  rescue SitemapGenerator::SitemapFullError
    # The current file is full: finalize it and try again.
    finalize_sitemap!
    retry
  rescue SitemapGenerator::SitemapFinalizedError
    # The current file is already finalized: start a new one and try again.
    @sitemap = sitemap.new
    retry
  end
end
|
156
|
+
|
157
|
+
# Add a link to the Sitemap Index.
|
158
|
+
# * link - A string link e.g. '/sitemaps/sitemap1.xml.gz' or a SitemapFile instance.
|
159
|
+
# * options - A hash of options including `:lastmod`, `:priority`, `:changefreq` and `:host`
|
160
|
+
#
|
161
|
+
# The `:host` option defaults to the value of `sitemaps_host` which is the host where your
|
162
|
+
# sitemaps reside. If no `sitemaps_host` is set, the `default_host` is used.
|
163
|
+
# Add +link+ to the sitemap index; <tt>:host</tt> defaults to +sitemaps_host+.
def add_to_index(link, options={})
  index = sitemap_index
  index.add(link, SitemapGenerator::Utilities.reverse_merge(options, :host => sitemaps_host))
end
|
166
|
+
|
167
|
+
# Create a new group of sitemap files.
|
168
|
+
#
|
169
|
+
# Returns a new LinkSet instance with the options passed in set on it. All groups
|
170
|
+
# share the sitemap index, which is not affected by any of the options passed here.
|
171
|
+
#
|
172
|
+
# === Options
|
173
|
+
# Any of the options to LinkSet.new. Except for <tt>:public_path</tt> which is shared
|
174
|
+
# by all groups.
|
175
|
+
#
|
176
|
+
# The current options are inherited by the new group of sitemaps. The only exceptions
|
177
|
+
# being <tt>:include_index</tt> and <tt>:include_root</tt> which default to +false+.
|
178
|
+
#
|
179
|
+
# Pass a block to add links to the new LinkSet. If you pass a block the sitemaps will
|
180
|
+
# be finalized when the block returns.
|
181
|
+
#
|
182
|
+
# If you are not changing any of the location settings like <tt>filename</tt>,
|
183
|
+
# <tt>sitemaps_path</tt>, <tt>sitemaps_host</tt> or <tt>namer</tt>,
|
184
|
+
# links you add within the group will be added to the current sitemap.
|
185
|
+
# Otherwise the current sitemap file is finalized and a new sitemap file started,
|
186
|
+
# using the options you specified.
|
187
|
+
#
|
188
|
+
# Most commonly, you'll want to give the group's files a distinct name using
|
189
|
+
# the <tt>filename</tt> option.
|
190
|
+
#
|
191
|
+
# Options like <tt>:default_host</tt> can be used and it will only affect the links
|
192
|
+
# within the group. Links added outside of the group will revert to the previous
|
193
|
+
# +default_host+.
|
194
|
+
# Create a group: a new LinkSet built from +opts+ merged over the current
# settings (see +options_for_group+). The new LinkSet is stored in @group
# and returned.
#
# opts  - Hash of options for the group; the caller's hash is not modified.
# block - optional; evaluated by the group's interpreter.
#
# Returns the group's LinkSet.
def group(opts={}, &block)
  @created_group = true
  original_opts = opts.dup

  if (@@requires_finalization_opts & original_opts.keys).empty?
    # If no new filename or path is specified reuse the default sitemap file.
    # A new location object will be set on it for the duration of the group.
    original_opts[:sitemap] = sitemap
  elsif original_opts.key?(:sitemaps_host) && (@@new_location_opts & original_opts.keys).empty?
    # If no location options are provided we are creating the next sitemap in the
    # current series, so finalize and inherit the namer.
    finalize_sitemap!
    original_opts[:namer] = namer
  end

  opts = options_for_group(original_opts)
  @group = SitemapGenerator::LinkSet.new(opts)
  if opts.key?(:sitemap)
    # If the group is sharing the current sitemap, set the
    # new location options on the location object.
    @original_location = @sitemap.location.dup
    @sitemap.location.merge!(@group.sitemap_location)
    if block_given?
      # Use yield_sitemap? (as create does) so that an explicit
      # `yield_sitemap = false` on this instance is honoured instead of being
      # overridden by the global setting.
      @group.interpreter.eval(:yield_sitemap => yield_sitemap?, &block)
      # Restore the location that was in effect before the group.
      @sitemap.location.merge!(@original_location)
    end
  else
    # Handle the case where a user only has one group, and it's being written
    # to a new sitemap file. They would expect there to be an index. So force
    # index creation. If there is more than one group, we would have an index anyways,
    # so it's safe to force index creation in these other cases. In the case that
    # the groups reuse the current sitemap, don't force index creation because
    # we want the default behaviour i.e. only an index if more than one sitemap file.
    # Don't force index creation if the user specifically requested no index. This
    # unfortunately means that if they set it to :auto they may be getting an index
    # when they didn't expect one, but you shouldn't be using groups if you only have
    # one sitemap and don't want an index. Rather, just add the links directly in the create()
    # block.
    @group.send(:create_index=, true, true) if @group.create_index != false

    if block_given?
      # Same yield_sitemap? rule as in the shared-sitemap branch above.
      @group.interpreter.eval(:yield_sitemap => yield_sitemap?, &block)
      @group.finalize_sitemap!
    end
  end
  @group
end
|
241
|
+
|
242
|
+
# Ping search engines to notify them of updated sitemaps.
|
243
|
+
#
|
244
|
+
# Search engines are already notified for you if you run `rake sitemap:refresh`.
|
245
|
+
# If you want to ping search engines separately to your sitemap generation, run
|
246
|
+
# `rake sitemap:refresh:no_ping` and then run a rake task or script
|
247
|
+
# which calls this method as in the example below.
|
248
|
+
#
|
249
|
+
# == Arguments
|
250
|
+
# * sitemap_index_url - The full URL to your sitemap index file.
|
251
|
+
# If not provided the location is based on the `host` you have
|
252
|
+
# set and any other options like your `sitemaps_path`. The URL
|
253
|
+
# will be CGI escaped for you when included as part of the
|
254
|
+
# search engine ping URL.
|
255
|
+
#
|
256
|
+
# == Options
|
257
|
+
# A hash of one or more search engines to ping in addition to the
|
258
|
+
# default search engines. The key is the name of the search engine
|
259
|
+
# as a string or symbol and the value is the full URL to ping with
|
260
|
+
# a string interpolation that will be replaced by the CGI escaped sitemap
|
261
|
+
# index URL. If you have any literal percent characters in your URL you
|
262
|
+
# need to escape them with `%%`. For example if your sitemap index URL
|
263
|
+
# is `http://example.com/sitemap.xml.gz` and your
|
264
|
+
# ping url is `http://example.com/100%%/ping?url=%s`
|
265
|
+
# then the final URL that is pinged will be `http://example.com/100%/ping?url=http%3A%2F%2Fexample.com%2Fsitemap.xml.gz`
|
266
|
+
#
|
267
|
+
# == Examples
|
268
|
+
#
|
269
|
+
# Both of these examples will ping the default search engines in addition to `http://superengine.com/ping?url=http%3A%2F%2Fexample.com%2Fsitemap.xml.gz`
|
270
|
+
#
|
271
|
+
# SitemapGenerator::Sitemap.host('http://example.com/')
|
272
|
+
# SitemapGenerator::Sitemap.ping_search_engines(:super_engine => 'http://superengine.com/ping?url=%s')
|
273
|
+
#
|
274
|
+
# Is equivalent to:
|
275
|
+
#
|
276
|
+
# SitemapGenerator::Sitemap.ping_search_engines('http://example.com/sitemap.xml.gz', :super_engine => 'http://superengine.com/ping?url=%s')
|
277
|
+
# Ping every configured search engine with the (CGI-escaped) sitemap index URL.
#
# args - optional leading String: the sitemap index URL (defaults to
#        +sitemap_index_url+); optional trailing Hash: extra engines, mapping
#        a name to a ping URL containing a `%s` placeholder.
#
# A failure for one engine is reported through +output+ and does not stop
# the remaining pings.
def ping_search_engines(*args)
  require 'cgi/session'
  require 'open-uri'
  require 'timeout'

  engines = args.last.is_a?(Hash) ? args.pop : {}
  unescaped_url = args.shift || sitemap_index_url
  index_url = CGI.escape(unescaped_url)

  output("\n")
  output("Pinging with URL '#{unescaped_url}':")
  search_engines.merge(engines).each do |engine, link|
    link = link % index_url
    name = Utilities.titleize(engine.to_s)
    begin
      Timeout::timeout(10) {
        # Open through a parsed URI rather than Kernel#open: Kernel#open would
        # treat a string starting with "|" as a command to run and a plain
        # path as a local file, and it is no longer patched by open-uri on
        # Ruby 3+. The block form lets open-uri close the response for us.
        URI.parse(link).open { |_response| }
      }
      output(" Successful ping of #{name}")
    rescue Timeout::Error, StandardError => e
      output("Ping failed for #{name}: #{e.inspect} (URL #{link})")
    end
  end
end
|
301
|
+
|
302
|
+
# Return a count of the total number of links in all sitemaps
|
303
|
+
# Delegates to the sitemap index's +total_link_count+.
def link_count
  index = sitemap_index
  index.total_link_count
end
|
306
|
+
|
307
|
+
# Return the host to use in links to the sitemap files. This defaults to your
|
308
|
+
# +default_host+.
|
309
|
+
# The explicitly configured sitemaps host if there is one, otherwise the
# default host.
def sitemaps_host
  return @sitemaps_host if @sitemaps_host
  @default_host
end
|
312
|
+
|
313
|
+
# Lazy-initialize a sitemap instance and return it.
|
314
|
+
# Return the current sitemap file, building it from +sitemap_location+ and
# +schemas+ the first time it is needed.
def sitemap
  return @sitemap if @sitemap
  @sitemap = SitemapGenerator::Builder::SitemapFile.new(sitemap_location, schemas)
end
|
318
|
+
|
319
|
+
# Lazy-initialize a sitemap index instance and return it.
|
320
|
+
# Return the sitemap index file, building it from +sitemap_index_location+
# the first time it is needed.
def sitemap_index
  return @sitemap_index if @sitemap_index
  @sitemap_index = SitemapGenerator::Builder::SitemapIndexFile.new(sitemap_index_location)
end
|
323
|
+
|
324
|
+
# Return the full url to the sitemap index file. When `create_index` is `false`
|
325
|
+
# the first sitemap is technically the index, so this will be its URL. It's important
|
326
|
+
# to use this method to get the index url because `sitemap_index.location.url` will
|
327
|
+
# not be correct in such situations.
|
328
|
+
#
|
329
|
+
# KJV: This is somewhat confusing.
|
330
|
+
# Delegates to the sitemap index's +index_url+.
def sitemap_index_url
  index = sitemap_index
  index.index_url
end
|
333
|
+
|
334
|
+
# All done. Write out remaining files.
|
335
|
+
# Finalize the current sitemap first, then the sitemap index. The result of
# finalizing the index is the return value.
def finalize!
  finalize_sitemap!
  index_result = finalize_sitemap_index!
  index_result
end
|
339
|
+
|
340
|
+
# Return a boolean indicating whether to add a link to the sitemap index file
|
341
|
+
# to the current sitemap. This points search engines to your Sitemap Index so
|
342
|
+
# they include it in the indexing of your site, but is not strictly necessary.
|
343
|
+
# Default is `false`. Turned off when `sitemaps_host` is set or within a `group()` block.
|
344
|
+
# Returns +false+ whenever the sitemaps host differs from the default host;
# otherwise returns the configured @include_index value.
def include_index?
  hosts_differ = default_host && sitemaps_host && sitemaps_host != default_host
  return false if hosts_differ
  @include_index
end
|
351
|
+
|
352
|
+
# Return a boolean indicating whether to automatically add the root url i.e. '/' to the
|
353
|
+
# current sitemap. Default is `true`. Turned off within a `group()` block.
|
354
|
+
# Strict boolean view of @include_root.
def include_root?
  @include_root ? true : false
end
|
357
|
+
|
358
|
+
# Set verbose on the instance or by setting ENV['VERBOSE'] to true or false.
|
359
|
+
# By default verbose is true. When running rake tasks, pass the <tt>-s</tt>
|
360
|
+
# option to rake to turn verbose off.
|
361
|
+
# Return the verbose flag. When it has not been set on this instance it is
# taken from SitemapGenerator.verbose (treating nil there as +true+) and
# remembered.
def verbose
  return @verbose unless @verbose.nil?
  global_setting = SitemapGenerator.verbose
  @verbose = global_setting.nil? ? true : global_setting
end
|
367
|
+
|
368
|
+
# The schemas selected for this instance, or the SCHEMAS constant when none
# have been selected.
def schemas
  return @schemas if @schemas
  SCHEMAS
end
|
371
|
+
|
372
|
+
# Return a boolean indicating whether or not to yield the sitemap.
|
373
|
+
# When @yield_sitemap has not been set, defer to SitemapGenerator.yield_sitemap?;
# otherwise return @yield_sitemap as a strict boolean.
def yield_sitemap?
  return SitemapGenerator.yield_sitemap? if @yield_sitemap.nil?
  @yield_sitemap ? true : false
end
|
376
|
+
|
377
|
+
protected
|
378
|
+
|
379
|
+
# Set each option on this instance using accessor methods. This will affect
|
380
|
+
# both the sitemap and the sitemap index.
|
381
|
+
#
|
382
|
+
# If both `filename` and `namer` are passed, set filename first so it
|
383
|
+
# doesn't override the latter.
|
384
|
+
# Apply each option through its `name=` setter. +filename+ and +namer+ are
# applied first, in that order, and only when their value is truthy. The
# caller's hash is left untouched.
def set_options(opts={})
  remaining = opts.dup
  [:filename, :namer].each do |name|
    value = remaining.delete(name)
    send("#{name}=", value) if value
  end
  remaining.each_pair { |name, value| send("#{name}=", value) }
end
|
395
|
+
|
396
|
+
# Given +opts+, modify it and return it prepped for creating a new group from this LinkSet.
|
397
|
+
# If <tt>:public_path</tt> is present in +opts+ it is removed because groups cannot
|
398
|
+
# change the public path.
|
399
|
+
# Build the option hash for a new group: +opts+ wins, then the group
# defaults, then the current settings of this instance. Any
# <tt>:public_path</tt> in +opts+ is discarded before the current settings
# are merged in.
def options_for_group(opts)
  group_defaults = {
    :include_index => false,
    :include_root => false,
    :sitemap_index => sitemap_index
  }
  opts = SitemapGenerator::Utilities.reverse_merge(opts, group_defaults)
  opts.delete(:public_path)

  # Reverse merge the current settings
  # KJV: This list could be a problem because it needs to be maintained
  # when new options are added, but can easily be missed. We really could
  # do with a separate SitemapOptions class.
  inherited_keys = [
    :include_root,
    :include_index,
    :sitemaps_path,
    :public_path,
    :sitemaps_host,
    :verbose,
    :default_host,
    :adapter,
    :create_index,
    :compress,
    :schemas
  ]
  current_settings = {}
  inherited_keys.each do |name|
    current = instance_variable_get(:"@#{name}")
    # Settings that are still nil are not inherited.
    current_settings[name] = current unless current.nil?
  end
  SitemapGenerator::Utilities.reverse_merge!(opts, current_settings)
  opts
end
|
432
|
+
|
433
|
+
# Add default links if those options are turned on. Record the fact that we have done so
|
434
|
+
# in an instance variable.
|
435
|
+
# Add the root link and/or the sitemap index link to the current sitemap,
# depending on include_root? / include_index?, then record that this was done.
def add_default_links
  sitemap.add('/',
    :lastmod => Time.now,
    :changefreq => 'always',
    :priority => 1.0,
    :host => @default_host) if include_root?

  sitemap.add(sitemap_index,
    :lastmod => Time.now,
    :changefreq => 'always',
    :priority => 1.0) if include_index?

  @added_default_links = true
end
|
444
|
+
|
445
|
+
# Finalize a sitemap by including it in the index and outputting a summary line.
|
446
|
+
# Do nothing if it has already been finalized.
|
447
|
+
#
|
448
|
+
# Don't finalize if the sitemap is empty.
|
449
|
+
#
|
450
|
+
# Add the default links if they have not been added yet and no groups have been created.
|
451
|
+
# If the default links haven't been added we know that the sitemap is empty,
|
452
|
+
# because they are added on the first call to add(). This ensures that if the
|
453
|
+
# block passed to create() is empty the default links are still included in the
|
454
|
+
# sitemap.
|
455
|
+
# Nothing to do for a finalized sitemap, or for an empty one once a group
# has been created. Otherwise add the default links if needed and add the
# sitemap to the index, unless it is still empty.
def finalize_sitemap!
  return if sitemap.finalized?
  return if sitemap.empty? && @created_group
  add_default_links unless @added_default_links || @created_group
  # This will finalize it. We add to the index even if not creating an index because
  # the index keeps track of how many links are in our sitemaps and we need this info
  # for the summary line. Also the index determines which file gets the first name
  # so everything has to go via the index.
  add_to_index(sitemap) unless sitemap.empty?
end
|
464
|
+
|
465
|
+
# Finalize a sitemap index and output a summary line. Do nothing if it has already
|
466
|
+
# been finalized.
|
467
|
+
# Finalize and write the sitemap index, unless the index is protected or
# already finalized.
def finalize_sitemap_index!
  unless @protect_index || sitemap_index.finalized?
    sitemap_index.finalize!
    sitemap_index.write
  end
end
|
472
|
+
|
473
|
+
# Return the interpreter linked to this instance.
|
474
|
+
# Return the Interpreter bound to this LinkSet, creating it on first use.
# The interpreter file is required on every call.
def interpreter
  require 'sitemap_generator/interpreter'
  return @interpreter if @interpreter
  @interpreter = SitemapGenerator::Interpreter.new(:link_set => self)
end
|
478
|
+
|
479
|
+
# Reset this instance. Keep the same options, but return to the same state
|
480
|
+
# as before any sitemaps were created.
|
481
|
+
# Drop a finalized sitemap index (unless it is protected) and a finalized
# sitemap, reset the namer and clear the default-links flag.
def reset!
  if @sitemap_index && @sitemap_index.finalized? && !@protect_index
    @sitemap_index = nil
  end
  if @sitemap && @sitemap.finalized?
    @sitemap = nil
  end
  self.namer.reset
  @added_default_links = false
end
|
487
|
+
|
488
|
+
# Write the given string to STDOUT. Used so that the sitemap config can be
|
489
|
+
# evaluated and some info output to STDOUT in a lazy fashion.
|
490
|
+
# Print +string+ with +puts+, but only when verbose is on.
def output(string)
  puts string if verbose
end
|
494
|
+
|
495
|
+
module LocationHelpers
|
496
|
+
public
|
497
|
+
|
498
|
+
# Set the host name, including protocol, that will be used by default on each
|
499
|
+
# of your sitemap links. You can pass a different host in your options to `add`
|
500
|
+
# if you need to change it on a per-link basis.
|
501
|
+
# Store the default host and pass it on as the <tt>:host</tt> location info.
# NOTE(review): sitemaps_host= writes the same <tt>:host</tt> key - confirm
# which value is meant to win when both setters are used.
def default_host=(value)
  update_location_info(:host, @default_host = value)
end
|
505
|
+
|
506
|
+
# Set the public_path. This path gives the location of your public directory.
|
507
|
+
# The default is the public/ directory in your Rails root. Or if Rails is not
|
508
|
+
# found, it defaults to public/ in the current directory (of the process).
|
509
|
+
#
|
510
|
+
# Example: 'tmp/' if you don't want to generate in public for some reason.
|
511
|
+
#
|
512
|
+
# Set to nil to use the current directory.
|
513
|
+
# Store +value+ (with a trailing slash appended by Utilities.append_slash) as
# a Pathname. A relative path is resolved against SitemapGenerator.app.root.
# Returns the resulting Pathname.
def public_path=(value)
  @public_path = Pathname.new(SitemapGenerator::Utilities.append_slash(value))
  @public_path = SitemapGenerator.app.root + @public_path if @public_path.relative?
  update_location_info(:public_path, @public_path)
  @public_path
end
|
521
|
+
|
522
|
+
# Return a Pathname with the full path to the public directory
|
523
|
+
# Return the public path, initialising it to 'public/' through the setter
# the first time it is needed.
def public_path
  return @public_path if @public_path
  @public_path = send(:public_path=, 'public/')
end
|
526
|
+
|
527
|
+
# Set the sitemaps_path. This path gives the location to write sitemaps to
|
528
|
+
# relative to your public_path.
|
529
|
+
# Example: 'sitemaps/' to generate your sitemaps in 'public/sitemaps/'.
|
530
|
+
# Store the sitemaps path and pass it on as the <tt>:sitemaps_path</tt>
# location info.
def sitemaps_path=(value)
  update_location_info(:sitemaps_path, @sitemaps_path = value)
end
|
534
|
+
|
535
|
+
# Set the host name, including protocol, that will be used on all links to your sitemap
|
536
|
+
# files. Useful when the server that hosts the sitemaps is not on the same host as
|
537
|
+
# the links in the sitemap.
|
538
|
+
#
|
539
|
+
# Note that `include_index` will be turned off to avoid adding a link to a sitemap with
|
540
|
+
# a different host than the other links.
|
541
|
+
# Store the sitemaps host and pass it on as the <tt>:host</tt> location info.
def sitemaps_host=(value)
  update_location_info(:host, @sitemaps_host = value)
end
|
545
|
+
|
546
|
+
# Set the filename base to use when generating sitemaps (and the sitemap index).
|
547
|
+
#
|
548
|
+
# === Example
|
549
|
+
# <tt>filename = :sitemap</tt>
|
550
|
+
#
|
551
|
+
# === Generates
|
552
|
+
# <tt>sitemap.xml.gz, sitemap1.xml.gz, sitemap2.xml.gz, ...</tt>
|
553
|
+
# Store the filename base and install a fresh SimpleNamer built from it.
def filename=(value)
  @filename = value
  new_namer = SitemapGenerator::SimpleNamer.new(@filename)
  self.namer = new_namer
end
|
557
|
+
|
558
|
+
# Set the search engines hash to a new hash of search engine names mapped to
|
559
|
+
# ping URLs (see ping_search_engines). If the value is nil it is converted
|
560
|
+
# to an empty hash.
|
561
|
+
# === Example
|
562
|
+
# <tt>search_engines = { :google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=%s" }</tt>
|
563
|
+
# Store the search engines hash; a nil (or false) value becomes an empty hash.
def search_engines=(value)
  @search_engines = value ? value : {}
end
|
566
|
+
|
567
|
+
# Return the hash of search engines.
|
568
|
+
# The configured search engines, or an empty hash when none are set.
def search_engines
  return @search_engines if @search_engines
  {}
end
|
571
|
+
|
572
|
+
# Return a new +SitemapLocation+ instance with the current options included
|
573
|
+
# Build a new SitemapLocation from the current host, namer, paths, adapter,
# verbose and compress settings.
def sitemap_location
  location_options = {
    :host => sitemaps_host,
    :namer => namer,
    :public_path => public_path,
    :sitemaps_path => @sitemaps_path,
    :adapter => @adapter,
    :verbose => verbose,
    :compress => @compress
  }
  SitemapGenerator::SitemapLocation.new(location_options)
end
|
584
|
+
|
585
|
+
# Return a new +SitemapIndexLocation+ instance with the current options included
|
586
|
+
def sitemap_index_location
  # Read the accessor-backed settings first, in the same order the options list them.
  host = sitemaps_host
  file_namer = namer
  root = public_path
  chatty = verbose

  # Same options as a regular sitemap location, plus the :create_index setting.
  SitemapGenerator::SitemapLocation.new(
    :host          => host,
    :namer         => file_namer,
    :public_path   => root,
    :sitemaps_path => @sitemaps_path,
    :adapter       => @adapter,
    :verbose       => chatty,
    :create_index  => @create_index,
    :compress      => @compress
  )
end
|
598
|
+
|
599
|
+
# Set the value of +create_index+ on the SitemapIndexLocation object of the
|
600
|
+
# SitemapIndexFile.
|
601
|
+
#
|
602
|
+
# Whether to create a sitemap index file. Supported values: `true`, `false`, `:auto`.
|
603
|
+
# If `true` an index file is always created, regardless of how many links
|
604
|
+
# are in your sitemap. If `false` an index file is never created.
|
605
|
+
# If `:auto` an index file is created only if your sitemap has more than
|
606
|
+
# one sitemap file.
|
607
|
+
def create_index=(value, force=false)
  @create_index = value
  return unless @sitemap_index

  # The index location is normally writable only while the index is neither
  # finalized nor protected; +force+ bypasses both checks (the second argument
  # can only be supplied through +send+).
  writable = !(@sitemap_index.finalized? || @protect_index)
  @sitemap_index.location[:create_index] = value if writable || force
end
|
614
|
+
|
615
|
+
# Set the namer to use to generate the sitemap (and index) file names.
|
616
|
+
# This should be an instance of <tt>SitemapGenerator::SimpleNamer</tt>
|
617
|
+
def namer=(value)
  @namer = value

  # Propagate to the current sitemap unless it has already been finalized.
  if @sitemap
    @sitemap.location[:namer] = value unless @sitemap.finalized?
  end

  # The index only takes the new namer while it is unfinalized and unprotected.
  return unless @sitemap_index
  return if @sitemap_index.finalized? || @protect_index
  @sitemap_index.location[:namer] = value
end
|
622
|
+
|
623
|
+
# Return the namer object. If it is not set, looks for it on
|
624
|
+
# the current sitemap and if there is no sitemap, creates a new one using
|
625
|
+
# the current filename.
|
626
|
+
def namer
  return @namer if @namer

  # Prefer the namer held by the current sitemap's location; otherwise build
  # one from the current filename. The result is cached in @namer.
  existing = @sitemap ? @sitemap.location.namer : nil
  @namer = existing || SitemapGenerator::SimpleNamer.new(@filename)
end
|
629
|
+
|
630
|
+
# Set the value of the compress setting.
|
631
|
+
#
|
632
|
+
# Values:
|
633
|
+
# * `true` - Boolean; compress all files
|
634
|
+
# * `false` - Boolean; write out only uncompressed files
|
635
|
+
# * `:all_but_first` - Symbol; leave the first file uncompressed but compress any remaining files.
|
636
|
+
#
|
637
|
+
# The compression setting applies to groups too. So :all_but_first will have the same effect (the first
|
638
|
+
# file in the group will not be compressed, the rest will). So if you require different behaviour for your
|
639
|
+
# groups, pass in a `:compress` option e.g. <tt>group(:compress => false) { add('/link') }</tt>
|
640
|
+
def compress=(value)
  @compress = value

  # Mirror the setting onto whichever index and sitemap files already exist.
  @sitemap_index.location[:compress] = value if @sitemap_index
  return unless @sitemap
  @sitemap.location[:compress] = value
end
|
645
|
+
|
646
|
+
# Return the current compression setting. Its value determines which files will be gzip'ed.
|
647
|
+
# See the setter for documentation of its values.
|
648
|
+
def compress
  # Plain reader for the @compress instance variable.
  @compress
end
|
651
|
+
|
652
|
+
protected
|
653
|
+
|
654
|
+
# Update the given attribute on the current sitemap index and sitemap file location objects.
|
655
|
+
# But don't create the index or sitemap files yet if they are not already created.
|
656
|
+
def update_location_info(attribute, value, opts={})
  # :include_index defaults to "index is not protected" unless the caller supplies it.
  settings = SitemapGenerator::Utilities.reverse_merge(opts, :include_index => !@protect_index)

  # Only touch location objects that already exist and are not finalized.
  touch_index = settings[:include_index] && @sitemap_index && !@sitemap_index.finalized?
  @sitemap_index.location[attribute] = value if touch_index

  return unless @sitemap
  @sitemap.location[attribute] = value unless @sitemap.finalized?
end
|
661
|
+
end
|
662
|
+
include LocationHelpers
|
663
|
+
end
|
664
|
+
end
|
665
|
+
|