sitemap_generator_ftbpro 5.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +13 -0
  3. data/Gemfile.lock +35 -0
  4. data/MIT-LICENSE +20 -0
  5. data/README.md +1139 -0
  6. data/Rakefile +43 -0
  7. data/VERSION +1 -0
  8. data/lib/capistrano/sitemap_generator.rb +1 -0
  9. data/lib/capistrano/tasks/sitemap_generator.cap +36 -0
  10. data/lib/sitemap_generator/adapters/file_adapter.rb +43 -0
  11. data/lib/sitemap_generator/adapters/fog_adapter.rb +28 -0
  12. data/lib/sitemap_generator/adapters/s3_adapter.rb +41 -0
  13. data/lib/sitemap_generator/adapters/wave_adapter.rb +21 -0
  14. data/lib/sitemap_generator/adapters.rb +0 -0
  15. data/lib/sitemap_generator/application.rb +49 -0
  16. data/lib/sitemap_generator/builder/sitemap_file.rb +171 -0
  17. data/lib/sitemap_generator/builder/sitemap_index_file.rb +149 -0
  18. data/lib/sitemap_generator/builder/sitemap_index_url.rb +28 -0
  19. data/lib/sitemap_generator/builder/sitemap_url.rb +250 -0
  20. data/lib/sitemap_generator/builder.rb +8 -0
  21. data/lib/sitemap_generator/core_ext/big_decimal.rb +45 -0
  22. data/lib/sitemap_generator/core_ext/numeric.rb +48 -0
  23. data/lib/sitemap_generator/core_ext.rb +3 -0
  24. data/lib/sitemap_generator/helpers/number_helper.rb +237 -0
  25. data/lib/sitemap_generator/interpreter.rb +80 -0
  26. data/lib/sitemap_generator/link_set.rb +665 -0
  27. data/lib/sitemap_generator/railtie.rb +7 -0
  28. data/lib/sitemap_generator/sitemap_location.rb +192 -0
  29. data/lib/sitemap_generator/sitemap_namer.rb +75 -0
  30. data/lib/sitemap_generator/tasks.rb +53 -0
  31. data/lib/sitemap_generator/templates.rb +41 -0
  32. data/lib/sitemap_generator/utilities.rb +181 -0
  33. data/lib/sitemap_generator.rb +82 -0
  34. data/lib/tasks/sitemap_generator_tasks.rake +1 -0
  35. data/rails/install.rb +2 -0
  36. data/rails/uninstall.rb +2 -0
  37. data/spec/blueprint.rb +15 -0
  38. data/spec/files/sitemap.create.rb +12 -0
  39. data/spec/files/sitemap.groups.rb +49 -0
  40. data/spec/sitemap_generator/adapters/s3_adapter_spec.rb +23 -0
  41. data/spec/sitemap_generator/alternate_sitemap_spec.rb +79 -0
  42. data/spec/sitemap_generator/application_spec.rb +69 -0
  43. data/spec/sitemap_generator/builder/sitemap_file_spec.rb +110 -0
  44. data/spec/sitemap_generator/builder/sitemap_index_file_spec.rb +124 -0
  45. data/spec/sitemap_generator/builder/sitemap_index_url_spec.rb +28 -0
  46. data/spec/sitemap_generator/builder/sitemap_url_spec.rb +186 -0
  47. data/spec/sitemap_generator/core_ext/bigdecimal_spec.rb +20 -0
  48. data/spec/sitemap_generator/core_ext/numeric_spec.rb +43 -0
  49. data/spec/sitemap_generator/file_adaptor_spec.rb +20 -0
  50. data/spec/sitemap_generator/geo_sitemap_spec.rb +30 -0
  51. data/spec/sitemap_generator/helpers/number_helper_spec.rb +196 -0
  52. data/spec/sitemap_generator/interpreter_spec.rb +90 -0
  53. data/spec/sitemap_generator/link_set_spec.rb +864 -0
  54. data/spec/sitemap_generator/mobile_sitemap_spec.rb +27 -0
  55. data/spec/sitemap_generator/news_sitemap_spec.rb +42 -0
  56. data/spec/sitemap_generator/pagemap_sitemap_spec.rb +57 -0
  57. data/spec/sitemap_generator/sitemap_generator_spec.rb +582 -0
  58. data/spec/sitemap_generator/sitemap_groups_spec.rb +144 -0
  59. data/spec/sitemap_generator/sitemap_location_spec.rb +210 -0
  60. data/spec/sitemap_generator/sitemap_namer_spec.rb +96 -0
  61. data/spec/sitemap_generator/templates_spec.rb +24 -0
  62. data/spec/sitemap_generator/utilities/existence_spec.rb +26 -0
  63. data/spec/sitemap_generator/utilities/hash_spec.rb +57 -0
  64. data/spec/sitemap_generator/utilities/rounding_spec.rb +31 -0
  65. data/spec/sitemap_generator/utilities_spec.rb +101 -0
  66. data/spec/sitemap_generator/video_sitemap_spec.rb +117 -0
  67. data/spec/spec_helper.rb +24 -0
  68. data/spec/support/file_macros.rb +39 -0
  69. data/spec/support/schemas/siteindex.xsd +73 -0
  70. data/spec/support/schemas/sitemap-geo.xsd +41 -0
  71. data/spec/support/schemas/sitemap-mobile.xsd +32 -0
  72. data/spec/support/schemas/sitemap-news.xsd +159 -0
  73. data/spec/support/schemas/sitemap-pagemap.xsd +97 -0
  74. data/spec/support/schemas/sitemap-video.xsd +643 -0
  75. data/spec/support/schemas/sitemap.xsd +115 -0
  76. data/spec/support/xml_macros.rb +67 -0
  77. data/templates/sitemap.rb +27 -0
  78. metadata +226 -0
@@ -0,0 +1,665 @@
1
+ require 'builder'
2
+
3
+ # A LinkSet provisions a bunch of links to sitemap files. It also writes the index file
4
+ # which lists all the sitemap files written.
5
+ module SitemapGenerator
6
+ class LinkSet
7
+ @@requires_finalization_opts = [:filename, :sitemaps_path, :sitemaps_host, :namer]
8
+ @@new_location_opts = [:filename, :sitemaps_path, :namer]
9
+
10
+ attr_reader :default_host, :sitemaps_path, :filename, :create_index
11
+ attr_accessor :include_root, :include_index, :adapter, :yield_sitemap
12
+ attr_writer :verbose
13
+
14
+ # Create a new sitemap index and sitemap files. Pass a block with calls to the following
15
+ # methods:
16
+ # * +add+ - Add a link to the current sitemap
17
+ # * +group+ - Start a new group of sitemaps
18
+ #
19
+ # == Options
20
+ #
21
+ # Any option supported by +new+ can be passed. The options will be
22
+ # set on the instance using the accessor methods. This is provided mostly
23
+ # as a convenience.
24
+ #
25
+ # In addition to the options to +new+, the following options are supported:
26
+ # * <tt>:finalize</tt> - The sitemaps are written as they get full and at the end
27
+ # of the block. Pass +false+ as the value to prevent the sitemap or sitemap index
28
+ # from being finalized. Default is +true+.
29
+ #
30
+ # If you are calling +create+ more than once in your sitemap configuration file,
31
+ # make sure that you set a different +sitemaps_path+ or +filename+ for each call otherwise
32
+ # the sitemaps may be overwritten.
33
# Reset this LinkSet, apply +opts+, evaluate +block+ through the interpreter
# and finalize the sitemaps and the index. Returns +self+.
#
# The <tt>:exclude_keys</tt> option lists keys of SitemapGenerator::SCHEMAS
# to leave out of the schemas used by this LinkSet. Every other option is
# applied via +set_options+.
def create(opts={}, &block)
  reset!
  # Work on a copy: deleting :exclude_keys from the caller's hash would make
  # a second call with the same hash silently lose the exclusion list.
  opts = opts.dup
  exclude_keys = opts.delete(:exclude_keys) || []
  @schemas = SitemapGenerator::SCHEMAS.reject { |key, _value| exclude_keys.include?(key) }
  set_options(opts)

  # Sample verbosity once, after the options are applied. If it were re-read
  # after the block and had been switched on in the meantime, the summary
  # would subtract an unset start time.
  report = verbose
  if report
    start_time = Time.now
    puts "In '#{sitemap_index.location.public_path}':"
  end

  interpreter.eval(:yield_sitemap => yield_sitemap?, &block)
  finalize!

  if report
    end_time = Time.now
    output(sitemap_index.stats_summary(:time_taken => end_time - start_time))
  end
  self
end
48
+
49
+ # Constructor
50
+ #
51
+ # == Options:
52
+ # * <tt>:adapter</tt> - instance of a class with a write method which takes a SitemapGenerator::Location
53
+ # and raw XML data and persists it. The default adapter is a SitemapGenerator::FileAdapter
54
+ # which simply writes files to the filesystem. You can use a SitemapGenerator::WaveAdapter
55
+ # for uploading sitemaps to remote servers - useful for read-only hosts such as Heroku. Or
56
+ # you can provide an instance of your own class to provide custom behavior.
57
+ #
58
+ # * <tt>:default_host</tt> - host including protocol to use in all sitemap links
59
+ # e.g. http://en.google.ca
60
+ #
61
+ # * <tt>:public_path</tt> - Full or relative path to the directory to write sitemaps into.
62
+ # Defaults to the <tt>public/</tt> directory in your application root directory or
63
+ # the current working directory.
64
+ #
65
+ # * <tt>:sitemaps_host</tt> - String. <b>Host including protocol</b> to use when generating
66
+ # a link to a sitemap file i.e. the hostname of the server where the sitemaps are hosted.
67
+ # The value will differ from the hostname in your sitemap links.
68
+ # For example: `'http://amazon.aws.com/'`.
69
+ #
70
+ # Note that `include_index` is automatically turned off when the `sitemaps_host` does
71
+ # not match `default_host`. Because the link to the sitemap index file that would
72
+ # otherwise be added would point to a different host than the rest of the links in
73
+ # the sitemap. Something that the sitemap rules forbid.
74
+ #
75
+ # * <tt>:sitemaps_path</tt> - path fragment within public to write sitemaps
76
+ # to e.g. 'en/'. Sitemaps are written to <tt>public_path</tt> + <tt>sitemaps_path</tt>
77
+ #
78
+ # * <tt>:filename</tt> - symbol giving the base name for files (default <tt>:sitemap</tt>).
79
+ # The names are generated like "#{filename}.xml.gz", "#{filename}1.xml.gz", "#{filename}2.xml.gz"
80
+ # with the first file being the index if you have more than one sitemap file.
81
+ #
82
+ # * <tt>:include_index</tt> - Boolean. Whether to <b>add a link pointing to the sitemap index</b>
83
+ # to the current sitemap. This points search engines to your Sitemap Index to
84
+ # include it in the indexing of your site. Default is `false`. Turned off when
85
+ # `sitemaps_host` is set or within a `group()` block. Turned off because Google can complain
86
+ # about nested indexing and because if a robot is already reading your sitemap, they
87
+ # probably know about the index.
88
+ #
89
+ # * <tt>:include_root</tt> - Boolean. Whether to **add the root** url i.e. '/' to the
90
+ # current sitemap. Default is `true`. Turned off within a `group()` block.
91
+ #
92
+ # * <tt>:search_engines</tt> - Hash. A hash of search engine names mapped to
93
+ # ping URLs. See ping_search_engines.
94
+ #
95
+ # * <tt>:verbose</tt> - If +true+, output a summary line for each sitemap and sitemap
96
+ # index that is created. Default is +false+.
97
+ #
98
+ # * <tt>:create_index</tt> - Supported values: `true`, `false`, `:auto`. Default: `:auto`.
99
+ # Whether to create a sitemap index file. If `true` an index file is always created,
100
+ # regardless of how many links are in your sitemap. If `false` an index file is never
101
+ # created. If `:auto` an index file is created only if your sitemap has more than
102
+ # one sitemap file.
103
+ #
104
+ # * <tt>:namer</tt> - A <tt>SitemapGenerator::SimpleNamer</tt> instance for generating the sitemap
105
+ # and index file names. See <tt>:filename</tt> if you don't need to do anything fancy, and can
106
+ # accept the default naming conventions.
107
+ #
108
+ # * <tt>:compress</tt> - Specifies which files to compress with gzip. Default is `true`. Accepted values:
109
+ # * `true` - Boolean; compress all files.
110
+ # * `false` - Boolean; write out only uncompressed files.
111
+ # * `:all_but_first` - Symbol; leave the first file uncompressed but compress any remaining files.
112
+ #
113
+ # The compression setting applies to groups too. So :all_but_first will have the same effect (the first
114
+ # file in the group will not be compressed, the rest will). So if you require different behaviour for your
115
+ # groups, pass in a `:compress` option e.g. <tt>group(:compress => false) { add('/link') }</tt>
116
+ #
117
+ # KJV: When adding a new option be sure to include it in `options_for_group()` if
118
+ # the option should be inherited by groups.
119
+ def initialize(options={})
120
+ options = SitemapGenerator::Utilities.reverse_merge(options,
121
+ :include_root => true,
122
+ :include_index => false,
123
+ :filename => :sitemap,
124
+ :search_engines => {
125
+ :google => "http://www.google.com/webmasters/tools/ping?sitemap=%s",
126
+ :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=%s"
127
+ },
128
+ :create_index => :auto,
129
+ :compress => true
130
+ )
131
+ options.each_pair { |k, v| instance_variable_set("@#{k}".to_sym, v) }
132
+
133
+ # If an index is passed in, protect it from modification.
134
+ # Sitemaps can be added to the index but nothing else can be changed.
135
+ if options[:sitemap_index]
136
+ @protect_index = true
137
+ end
138
+ end
139
+
140
+ # Add a link to a Sitemap. If a new Sitemap is required, one will be created for
141
+ # you.
142
+ #
143
+ # link - string link e.g. '/merchant', '/article/1' or whatever.
144
+ # options - see README.
145
+ # host - host for the link, defaults to your <tt>default_host</tt>.
146
+ def add(link, options={})
147
+ add_default_links if !@added_default_links
148
+ sitemap.add(link, SitemapGenerator::Utilities.reverse_merge(options, :host => @default_host))
149
+ rescue SitemapGenerator::SitemapFullError
150
+ finalize_sitemap!
151
+ retry
152
+ rescue SitemapGenerator::SitemapFinalizedError
153
+ @sitemap = sitemap.new
154
+ retry
155
+ end
156
+
157
+ # Add a link to the Sitemap Index.
158
+ # * link - A string link e.g. '/sitemaps/sitemap1.xml.gz' or a SitemapFile instance.
159
+ # * options - A hash of options including `:lastmod`, `:priority`, `:changefreq` and `:host`
160
+ #
161
+ # The `:host` option defaults to the value of `sitemaps_host` which is the host where your
162
+ # sitemaps reside. If no `sitemaps_host` is set, the `default_host` is used.
163
+ def add_to_index(link, options={})
164
+ sitemap_index.add(link, SitemapGenerator::Utilities.reverse_merge(options, :host => sitemaps_host))
165
+ end
166
+
167
+ # Create a new group of sitemap files.
168
+ #
169
+ # Returns a new LinkSet instance with the options passed in set on it. All groups
170
+ # share the sitemap index, which is not affected by any of the options passed here.
171
+ #
172
+ # === Options
173
+ # Any of the options to LinkSet.new. Except for <tt>:public_path</tt> which is shared
174
+ # by all groups.
175
+ #
176
+ # The current options are inherited by the new group of sitemaps. The only exceptions
177
+ # being <tt>:include_index</tt> and <tt>:include_root</tt> which default to +false+.
178
+ #
179
+ # Pass a block to add links to the new LinkSet. If you pass a block the sitemaps will
180
+ # be finalized when the block returns.
181
+ #
182
+ # If you are not changing any of the location settings like <tt>filename</tt>,
183
+ # <tt>sitemaps_path</tt>, <tt>sitemaps_host</tt> or <tt>namer</tt>,
184
+ # links you add within the group will be added to the current sitemap.
185
+ # Otherwise the current sitemap file is finalized and a new sitemap file started,
186
+ # using the options you specified.
187
+ #
188
+ # Most commonly, you'll want to give the group's files a distinct name using
189
+ # the <tt>filename</tt> option.
190
+ #
191
+ # Options like <tt>:default_host</tt> can be used and it will only affect the links
192
+ # within the group. Links added outside of the group will revert to the previous
193
+ # +default_host+.
194
# Create a group: a new LinkSet that shares this LinkSet's sitemap index.
# Returns the new LinkSet. When a block is given it is evaluated against the
# group before returning.
def group(opts={}, &block)
  @created_group = true
  original_opts = opts.dup

  if (@@requires_finalization_opts & original_opts.keys).empty?
    # If no new filename or path is specified reuse the default sitemap file.
    # A new location object will be set on it for the duration of the group.
    original_opts[:sitemap] = sitemap
  elsif original_opts.key?(:sitemaps_host) && (@@new_location_opts & original_opts.keys).empty?
    # If no location options are provided we are creating the next sitemap in the
    # current series, so finalize and inherit the namer.
    finalize_sitemap!
    original_opts[:namer] = namer
  end

  opts = options_for_group(original_opts)
  @group = SitemapGenerator::LinkSet.new(opts)

  # Use the same predicate as create() so that an explicit
  # `yield_sitemap = false` on this instance is honoured instead of being
  # overridden by a truthy SitemapGenerator.yield_sitemap? setting.
  yield_to_block = yield_sitemap?

  if opts.key?(:sitemap)
    # If the group is sharing the current sitemap, set the
    # new location options on the location object.
    @original_location = @sitemap.location.dup
    @sitemap.location.merge!(@group.sitemap_location)
    if block_given?
      @group.interpreter.eval(:yield_sitemap => yield_to_block, &block)
      # Restore the location the shared sitemap had before the group.
      @sitemap.location.merge!(@original_location)
    end
  else
    # The group writes to its own sitemap file. A user with a single such
    # group would expect an index, so force index creation unless they
    # explicitly asked for no index (create_index == false). With :auto this
    # can yield an index for a single sitemap, which is the accepted trade-off.
    @group.send(:create_index=, true, true) if @group.create_index != false

    if block_given?
      @group.interpreter.eval(:yield_sitemap => yield_to_block, &block)
      @group.finalize_sitemap!
    end
  end
  @group
end
241
+
242
+ # Ping search engines to notify them of updated sitemaps.
243
+ #
244
+ # Search engines are already notified for you if you run `rake sitemap:refresh`.
245
+ # If you want to ping search engines separately to your sitemap generation, run
246
+ # `rake sitemap:refresh:no_ping` and then run a rake task or script
247
+ # which calls this method as in the example below.
248
+ #
249
+ # == Arguments
250
+ # * sitemap_index_url - The full URL to your sitemap index file.
251
+ # If not provided the location is based on the `host` you have
252
+ # set and any other options like your `sitemaps_path`. The URL
253
+ # will be CGI escaped for you when included as part of the
254
+ # search engine ping URL.
255
+ #
256
+ # == Options
257
+ # A hash of one or more search engines to ping in addition to the
258
+ # default search engines. The key is the name of the search engine
259
+ # as a string or symbol and the value is the full URL to ping with
260
+ # a string interpolation that will be replaced by the CGI escaped sitemap
261
+ # index URL. If you have any literal percent characters in your URL you
262
+ # need to escape them with `%%`. For example if your sitemap index URL
263
+ # is `http://example.com/sitemap.xml.gz` and your
264
+ # ping url is `http://example.com/100%%/ping?url=%s`
265
+ # then the final URL that is pinged will be `http://example.com/100%/ping?url=http%3A%2F%2Fexample.com%2Fsitemap.xml.gz`
266
+ #
267
+ # == Examples
268
+ #
269
+ # Both of these examples will ping the default search engines in addition to `http://superengine.com/ping?url=http%3A%2F%2Fexample.com%2Fsitemap.xml.gz`
270
+ #
271
+ # SitemapGenerator::Sitemap.host('http://example.com/')
272
+ # SitemapGenerator::Sitemap.ping_search_engines(:super_engine => 'http://superengine.com/ping?url=%s')
273
+ #
274
+ # Is equivalent to:
275
+ #
276
+ # SitemapGenerator::Sitemap.ping_search_engines('http://example.com/sitemap.xml.gz', :super_engine => 'http://superengine.com/ping?url=%s')
277
# Ping each configured search engine (plus any passed in a trailing Hash)
# with the CGI-escaped sitemap index URL. The first non-Hash argument, if
# given, is used as the index URL; otherwise +sitemap_index_url+ is used.
# A failed or timed-out ping is reported through +output+ and does not stop
# the remaining pings.
def ping_search_engines(*args)
  require 'cgi'
  require 'open-uri'
  require 'timeout'

  engines = args.last.is_a?(Hash) ? args.pop : {}
  unescaped_url = args.shift || sitemap_index_url
  index_url = CGI.escape(unescaped_url)

  output("\n")
  output("Pinging with URL '#{unescaped_url}':")
  search_engines.merge(engines).each do |engine, link|
    link = link % index_url
    name = Utilities.titleize(engine.to_s)
    begin
      Timeout::timeout(10) do
        # open-uri no longer patches Kernel#open as of Ruby 3.0, so a bare
        # open(url) would be treated as a file path there. Prefer URI.open
        # and only fall back to Kernel#open on Rubies that lack it.
        # The block form closes the response stream once the ping is done.
        if URI.respond_to?(:open)
          URI.open(link) { |_response| }
        else
          open(link) { |_response| }
        end
      end
      output(" Successful ping of #{name}")
    rescue Timeout::Error, StandardError => e
      output("Ping failed for #{name}: #{e.inspect} (URL #{link})")
    end
  end
end
301
+
302
+ # Return a count of the total number of links in all sitemaps
303
+ def link_count
304
+ sitemap_index.total_link_count
305
+ end
306
+
307
+ # Return the host to use in links to the sitemap files. This defaults to your
308
+ # +default_host+.
309
+ def sitemaps_host
310
+ @sitemaps_host || @default_host
311
+ end
312
+
313
+ # Lazy-initialize a sitemap instance and return it.
314
+ def sitemap
315
+ @sitemap ||= SitemapGenerator::Builder::SitemapFile.new(sitemap_location,
316
+ schemas)
317
+ end
318
+
319
+ # Lazy-initialize a sitemap index instance and return it.
320
+ def sitemap_index
321
+ @sitemap_index ||= SitemapGenerator::Builder::SitemapIndexFile.new(sitemap_index_location)
322
+ end
323
+
324
+ # Return the full url to the sitemap index file. When `create_index` is `false`
325
+ # the first sitemap is technically the index, so this will be its URL. It's important
326
+ # to use this method to get the index url because `sitemap_index.location.url` will
327
+ # not be correct in such situations.
328
+ #
329
+ # KJV: This is somewhat confusing.
330
+ def sitemap_index_url
331
+ sitemap_index.index_url
332
+ end
333
+
334
+ # All done. Write out remaining files.
335
+ def finalize!
336
+ finalize_sitemap!
337
+ finalize_sitemap_index!
338
+ end
339
+
340
+ # Return a boolean indicating whether to add a link to the sitemap index file
341
+ # to the current sitemap. This points search engines to your Sitemap Index so
342
+ # they include it in the indexing of your site, but is not strictly necessary.
343
+ # Default is `false`. Turned off when `sitemaps_host` is set or within a `group()` block.
344
+ def include_index?
345
+ if default_host && sitemaps_host && sitemaps_host != default_host
346
+ false
347
+ else
348
+ @include_index
349
+ end
350
+ end
351
+
352
+ # Return a boolean indicating whether to automatically add the root url i.e. '/' to the
353
+ # current sitemap. Default is `true`. Turned off within a `group()` block.
354
+ def include_root?
355
+ !!@include_root
356
+ end
357
+
358
+ # Set verbose on the instance or by setting ENV['VERBOSE'] to true or false.
359
+ # By default verbose is true. When running rake tasks, pass the <tt>-s</tt>
360
+ # option to rake to turn verbose off.
361
+ def verbose
362
+ if @verbose.nil?
363
+ @verbose = SitemapGenerator.verbose.nil? ? true : SitemapGenerator.verbose
364
+ end
365
+ @verbose
366
+ end
367
+
368
+ def schemas
369
+ @schemas || SCHEMAS
370
+ end
371
+
372
+ # Return a boolean indicating whether or not to yield the sitemap.
373
+ def yield_sitemap?
374
+ @yield_sitemap.nil? ? SitemapGenerator.yield_sitemap? : !!@yield_sitemap
375
+ end
376
+
377
+ protected
378
+
379
+ # Set each option on this instance using accessor methods. This will affect
380
+ # both the sitemap and the sitemap index.
381
+ #
382
+ # If both `filename` and `namer` are passed, set filename first so it
383
+ # doesn't override the latter.
384
+ def set_options(opts={})
385
+ opts = opts.dup
386
+ %w(filename namer).each do |key|
387
+ if value = opts.delete(key.to_sym)
388
+ send("#{key}=", value)
389
+ end
390
+ end
391
+ opts.each_pair do |key, value|
392
+ send("#{key}=", value)
393
+ end
394
+ end
395
+
396
+ # Given +opts+, modify it and return it prepped for creating a new group from this LinkSet.
397
+ # If <tt>:public_path</tt> is present in +opts+ it is removed because groups cannot
398
+ # change the public path.
399
+ def options_for_group(opts)
400
+ opts = SitemapGenerator::Utilities.reverse_merge(opts,
401
+ :include_index => false,
402
+ :include_root => false,
403
+ :sitemap_index => sitemap_index
404
+ )
405
+ opts.delete(:public_path)
406
+
407
+ # Reverse merge the current settings
408
+ # KJV: This hash could be a problem because it needs to be maintained
409
+ # when new options are added, but can easily be missed. We really could
410
+ # do with a separate SitemapOptions class.
411
+ current_settings = [
412
+ :include_root,
413
+ :include_index,
414
+ :sitemaps_path,
415
+ :public_path,
416
+ :sitemaps_host,
417
+ :verbose,
418
+ :default_host,
419
+ :adapter,
420
+ :create_index,
421
+ :compress,
422
+ :schemas
423
+ ].inject({}) do |hash, key|
424
+ if !(value = instance_variable_get(:"@#{key}")).nil?
425
+ hash[key] = value
426
+ end
427
+ hash
428
+ end
429
+ SitemapGenerator::Utilities.reverse_merge!(opts, current_settings)
430
+ opts
431
+ end
432
+
433
+ # Add default links if those options are turned on. Record the fact that we have done so
434
+ # in an instance variable.
435
+ def add_default_links
436
+ if include_root?
437
+ sitemap.add('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0, :host => @default_host)
438
+ end
439
+ if include_index?
440
+ sitemap.add(sitemap_index, :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
441
+ end
442
+ @added_default_links = true
443
+ end
444
+
445
+ # Finalize a sitemap by including it in the index and outputting a summary line.
446
+ # Do nothing if it has already been finalized.
447
+ #
448
+ # Don't finalize if the sitemap is empty.
449
+ #
450
+ # Add the default links if they have not been added yet and no groups have been created.
451
+ # If the default links haven't been added we know that the sitemap is empty,
452
+ # because they are added on the first call to add(). This ensures that if the
453
+ # block passed to create() is empty the default links are still included in the
454
+ # sitemap.
455
+ def finalize_sitemap!
456
+ return if sitemap.finalized? || sitemap.empty? && @created_group
457
+ add_default_links if !@added_default_links && !@created_group
458
+ # This will finalize it. We add to the index even if not creating an index because
459
+ # the index keeps track of how many links are in our sitemaps and we need this info
460
+ # for the summary line. Also the index determines which file gets the first name
461
+ # so everything has to go via the index.
462
+ add_to_index(sitemap) unless sitemap.empty?
463
+ end
464
+
465
+ # Finalize a sitemap index and output a summary line. Do nothing if it has already
466
+ # been finalized.
467
+ def finalize_sitemap_index!
468
+ return if @protect_index || sitemap_index.finalized?
469
+ sitemap_index.finalize!
470
+ sitemap_index.write
471
+ end
472
+
473
+ # Return the interpreter linked to this instance.
474
+ def interpreter
475
+ require 'sitemap_generator/interpreter'
476
+ @interpreter ||= SitemapGenerator::Interpreter.new(:link_set => self)
477
+ end
478
+
479
+ # Reset this instance. Keep the same options, but return to the same state
480
+ # as before any sitemaps were created.
481
+ def reset!
482
+ @sitemap_index = nil if @sitemap_index && @sitemap_index.finalized? && !@protect_index
483
+ @sitemap = nil if @sitemap && @sitemap.finalized?
484
+ self.namer.reset
485
+ @added_default_links = false
486
+ end
487
+
488
+ # Write the given string to STDOUT. Used so that the sitemap config can be
489
+ # evaluated and some info output to STDOUT in a lazy fashion.
490
+ def output(string)
491
+ return unless verbose
492
+ puts string
493
+ end
494
+
495
+ module LocationHelpers
496
+ public
497
+
498
+ # Set the host name, including protocol, that will be used by default on each
499
+ # of your sitemap links. You can pass a different host in your options to `add`
500
+ # if you need to change it on a per-link basis.
501
+ def default_host=(value)
502
+ @default_host = value
503
+ update_location_info(:host, value)
504
+ end
505
+
506
+ # Set the public_path. This path gives the location of your public directory.
507
+ # The default is the public/ directory in your Rails root. Or if Rails is not
508
+ # found, it defaults to public/ in the current directory (of the process).
509
+ #
510
+ # Example: 'tmp/' if you don't want to generate in public for some reason.
511
+ #
512
+ # Set to nil to use the current directory.
513
+ def public_path=(value)
514
+ @public_path = Pathname.new(SitemapGenerator::Utilities.append_slash(value))
515
+ if @public_path.relative?
516
+ @public_path = SitemapGenerator.app.root + @public_path
517
+ end
518
+ update_location_info(:public_path, @public_path)
519
+ @public_path
520
+ end
521
+
522
+ # Return a Pathname with the full path to the public directory
523
+ def public_path
524
+ @public_path ||= self.send(:public_path=, 'public/')
525
+ end
526
+
527
+ # Set the sitemaps_path. This path gives the location to write sitemaps to
528
+ # relative to your public_path.
529
+ # Example: 'sitemaps/' to generate your sitemaps in 'public/sitemaps/'.
530
+ def sitemaps_path=(value)
531
+ @sitemaps_path = value
532
+ update_location_info(:sitemaps_path, value)
533
+ end
534
+
535
+ # Set the host name, including protocol, that will be used on all links to your sitemap
536
+ # files. Useful when the server that hosts the sitemaps is not on the same host as
537
+ # the links in the sitemap.
538
+ #
539
+ # Note that `include_index` will be turned off to avoid adding a link to a sitemap with
540
+ # a different host than the other links.
541
+ def sitemaps_host=(value)
542
+ @sitemaps_host = value
543
+ update_location_info(:host, value)
544
+ end
545
+
546
+ # Set the filename base to use when generating sitemaps (and the sitemap index).
547
+ #
548
+ # === Example
549
+ # <tt>filename = :sitemap</tt>
550
+ #
551
+ # === Generates
552
+ # <tt>sitemap.xml.gz, sitemap1.xml.gz, sitemap2.xml.gz, ...</tt>
553
+ def filename=(value)
554
+ @filename = value
555
+ self.namer = SitemapGenerator::SimpleNamer.new(@filename)
556
+ end
557
+
558
+ # Set the search engines hash to a new hash of search engine names mapped to
559
+ # ping URLs (see ping_search_engines). If the value is nil it is converted
560
+ # to an empty hash.
561
+ # === Example
562
+ # <tt>search_engines = { :google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=%s" }</tt>
563
+ def search_engines=(value)
564
+ @search_engines = value || {}
565
+ end
566
+
567
+ # Return the hash of search engines.
568
+ def search_engines
569
+ @search_engines || {}
570
+ end
571
+
572
+ # Return a new +SitemapLocation+ instance with the current options included
573
+ def sitemap_location
574
+ SitemapGenerator::SitemapLocation.new(
575
+ :host => sitemaps_host,
576
+ :namer => namer,
577
+ :public_path => public_path,
578
+ :sitemaps_path => @sitemaps_path,
579
+ :adapter => @adapter,
580
+ :verbose => verbose,
581
+ :compress => @compress
582
+ )
583
+ end
584
+
585
+ # Return a new +SitemapIndexLocation+ instance with the current options included
586
+ def sitemap_index_location
587
+ SitemapGenerator::SitemapLocation.new(
588
+ :host => sitemaps_host,
589
+ :namer => namer,
590
+ :public_path => public_path,
591
+ :sitemaps_path => @sitemaps_path,
592
+ :adapter => @adapter,
593
+ :verbose => verbose,
594
+ :create_index => @create_index,
595
+ :compress => @compress
596
+ )
597
+ end
598
+
599
+ # Set the value of +create_index+ on the SitemapIndexLocation object of the
600
+ # SitemapIndexFile.
601
+ #
602
+ # Whether to create a sitemap index file. Supported values: `true`, `false`, `:auto`.
603
+ # If `true` an index file is always created, regardless of how many links
604
+ # are in your sitemap. If `false` an index file is never created.
605
+ # If `:auto` an index file is created only if your sitemap has more than
606
+ # one sitemap file.
607
+ def create_index=(value, force=false)
608
+ @create_index = value
609
+ # Allow overriding the protected status of the index when we are creating a group.
610
+ # Because sometimes we need to force an index in that case. But generally we don't
611
+ # want to allow people to mess with this value if the index is protected.
612
+ @sitemap_index.location[:create_index] = value if @sitemap_index && ((!@sitemap_index.finalized? && !@protect_index) || force)
613
+ end
614
+
615
+ # Set the namer to use to generate the sitemap (and index) file names.
616
+ # This should be an instance of <tt>SitemapGenerator::SimpleNamer</tt>
617
+ def namer=(value)
618
+ @namer = value
619
+ @sitemap.location[:namer] = value if @sitemap && !@sitemap.finalized?
620
+ @sitemap_index.location[:namer] = value if @sitemap_index && !@sitemap_index.finalized? && !@protect_index
621
+ end
622
+
623
+ # Return the namer object. If it is not set, looks for it on
624
+ # the current sitemap and if there is no sitemap, creates a new one using
625
+ # the current filename.
626
+ def namer
627
+ @namer ||= @sitemap && @sitemap.location.namer || SitemapGenerator::SimpleNamer.new(@filename)
628
+ end
629
+
630
+ # Set the value of the compress setting.
631
+ #
632
+ # Values:
633
+ # * `true` - Boolean; compress all files
634
+ # * `false` - Boolean; write out only uncompressed files
635
+ # * `:all_but_first` - Symbol; leave the first file uncompressed but compress any remaining files.
636
+ #
637
+ # The compression setting applies to groups too. So :all_but_first will have the same effect (the first
638
+ # file in the group will not be compressed, the rest will). So if you require different behaviour for your
639
+ # groups, pass in a `:compress` option e.g. <tt>group(:compress => false) { add('/link') }</tt>
640
+ def compress=(value)
641
+ @compress = value
642
+ @sitemap_index.location[:compress] = @compress if @sitemap_index
643
+ @sitemap.location[:compress] = @compress if @sitemap
644
+ end
645
+
646
+ # Return the current compression setting. Its value determines which files will be gzip'ed.
647
+ # See the setter for documentation of its values.
648
+ def compress
649
+ @compress
650
+ end
651
+
652
+ protected
653
+
654
+ # Update the given attribute on the current sitemap index and sitemap file location objects.
655
+ # But don't create the index or sitemap files yet if they are not already created.
656
+ def update_location_info(attribute, value, opts={})
657
+ opts = SitemapGenerator::Utilities.reverse_merge(opts, :include_index => !@protect_index)
658
+ @sitemap_index.location[attribute] = value if opts[:include_index] && @sitemap_index && !@sitemap_index.finalized?
659
+ @sitemap.location[attribute] = value if @sitemap && !@sitemap.finalized?
660
+ end
661
+ end
662
+ include LocationHelpers
663
+ end
664
+ end
665
+
@@ -0,0 +1,7 @@
1
+ module SitemapGenerator
2
+ class Railtie < Rails::Railtie
3
+ rake_tasks do
4
+ require File.expand_path('../tasks', __FILE__)
5
+ end
6
+ end
7
+ end