sitemap_generator_ftbpro 5.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +13 -0
  3. data/Gemfile.lock +35 -0
  4. data/MIT-LICENSE +20 -0
  5. data/README.md +1139 -0
  6. data/Rakefile +43 -0
  7. data/VERSION +1 -0
  8. data/lib/capistrano/sitemap_generator.rb +1 -0
  9. data/lib/capistrano/tasks/sitemap_generator.cap +36 -0
  10. data/lib/sitemap_generator/adapters/file_adapter.rb +43 -0
  11. data/lib/sitemap_generator/adapters/fog_adapter.rb +28 -0
  12. data/lib/sitemap_generator/adapters/s3_adapter.rb +41 -0
  13. data/lib/sitemap_generator/adapters/wave_adapter.rb +21 -0
  14. data/lib/sitemap_generator/adapters.rb +0 -0
  15. data/lib/sitemap_generator/application.rb +49 -0
  16. data/lib/sitemap_generator/builder/sitemap_file.rb +171 -0
  17. data/lib/sitemap_generator/builder/sitemap_index_file.rb +149 -0
  18. data/lib/sitemap_generator/builder/sitemap_index_url.rb +28 -0
  19. data/lib/sitemap_generator/builder/sitemap_url.rb +250 -0
  20. data/lib/sitemap_generator/builder.rb +8 -0
  21. data/lib/sitemap_generator/core_ext/big_decimal.rb +45 -0
  22. data/lib/sitemap_generator/core_ext/numeric.rb +48 -0
  23. data/lib/sitemap_generator/core_ext.rb +3 -0
  24. data/lib/sitemap_generator/helpers/number_helper.rb +237 -0
  25. data/lib/sitemap_generator/interpreter.rb +80 -0
  26. data/lib/sitemap_generator/link_set.rb +665 -0
  27. data/lib/sitemap_generator/railtie.rb +7 -0
  28. data/lib/sitemap_generator/sitemap_location.rb +192 -0
  29. data/lib/sitemap_generator/sitemap_namer.rb +75 -0
  30. data/lib/sitemap_generator/tasks.rb +53 -0
  31. data/lib/sitemap_generator/templates.rb +41 -0
  32. data/lib/sitemap_generator/utilities.rb +181 -0
  33. data/lib/sitemap_generator.rb +82 -0
  34. data/lib/tasks/sitemap_generator_tasks.rake +1 -0
  35. data/rails/install.rb +2 -0
  36. data/rails/uninstall.rb +2 -0
  37. data/spec/blueprint.rb +15 -0
  38. data/spec/files/sitemap.create.rb +12 -0
  39. data/spec/files/sitemap.groups.rb +49 -0
  40. data/spec/sitemap_generator/adapters/s3_adapter_spec.rb +23 -0
  41. data/spec/sitemap_generator/alternate_sitemap_spec.rb +79 -0
  42. data/spec/sitemap_generator/application_spec.rb +69 -0
  43. data/spec/sitemap_generator/builder/sitemap_file_spec.rb +110 -0
  44. data/spec/sitemap_generator/builder/sitemap_index_file_spec.rb +124 -0
  45. data/spec/sitemap_generator/builder/sitemap_index_url_spec.rb +28 -0
  46. data/spec/sitemap_generator/builder/sitemap_url_spec.rb +186 -0
  47. data/spec/sitemap_generator/core_ext/bigdecimal_spec.rb +20 -0
  48. data/spec/sitemap_generator/core_ext/numeric_spec.rb +43 -0
  49. data/spec/sitemap_generator/file_adaptor_spec.rb +20 -0
  50. data/spec/sitemap_generator/geo_sitemap_spec.rb +30 -0
  51. data/spec/sitemap_generator/helpers/number_helper_spec.rb +196 -0
  52. data/spec/sitemap_generator/interpreter_spec.rb +90 -0
  53. data/spec/sitemap_generator/link_set_spec.rb +864 -0
  54. data/spec/sitemap_generator/mobile_sitemap_spec.rb +27 -0
  55. data/spec/sitemap_generator/news_sitemap_spec.rb +42 -0
  56. data/spec/sitemap_generator/pagemap_sitemap_spec.rb +57 -0
  57. data/spec/sitemap_generator/sitemap_generator_spec.rb +582 -0
  58. data/spec/sitemap_generator/sitemap_groups_spec.rb +144 -0
  59. data/spec/sitemap_generator/sitemap_location_spec.rb +210 -0
  60. data/spec/sitemap_generator/sitemap_namer_spec.rb +96 -0
  61. data/spec/sitemap_generator/templates_spec.rb +24 -0
  62. data/spec/sitemap_generator/utilities/existence_spec.rb +26 -0
  63. data/spec/sitemap_generator/utilities/hash_spec.rb +57 -0
  64. data/spec/sitemap_generator/utilities/rounding_spec.rb +31 -0
  65. data/spec/sitemap_generator/utilities_spec.rb +101 -0
  66. data/spec/sitemap_generator/video_sitemap_spec.rb +117 -0
  67. data/spec/spec_helper.rb +24 -0
  68. data/spec/support/file_macros.rb +39 -0
  69. data/spec/support/schemas/siteindex.xsd +73 -0
  70. data/spec/support/schemas/sitemap-geo.xsd +41 -0
  71. data/spec/support/schemas/sitemap-mobile.xsd +32 -0
  72. data/spec/support/schemas/sitemap-news.xsd +159 -0
  73. data/spec/support/schemas/sitemap-pagemap.xsd +97 -0
  74. data/spec/support/schemas/sitemap-video.xsd +643 -0
  75. data/spec/support/schemas/sitemap.xsd +115 -0
  76. data/spec/support/xml_macros.rb +67 -0
  77. data/templates/sitemap.rb +27 -0
  78. metadata +226 -0
@@ -0,0 +1,665 @@
1
+ require 'builder'
2
+
3
+ # A LinkSet provisions a bunch of links to sitemap files. It also writes the index file
4
+ # which lists all the sitemap files written.
5
+ module SitemapGenerator
6
+ class LinkSet
7
# Option keys inspected by +group+: when none of them is passed, the group
# reuses the current sitemap file. Frozen so the shared list cannot be
# modified by accident.
@@requires_finalization_opts = [:filename, :sitemaps_path, :sitemaps_host, :namer].freeze
# Option keys inspected by +group+ (together with <tt>:sitemaps_host</tt>) to
# decide whether the group continues the current series of file names.
@@new_location_opts = [:filename, :sitemaps_path, :namer].freeze

attr_reader :default_host, :sitemaps_path, :filename, :create_index
attr_accessor :include_root, :include_index, :adapter, :yield_sitemap
attr_writer :verbose
13
+
14
# Create a new sitemap index and sitemap files. Pass a block with calls to the
# following methods:
# * +add+   - Add a link to the current sitemap
# * +group+ - Start a new group of sitemaps
#
# == Options
#
# Every remaining key in +opts+ is applied to this instance through the
# matching <tt>key=</tt> setter (see +set_options+), so any option that has a
# setter can be passed here as a convenience.
#
# In addition the following option is handled by this method itself:
# * <tt>:exclude_keys</tt> - Array of keys to leave out of
#   <tt>SitemapGenerator::SCHEMAS</tt> for the sitemaps written by this call.
#   Defaults to an empty array.
#
# NOTE(review): earlier documentation described a <tt>:finalize</tt> option.
# The code below always calls +finalize!+ and does not special-case that key;
# confirm before relying on it.
#
# If you are calling +create+ more than once in your sitemap configuration file,
# make sure that you set a different +sitemaps_path+ or +filename+ for each call
# otherwise the sitemaps may be overwritten.
#
# The caller's +opts+ hash is left untouched. Returns +self+.
def create(opts={}, &block)
  reset!
  # Work on a copy so deleting :exclude_keys does not alter the caller's hash.
  opts = opts.dup
  exclude_keys = opts.delete(:exclude_keys) || []
  @schemas = SitemapGenerator::SCHEMAS.reject { |key, _value| exclude_keys.include?(key) }
  set_options(opts)

  # Always record the start time: verbose can be switched on while the block
  # runs, and the summary below needs a start time in that case as well.
  start_time = Time.now
  puts "In '#{sitemap_index.location.public_path}':" if verbose

  interpreter.eval(:yield_sitemap => yield_sitemap?, &block)
  finalize!
  output(sitemap_index.stats_summary(:time_taken => Time.now - start_time)) if verbose
  self
end
48
+
49
+ # Constructor
50
+ #
51
+ # == Options:
52
+ # * <tt>:adapter</tt> - instance of a class with a write method which takes a SitemapGenerator::Location
53
+ # and raw XML data and persists it. The default adapter is a SitemapGenerator::FileAdapter
54
+ # which simply writes files to the filesystem. You can use a SitemapGenerator::WaveAdapter
55
+ # for uploading sitemaps to remote servers - useful for read-only hosts such as Heroku. Or
56
+ # you can provide an instance of your own class to provide custom behavior.
57
+ #
58
+ # * <tt>:default_host</tt> - host including protocol to use in all sitemap links
59
+ # e.g. http://en.google.ca
60
+ #
61
+ # * <tt>:public_path</tt> - Full or relative path to the directory to write sitemaps into.
62
+ # Defaults to the <tt>public/</tt> directory in your application root directory or
63
+ # the current working directory.
64
+ #
65
+ # * <tt>:sitemaps_host</tt> - String. <b>Host including protocol</b> to use when generating
66
+ # a link to a sitemap file i.e. the hostname of the server where the sitemaps are hosted.
67
+ # The value will differ from the hostname in your sitemap links.
68
+ # For example: `'http://amazon.aws.com/'`.
69
+ #
70
+ # Note that `include_index` is automatically turned off when the `sitemaps_host` does
71
+ # not match `default_host`. Because the link to the sitemap index file that would
72
+ # otherwise be added would point to a different host than the rest of the links in
73
+ # the sitemap. Something that the sitemap rules forbid.
74
+ #
75
+ # * <tt>:sitemaps_path</tt> - path fragment within public to write sitemaps
76
+ # to e.g. 'en/'. Sitemaps are written to <tt>public_path</tt> + <tt>sitemaps_path</tt>
77
+ #
78
+ # * <tt>:filename</tt> - symbol giving the base name for files (default <tt>:sitemap</tt>).
79
+ # The names are generated like "#{filename}.xml.gz", "#{filename}1.xml.gz", "#{filename}2.xml.gz"
80
+ # with the first file being the index if you have more than one sitemap file.
81
+ #
82
+ # * <tt>:include_index</tt> - Boolean. Whether to <b>add a link pointing to the sitemap index</b>
83
+ # to the current sitemap. This points search engines to your Sitemap Index to
84
+ # include it in the indexing of your site. Default is `false`. Turned off when
85
+ # `sitemaps_host` is set or within a `group()` block. Turned off because Google can complain
86
+ # about nested indexing and because if a robot is already reading your sitemap, they
87
+ # probably know about the index.
88
+ #
89
+ # * <tt>:include_root</tt> - Boolean. Whether to **add the root** url i.e. '/' to the
90
+ # current sitemap. Default is `true`. Turned off within a `group()` block.
91
+ #
92
+ # * <tt>:search_engines</tt> - Hash. A hash of search engine names mapped to
93
+ # ping URLs. See ping_search_engines.
94
+ #
95
+ # * <tt>:verbose</tt> - If +true+, output a summary line for each sitemap and sitemap
96
+ # index that is created. Default is +false+.
97
+ #
98
+ # * <tt>:create_index</tt> - Supported values: `true`, `false`, `:auto`. Default: `:auto`.
99
+ # Whether to create a sitemap index file. If `true` an index file is always created,
100
+ # regardless of how many links are in your sitemap. If `false` an index file is never
101
+ # created. If `:auto` an index file is created only if your sitemap has more than
102
+ # one sitemap file.
103
+ #
104
+ # * <tt>:namer</tt> - A <tt>SitemapGenerator::SimpleNamer</tt> instance for generating the sitemap
105
+ # and index file names. See <tt>:filename</tt> if you don't need to do anything fancy, and can
106
+ # accept the default naming conventions.
107
+ #
108
+ # * <tt>:compress</tt> - Specifies which files to compress with gzip. Default is `true`. Accepted values:
109
+ # * `true` - Boolean; compress all files.
110
+ # * `false` - Boolean; write out only uncompressed files.
111
+ # * `:all_but_first` - Symbol; leave the first file uncompressed but compress any remaining files.
112
+ #
113
+ # The compression setting applies to groups too. So :all_but_first will have the same effect (the first
114
+ # file in the group will not be compressed, the rest will). So if you require different behaviour for your
115
+ # groups, pass in a `:compress` option e.g. <tt>group(:compress => false) { add('/link') }</tt>
116
+ #
117
+ # KJV: When adding a new option be sure to include it in `options_for_group()` if
118
+ # the option should be inherited by groups.
119
def initialize(options={})
  # Values used for any key the caller did not supply.
  defaults = {
    :include_root => true,
    :include_index => false,
    :filename => :sitemap,
    :search_engines => {
      :google => "http://www.google.com/webmasters/tools/ping?sitemap=%s",
      :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=%s"
    },
    :create_index => :auto,
    :compress => true
  }
  settings = SitemapGenerator::Utilities.reverse_merge(options, defaults)

  # Each option is stored directly in the instance variable of the same name;
  # the setter methods are not invoked here.
  settings.each_pair do |name, value|
    instance_variable_set(:"@#{name}", value)
  end

  # If an index is passed in, protect it from modification.
  # Sitemaps can be added to the index but nothing else can be changed.
  @protect_index = true if settings[:sitemap_index]
end
139
+
140
# Add a link to the current sitemap, starting a new sitemap file when the
# current one cannot take the link.
#
# link    - string link e.g. '/merchant', '/article/1' or whatever.
# options - see README. The <tt>:host</tt> option defaults to +default_host+.
#
# The default links (see +add_default_links+) are added first if that has not
# happened yet.
def add(link, options={})
  begin
    add_default_links unless @added_default_links
    link_options = SitemapGenerator::Utilities.reverse_merge(options, :host => @default_host)
    sitemap.add(link, link_options)
  rescue SitemapGenerator::SitemapFullError
    # The current file is full: finalize it, then try the same link again.
    finalize_sitemap!
    retry
  rescue SitemapGenerator::SitemapFinalizedError
    # The current file is already finalized: continue with a fresh one.
    @sitemap = sitemap.new
    retry
  end
end
156
+
157
# Add a link to the Sitemap Index.
# * link    - A string link e.g. '/sitemaps/sitemap1.xml.gz' or a SitemapFile instance.
# * options - A hash of options including `:lastmod`, `:priority`, `:changefreq` and `:host`
#
# The `:host` option defaults to the value of `sitemaps_host` which is the host where your
# sitemaps reside. If no `sitemaps_host` is set, the `default_host` is used.
def add_to_index(link, options={})
  index = sitemap_index
  index_options = SitemapGenerator::Utilities.reverse_merge(options, :host => sitemaps_host)
  index.add(link, index_options)
end
166
+
167
+ # Create a new group of sitemap files.
168
+ #
169
+ # Returns a new LinkSet instance with the options passed in set on it. All groups
170
+ # share the sitemap index, which is not affected by any of the options passed here.
171
+ #
172
+ # === Options
173
+ # Any of the options to LinkSet.new. Except for <tt>:public_path</tt> which is shared
174
+ # by all groups.
175
+ #
176
+ # The current options are inherited by the new group of sitemaps. The only exceptions
177
+ # being <tt>:include_index</tt> and <tt>:include_root</tt> which default to +false+.
178
+ #
179
+ # Pass a block to add links to the new LinkSet. If you pass a block the sitemaps will
180
+ # be finalized when the block returns.
181
+ #
182
+ # If you are not changing any of the location settings like <tt>filename</tt>,
183
+ # <tt>sitemaps_path</tt>, <tt>sitemaps_host</tt> or <tt>namer</tt>,
184
+ # links you add within the group will be added to the current sitemap.
185
+ # Otherwise the current sitemap file is finalized and a new sitemap file started,
186
+ # using the options you specified.
187
+ #
188
+ # Most commonly, you'll want to give the group's files a distinct name using
189
+ # the <tt>filename</tt> option.
190
+ #
191
+ # Options like <tt>:default_host</tt> can be used and it will only affect the links
192
+ # within the group. Links added outside of the group will revert to the previous
193
+ # +default_host+.
194
def group(opts={}, &block)
  # Builds a new LinkSet for the group, evaluates the optional block against it
  # and returns it. Whether the sitemap is yielded to the block follows
  # +yield_sitemap?+, the same rule +create+ uses, so an explicit
  # <tt>yield_sitemap = false</tt> on this instance is honoured here too.

  # Record that a group exists; +finalize_sitemap!+ consults this flag.
  @created_group = true
  original_opts = opts.dup

  if (@@requires_finalization_opts & original_opts.keys).empty?
    # If no new filename or path is specified reuse the default sitemap file.
    # A new location object will be set on it for the duration of the group.
    original_opts[:sitemap] = sitemap
  elsif original_opts.key?(:sitemaps_host) && (@@new_location_opts & original_opts.keys).empty?
    # If no location options are provided we are creating the next sitemap in the
    # current series, so finalize and inherit the namer.
    finalize_sitemap!
    original_opts[:namer] = namer
  end

  opts = options_for_group(original_opts)
  @group = SitemapGenerator::LinkSet.new(opts)
  if opts.key?(:sitemap)
    # If the group is sharing the current sitemap, set the
    # new location options on the location object.
    @original_location = @sitemap.location.dup
    @sitemap.location.merge!(@group.sitemap_location)
    if block_given?
      @group.interpreter.eval(:yield_sitemap => yield_sitemap?, &block)
      # Put the original location settings back once the block has run.
      @sitemap.location.merge!(@original_location)
    end
  else
    # Handle the case where a user only has one group, and it's being written
    # to a new sitemap file. They would expect there to be an index. So force
    # index creation. If there is more than one group, we would have an index anyways,
    # so it's safe to force index creation in these other cases. In the case that
    # the groups reuse the current sitemap, don't force index creation because
    # we want the default behaviour i.e. only an index if more than one sitemap file.
    # Don't force index creation if the user specifically requested no index. This
    # unfortunately means that if they set it to :auto they may be getting an index
    # when they didn't expect one, but you shouldn't be using groups if you only have
    # one sitemap and don't want an index. Rather, just add the links directly in the create()
    # block.
    @group.send(:create_index=, true, true) if @group.create_index != false

    if block_given?
      @group.interpreter.eval(:yield_sitemap => yield_sitemap?, &block)
      @group.finalize_sitemap!
    end
  end
  @group
end
241
+
242
+ # Ping search engines to notify them of updated sitemaps.
243
+ #
244
+ # Search engines are already notified for you if you run `rake sitemap:refresh`.
245
+ # If you want to ping search engines separately to your sitemap generation, run
246
+ # `rake sitemap:refresh:no_ping` and then run a rake task or script
247
+ # which calls this method as in the example below.
248
+ #
249
+ # == Arguments
250
+ # * sitemap_index_url - The full URL to your sitemap index file.
251
+ # If not provided the location is based on the `host` you have
252
+ # set and any other options like your `sitemaps_path`. The URL
253
+ # will be CGI escaped for you when included as part of the
254
+ # search engine ping URL.
255
+ #
256
+ # == Options
257
+ # A hash of one or more search engines to ping in addition to the
258
+ # default search engines. The key is the name of the search engine
259
+ # as a string or symbol and the value is the full URL to ping with
260
+ # a string interpolation that will be replaced by the CGI escaped sitemap
261
+ # index URL. If you have any literal percent characters in your URL you
262
+ # need to escape them with `%%`. For example if your sitemap index URL
263
+ # is `http://example.com/sitemap.xml.gz` and your
264
+ # ping url is `http://example.com/100%%/ping?url=%s`
265
+ # then the final URL that is pinged will be `http://example.com/100%/ping?url=http%3A%2F%2Fexample.com%2Fsitemap.xml.gz`
266
+ #
267
+ # == Examples
268
+ #
269
+ # Both of these examples will ping the default search engines in addition to `http://superengine.com/ping?url=http%3A%2F%2Fexample.com%2Fsitemap.xml.gz`
270
+ #
271
+ # SitemapGenerator::Sitemap.host('http://example.com/')
272
+ # SitemapGenerator::Sitemap.ping_search_engines(:super_engine => 'http://superengine.com/ping?url=%s')
273
+ #
274
+ # Is equivalent to:
275
+ #
276
+ # SitemapGenerator::Sitemap.ping_search_engines('http://example.com/sitemap.xml.gz', :super_engine => 'http://superengine.com/ping?url=%s')
277
def ping_search_engines(*args)
  # Loaded here so the libraries are only required when pinging.
  # 'cgi' provides CGI.escape; the session library is not needed for it.
  require 'cgi'
  require 'open-uri'
  require 'timeout'

  # A trailing Hash holds extra engines; an optional leading argument is the
  # sitemap index URL, which otherwise comes from +sitemap_index_url+.
  engines = args.last.is_a?(Hash) ? args.pop : {}
  unescaped_url = args.shift || sitemap_index_url
  index_url = CGI.escape(unescaped_url)

  output("\n")
  output("Pinging with URL '#{unescaped_url}':")
  search_engines.merge(engines).each do |engine, link|
    # Substitute the escaped index URL for the %s in the ping URL.
    link = link % index_url
    name = Utilities.titleize(engine.to_s)
    begin
      Timeout::timeout(10) do
        # Go through URI instead of Kernel#open: Kernel#open no longer fetches
        # URLs on Ruby 3 and would treat the string as a file name or command.
        # The block form closes the response when the block returns.
        URI.parse(link).open { |response| response }
      end
      output(" Successful ping of #{name}")
    rescue Timeout::Error, StandardError => e
      # A failed ping is reported and the remaining engines are still pinged.
      output("Ping failed for #{name}: #{e.inspect} (URL #{link})")
    end
  end
end
301
+
302
# Total number of links across all sitemaps, as reported by the sitemap index.
def link_count
  index = sitemap_index
  index.total_link_count
end
306
+
307
# Host to use in links to the sitemap files: the explicitly configured
# sitemaps host when there is one, otherwise the +default_host+.
def sitemaps_host
  return @sitemaps_host if @sitemaps_host
  @default_host
end
312
+
313
# Return the current sitemap file, building it from +sitemap_location+ and
# +schemas+ the first time it is requested.
def sitemap
  return @sitemap if @sitemap
  @sitemap = SitemapGenerator::Builder::SitemapFile.new(sitemap_location, schemas)
end
318
+
319
# Return the sitemap index file, building it from +sitemap_index_location+
# the first time it is requested.
def sitemap_index
  return @sitemap_index if @sitemap_index
  @sitemap_index = SitemapGenerator::Builder::SitemapIndexFile.new(sitemap_index_location)
end
323
+
324
# Return the full url to the sitemap index file, as reported by the index
# file's +index_url+. When `create_index` is `false` the first sitemap is
# technically the index, so this will be its URL. Use this method rather than
# `sitemap_index.location.url`, which will not be correct in such situations.
#
# KJV: This is somewhat confusing.
def sitemap_index_url
  index = sitemap_index
  index.index_url
end
333
+
334
# All done. Write out whatever has not been written yet: first the current
# sitemap file, then the sitemap index.
def finalize!
  finalize_sitemap!        # the sitemap must be in the index before the index is finalized
  finalize_sitemap_index!
end
339
+
340
# Return a boolean indicating whether to add a link to the sitemap index file
# to the current sitemap. This points search engines to your Sitemap Index so
# they include it in the indexing of your site, but is not strictly necessary.
#
# The constructor defaults the underlying setting to `false`. The answer is
# always `false` when both hosts are set and `sitemaps_host` differs from
# `default_host`. Always returns `true` or `false`, never `nil`, matching
# `include_root?`.
def include_index?
  hosts_differ = default_host && sitemaps_host && sitemaps_host != default_host
  return false if hosts_differ
  !!@include_index
end
351
+
352
# Return a boolean indicating whether the root url i.e. '/' should be added
# automatically to the current sitemap. The constructor defaults the setting
# to `true`; `options_for_group` defaults it to `false` for groups.
def include_root?
  @include_root ? true : false
end
357
+
358
# Return the verbose setting. A value set on this instance wins. Otherwise the
# value of <tt>SitemapGenerator.verbose</tt> is used, and when that is +nil+
# as well verbose is +true+. The resolved value is remembered on the instance.
def verbose
  return @verbose unless @verbose.nil?
  @verbose = if SitemapGenerator.verbose.nil?
    true
  else
    SitemapGenerator.verbose
  end
end
367
+
368
# Return the schemas selected for this link set, or the full +SCHEMAS+
# constant when none have been selected.
def schemas
  return @schemas if @schemas
  SCHEMAS
end
371
+
372
# Return a boolean indicating whether or not to yield the sitemap. A value set
# on this instance takes precedence; otherwise the answer comes from
# <tt>SitemapGenerator.yield_sitemap?</tt>.
def yield_sitemap?
  if @yield_sitemap.nil?
    SitemapGenerator.yield_sitemap?
  else
    @yield_sitemap ? true : false
  end
end
376
+
377
+ protected
378
+
379
# Apply every entry of +opts+ to this instance by calling the matching
# <tt>key=</tt> setter. This will affect both the sitemap and the sitemap index.
#
# <tt>:filename</tt> is applied before <tt>:namer</tt>, and both before any
# other option, so that a filename passed together with a namer does not
# override the namer. A +nil+ or +false+ filename/namer is skipped.
# The caller's hash is not modified.
def set_options(opts={})
  remaining = opts.dup
  [:filename, :namer].each do |key|
    value = remaining.delete(key)
    send("#{key}=", value) if value
  end
  remaining.each_pair { |key, value| send("#{key}=", value) }
end
395
+
396
# Return a new options hash, based on +opts+, prepared for creating a group
# from this LinkSet. A <tt>:public_path</tt> passed in +opts+ is dropped because
# groups cannot change the public path; the value set on this instance (if
# any) is used instead. Options missing from +opts+ are filled in, first with
# the group defaults and then with this instance's current settings.
def options_for_group(opts)
  group_opts = SitemapGenerator::Utilities.reverse_merge(opts,
    :include_index => false,
    :include_root => false,
    :sitemap_index => sitemap_index
  )
  group_opts.delete(:public_path)

  # Reverse merge the current settings
  # KJV: This list could be a problem because it needs to be maintained
  # when new options are added, but can easily be missed. We really could
  # do with a separate SitemapOptions class.
  inherited_keys = [
    :include_root,
    :include_index,
    :sitemaps_path,
    :public_path,
    :sitemaps_host,
    :verbose,
    :default_host,
    :adapter,
    :create_index,
    :compress,
    :schemas
  ]
  current_settings = {}
  inherited_keys.each do |key|
    value = instance_variable_get(:"@#{key}")
    # Settings that are still nil on this instance are not passed on.
    current_settings[key] = value unless value.nil?
  end
  SitemapGenerator::Utilities.reverse_merge!(group_opts, current_settings)
  group_opts
end
432
+
433
# Add the default links whose options are turned on: the root url when
# +include_root?+ and a link to the sitemap index when +include_index?+.
# Records in <tt>@added_default_links</tt> that this has been done.
def add_default_links
  sitemap.add('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0, :host => @default_host) if include_root?
  sitemap.add(sitemap_index, :lastmod => Time.now, :changefreq => 'always', :priority => 1.0) if include_index?
  @added_default_links = true
end
444
+
445
# Finalize the current sitemap by adding it to the index.
#
# Does nothing when the sitemap has already been finalized, or when it is
# empty and a group has been created. An empty sitemap is never added to the
# index.
#
# The default links are added first if they have not been added yet and no
# groups have been created. If the default links haven't been added we know
# that the sitemap is empty, because they are added on the first call to
# add(). This ensures that if the block passed to create() is empty the
# default links are still included in the sitemap.
def finalize_sitemap!
  return if sitemap.finalized?
  return if sitemap.empty? && @created_group
  add_default_links unless @added_default_links || @created_group
  # This will finalize it. We add to the index even if not creating an index because
  # the index keeps track of how many links are in our sitemaps and we need this info
  # for the summary line. Also the index determines which file gets the first name
  # so everything has to go via the index.
  add_to_index(sitemap) unless sitemap.empty?
end
464
+
465
# Finalize the sitemap index and write it out. Does nothing when the index is
# protected (it was passed in by another LinkSet) or has already been finalized.
def finalize_sitemap_index!
  unless @protect_index || sitemap_index.finalized?
    sitemap_index.finalize!
    sitemap_index.write
  end
end
472
+
473
# Return the interpreter linked to this instance, creating it on first use.
# The interpreter library is required here rather than at load time.
def interpreter
  require 'sitemap_generator/interpreter'
  return @interpreter if @interpreter
  @interpreter = SitemapGenerator::Interpreter.new(:link_set => self)
end
478
+
479
# Reset this instance. Keep the same options, but return to the same state
# as before any sitemaps were created: a finalized index (unless protected)
# and a finalized sitemap are discarded, the namer is reset and the default
# links are marked as not yet added.
def reset!
  if @sitemap_index && @sitemap_index.finalized? && !@protect_index
    @sitemap_index = nil
  end
  if @sitemap && @sitemap.finalized?
    @sitemap = nil
  end
  namer.reset
  @added_default_links = false
end
487
+
488
# Write the given string to STDOUT, but only when verbose is on. Used so that
# the sitemap config can be evaluated and some info output to STDOUT in a
# lazy fashion.
def output(string)
  puts string if verbose
end
494
+
495
+ module LocationHelpers
496
+ public
497
+
498
# Set the host name, including protocol, that will be used by default on each
# of your sitemap links. You can pass a different host in your options to `add`
# if you need to change it on a per-link basis.
#
# The host stored on the existing sitemap / index locations is refreshed with
# +sitemaps_host+, the same value +sitemap_location+ uses. That way changing
# the default host never overwrites an explicitly configured sitemaps host.
# When no sitemaps host is set this is simply the new default host.
def default_host=(value)
  @default_host = value
  update_location_info(:host, sitemaps_host)
end
505
+
506
# Set the public_path. This path gives the location of your public directory.
# The value is passed through <tt>Utilities.append_slash</tt> and turned into a
# Pathname; a relative path is resolved against <tt>SitemapGenerator.app.root</tt>.
# The existing sitemap / index locations are updated with the result.
#
# Example: 'tmp/' if you don't want to generate in public for some reason.
#
# NOTE(review): earlier documentation says +nil+ selects the current
# directory; that depends on how +append_slash+ treats +nil+ - confirm.
#
# Returns the resulting Pathname.
def public_path=(value)
  path = Pathname.new(SitemapGenerator::Utilities.append_slash(value))
  path = SitemapGenerator.app.root + path if path.relative?
  @public_path = path
  update_location_info(:public_path, @public_path)
  @public_path
end
521
+
522
# Return a Pathname with the full path to the public directory. When no
# public path has been set yet, 'public/' is assigned through the setter.
def public_path
  return @public_path if @public_path
  @public_path = send(:public_path=, 'public/')
end
526
+
527
# Set the sitemaps_path. This path gives the location to write sitemaps to
# relative to your public_path. The existing sitemap / index locations are
# updated with the new value.
# Example: 'sitemaps/' to generate your sitemaps in 'public/sitemaps/'.
def sitemaps_path=(value)
  @sitemaps_path = value
  update_location_info(:sitemaps_path, @sitemaps_path)
end
534
+
535
# Set the host name, including protocol, that will be used on all links to your sitemap
# files. Useful when the server that hosts the sitemaps is not on the same host as
# the links in the sitemap. The existing sitemap / index locations are updated
# with the new host.
#
# Note that `include_index?` answers false while this host differs from the
# `default_host`, to avoid adding a link to a sitemap with a different host
# than the other links.
def sitemaps_host=(value)
  @sitemaps_host = value
  update_location_info(:host, @sitemaps_host)
end
545
+
546
# Set the filename base to use when generating sitemaps (and the sitemap index).
# A new <tt>SitemapGenerator::SimpleNamer</tt> built from the filename replaces
# the current namer.
#
# === Example
# <tt>filename = :sitemap</tt>
#
# === Generates
# <tt>sitemap.xml.gz, sitemap1.xml.gz, sitemap2.xml.gz, ...</tt>
def filename=(value)
  @filename = value
  fresh_namer = SitemapGenerator::SimpleNamer.new(@filename)
  self.namer = fresh_namer
end
557
+
558
# Set the search engines hash to a new hash of search engine names mapped to
# ping URLs (see ping_search_engines). A nil (or false) value is stored as
# an empty hash.
# === Example
# <tt>search_engines = { :google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=%s" }</tt>
def search_engines=(value)
  @search_engines = value ? value : {}
end
566
+
567
# Return the hash of search engines, or an empty hash when none is set.
def search_engines
  return @search_engines if @search_engines
  {}
end
571
+
572
# Return a new +SitemapLocation+ instance built from the current host, namer,
# paths, adapter, verbose and compress settings.
def sitemap_location
  location_options = {
    :host => sitemaps_host,
    :namer => namer,
    :public_path => public_path,
    :sitemaps_path => @sitemaps_path,
    :adapter => @adapter,
    :verbose => verbose,
    :compress => @compress
  }
  SitemapGenerator::SitemapLocation.new(location_options)
end
584
+
585
# Return a new location object for the sitemap index, built from the current
# options. It takes the same options as +sitemap_location+ plus
# <tt>:create_index</tt>.
def sitemap_index_location
  location_options = {
    :host => sitemaps_host,
    :namer => namer,
    :public_path => public_path,
    :sitemaps_path => @sitemaps_path,
    :adapter => @adapter,
    :verbose => verbose,
    :create_index => @create_index,
    :compress => @compress
  }
  SitemapGenerator::SitemapLocation.new(location_options)
end
598
+
599
# Set the value of +create_index+ on this instance and, when allowed, on the
# location object of the existing sitemap index file.
#
# Whether to create a sitemap index file. Supported values: `true`, `false`, `:auto`.
# If `true` an index file is always created, regardless of how many links
# are in your sitemap. If `false` an index file is never created.
# If `:auto` an index file is created only if your sitemap has more than
# one sitemap file.
#
# The index location is only updated while the index is neither finalized nor
# protected, unless +force+ is true.
def create_index=(value, force=false)
  @create_index = value
  return unless @sitemap_index
  # Allow overriding the protected status of the index when we are creating a group.
  # Because sometimes we need to force an index in that case. But generally we don't
  # want to allow people to mess with this value if the index is protected.
  index_is_open = !@sitemap_index.finalized? && !@protect_index
  @sitemap_index.location[:create_index] = value if index_is_open || force
end
614
+
615
# Set the namer to use to generate the sitemap (and index) file names.
# This should be an instance of <tt>SitemapGenerator::SimpleNamer</tt>.
# The namer is also stored on the location of the current sitemap if it is
# not finalized, and on the index location if the index is neither finalized
# nor protected.
def namer=(value)
  @namer = value
  if @sitemap && !@sitemap.finalized?
    @sitemap.location[:namer] = value
  end
  if @sitemap_index && !@sitemap_index.finalized? && !@protect_index
    @sitemap_index.location[:namer] = value
  end
end
622
+
623
# Return the namer object. If it is not set, the namer of the current
# sitemap's location is used; when there is no sitemap (or it has no namer) a
# new SimpleNamer is created from the current filename. The result is
# remembered on the instance.
def namer
  return @namer if @namer
  inherited = @sitemap && @sitemap.location.namer
  @namer = inherited || SitemapGenerator::SimpleNamer.new(@filename)
end
629
+
630
# Set the value of the compress setting and store it on the locations of the
# existing sitemap index and sitemap, if any.
#
# Values:
# * `true` - Boolean; compress all files
# * `false` - Boolean; write out only uncompressed files
# * `:all_but_first` - Symbol; leave the first file uncompressed but compress any remaining files.
#
# The compression setting applies to groups too. So :all_but_first will have the same effect (the first
# file in the group will not be compressed, the rest will). So if you require different behaviour for your
# groups, pass in a `:compress` option e.g. <tt>group(:compress => false) { add('/link') }</tt>
def compress=(value)
  @compress = value
  [@sitemap_index, @sitemap].each do |file|
    file.location[:compress] = @compress if file
  end
end
645
+
646
# Return the current compression setting, exactly as stored by the setter or
# the constructor. See the setter for documentation of its values.
def compress
  current = @compress
  current
end
651
+
652
+ protected
653
+
654
# Update the given attribute on the location objects of the current sitemap
# index and sitemap file, skipping any that do not exist yet or are finalized.
# Nothing is created here. The index is only touched when
# <tt>opts[:include_index]</tt> is true, which defaults to "index not protected".
def update_location_info(attribute, value, opts={})
  opts = SitemapGenerator::Utilities.reverse_merge(opts, :include_index => !@protect_index)
  if opts[:include_index] && @sitemap_index && !@sitemap_index.finalized?
    @sitemap_index.location[attribute] = value
  end
  @sitemap.location[attribute] = value if @sitemap && !@sitemap.finalized?
end
661
+ end
662
+ include LocationHelpers
663
+ end
664
+ end
665
+
@@ -0,0 +1,7 @@
1
module SitemapGenerator
  # Railtie that loads the file named 'tasks' next to this one when Rails
  # loads rake tasks.
  class Railtie < Rails::Railtie
    rake_tasks do
      require File.expand_path('tasks', File.dirname(__FILE__))
    end
  end
end