sitemap_generator 2.2.1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/Gemfile +9 -24
  2. data/Gemfile.lock +23 -58
  3. data/README.md +56 -75
  4. data/Rakefile +29 -117
  5. data/VERSION +1 -1
  6. data/lib/sitemap_generator.rb +24 -8
  7. data/lib/sitemap_generator/application.rb +31 -4
  8. data/lib/sitemap_generator/builder.rb +0 -6
  9. data/lib/sitemap_generator/builder/sitemap_file.rb +16 -6
  10. data/lib/sitemap_generator/builder/sitemap_index_file.rb +4 -3
  11. data/lib/sitemap_generator/builder/sitemap_index_url.rb +1 -1
  12. data/lib/sitemap_generator/builder/sitemap_url.rb +6 -8
  13. data/lib/sitemap_generator/core_ext.rb +3 -0
  14. data/lib/sitemap_generator/core_ext/big_decimal.rb +45 -0
  15. data/lib/sitemap_generator/core_ext/numeric.rb +48 -0
  16. data/lib/sitemap_generator/helpers/number_helper.rb +237 -0
  17. data/lib/sitemap_generator/interpreter.rb +1 -1
  18. data/lib/sitemap_generator/link_set.rb +39 -18
  19. data/lib/sitemap_generator/railtie.rb +2 -2
  20. data/lib/sitemap_generator/sitemap_namer.rb +1 -1
  21. data/lib/sitemap_generator/tasks.rb +53 -1
  22. data/lib/sitemap_generator/utilities.rb +107 -1
  23. data/lib/tasks/sitemap_generator_tasks.rake +1 -0
  24. data/spec/blueprint.rb +15 -0
  25. data/spec/files/sitemap.create.rb +12 -0
  26. data/spec/files/sitemap.deprecated.rb +13 -0
  27. data/spec/files/sitemap.groups.rb +37 -0
  28. data/spec/sitemap_generator/application_spec.rb +69 -0
  29. data/spec/sitemap_generator/builder/sitemap_file_spec.rb +77 -0
  30. data/spec/sitemap_generator/builder/sitemap_index_file_spec.rb +38 -0
  31. data/spec/sitemap_generator/builder/sitemap_index_url_spec.rb +16 -0
  32. data/spec/sitemap_generator/builder/sitemap_url_spec.rb +152 -0
  33. data/spec/sitemap_generator/core_ext/bigdecimal_spec.rb +20 -0
  34. data/spec/sitemap_generator/core_ext/numeric_spec.rb +43 -0
  35. data/spec/sitemap_generator/geo_sitemap_spec.rb +30 -0
  36. data/spec/sitemap_generator/helpers/number_helper_spec.rb +191 -0
  37. data/spec/sitemap_generator/interpreter_spec.rb +24 -0
  38. data/spec/sitemap_generator/link_set_spec.rb +606 -0
  39. data/spec/sitemap_generator/news_sitemap_spec.rb +42 -0
  40. data/spec/sitemap_generator/sitemap_generator_spec.rb +232 -0
  41. data/spec/sitemap_generator/sitemap_groups_spec.rb +133 -0
  42. data/spec/sitemap_generator/sitemap_location_spec.rb +124 -0
  43. data/spec/sitemap_generator/sitemap_namer_spec.rb +61 -0
  44. data/spec/sitemap_generator/templates_spec.rb +24 -0
  45. data/spec/sitemap_generator/utilities/existence_spec.rb +26 -0
  46. data/spec/sitemap_generator/utilities/hash_spec.rb +57 -0
  47. data/spec/sitemap_generator/utilities/rounding_spec.rb +31 -0
  48. data/spec/sitemap_generator/utilities_spec.rb +50 -0
  49. data/spec/sitemap_generator/video_sitemap_spec.rb +103 -0
  50. data/spec/spec_helper.rb +20 -0
  51. data/spec/support/file_macros.rb +39 -0
  52. data/spec/support/schemas/siteindex.xsd +73 -0
  53. data/spec/support/schemas/sitemap-geo.xsd +41 -0
  54. data/spec/support/schemas/sitemap-news.xsd +159 -0
  55. data/spec/support/schemas/sitemap-video.xsd +409 -0
  56. data/spec/support/schemas/sitemap.xsd +115 -0
  57. data/spec/support/xml_macros.rb +55 -0
  58. metadata +141 -122
  59. data/tasks/sitemap_generator_tasks.rake +0 -43
@@ -21,7 +21,7 @@ module SitemapGenerator
21
21
  #
22
22
  # All other options are passed to the LinkSet by setting them using accessor methods.
23
23
  def initialize(opts={}, &block)
24
- opts.reverse_merge!(:link_set => SitemapGenerator::Sitemap)
24
+ SitemapGenerator::Utilities.reverse_merge!(opts, :link_set => SitemapGenerator::Sitemap)
25
25
  @linkset = opts.delete :link_set
26
26
  @linkset.send(:set_options, opts)
27
27
  eval(&block) if block_given?
@@ -32,11 +32,14 @@ module SitemapGenerator
32
32
  def create(opts={}, &block)
33
33
  reset!
34
34
  set_options(opts)
35
- start_time = Time.now if @verbose
35
+ if verbose
36
+ start_time = Time.now
37
+ puts "In #{sitemap_index.location.public_path}"
38
+ end
36
39
  interpreter.eval(:yield_sitemap => @yield_sitemap || SitemapGenerator.yield_sitemap?, &block)
37
40
  finalize!
38
- end_time = Time.now if @verbose
39
- puts sitemap_index.stats_summary(:time_taken => end_time - start_time) if @verbose
41
+ end_time = Time.now if verbose
42
+ output(sitemap_index.stats_summary(:time_taken => end_time - start_time)) if verbose
40
43
  self
41
44
  end
42
45
 
@@ -96,18 +99,17 @@ module SitemapGenerator
96
99
  # * <tt>:verbose</tt> - If +true+, output a summary line for each sitemap and sitemap
97
100
  # index that is created. Default is +false+.
98
101
  def initialize(options={})
99
- options.reverse_merge!({
102
+ SitemapGenerator::Utilities.reverse_merge!(options,
100
103
  :include_root => true,
101
104
  :include_index => true,
102
105
  :filename => :sitemap,
103
- :verbose => false,
104
106
  :search_engines => {
105
107
  :google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=%s",
106
108
  :ask => "http://submissions.ask.com/ping?sitemap=%s",
107
109
  :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=%s",
108
110
  :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=%s"
109
111
  }
110
- })
112
+ )
111
113
  options.each_pair { |k, v| instance_variable_set("@#{k}".to_sym, v) }
112
114
 
113
115
  # If an index is passed in, protect it from modification.
@@ -125,7 +127,7 @@ module SitemapGenerator
125
127
  # host - host for the link, defaults to your <tt>default_host</tt>.
126
128
  def add(link, options={})
127
129
  add_default_links if !@added_default_links
128
- sitemap.add(link, options.reverse_merge!(:host => @default_host))
130
+ sitemap.add(link, SitemapGenerator::Utilities.reverse_merge!(options, :host => @default_host))
129
131
  rescue SitemapGenerator::SitemapFullError
130
132
  finalize_sitemap!
131
133
  retry
@@ -227,22 +229,24 @@ module SitemapGenerator
227
229
  #
228
230
  # SitemapGenerator::Sitemap.ping_search_engines('http://example.com/sitemap_index.xml.gz', :super_engine => 'http://superengine.com/ping?url=%s')
229
231
  def ping_search_engines(*args)
230
- engines = args.last.is_a?(Hash) ? args.pop : {}
231
- index_url = CGI.escape(args.shift || sitemap_index_url)
232
-
232
+ require 'cgi/session'
233
233
  require 'open-uri'
234
234
  require 'timeout'
235
235
 
236
- puts "\n" if verbose
236
+ engines = args.last.is_a?(Hash) ? args.pop : {}
237
+ index_url = CGI.escape(args.shift || sitemap_index_url)
238
+
239
+ output("\n")
237
240
  search_engines.merge(engines).each do |engine, link|
238
241
  link = link % index_url
242
+ name = Utilities.titleize(engine.to_s)
239
243
  begin
240
244
  Timeout::timeout(10) {
241
245
  open(link)
242
246
  }
243
- puts "Successful ping of #{engine.to_s.titleize}" if verbose
247
+ output("Successful ping of #{name}")
244
248
  rescue Timeout::Error, StandardError => e
245
- puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect} (URL #{link})" if verbose
249
+ output("Ping failed for #{name}: #{e.inspect} (URL #{link})")
246
250
  end
247
251
  end
248
252
  end
@@ -296,6 +300,16 @@ module SitemapGenerator
296
300
  !!@include_root
297
301
  end
298
302
 
303
+ # Set verbose on the instance or by setting ENV['VERBOSE'] to true or false.
304
+ # By default verbose is true. When running rake tasks, pass the <tt>-s</tt>
305
+ # option to rake to turn verbose off.
306
def verbose
  # An explicit per-instance setting (true or false) always wins.
  return @verbose unless @verbose.nil?

  # Otherwise fall back to the module-level setting, treating "unset" (nil)
  # as true, and remember the answer on the instance.
  module_setting = SitemapGenerator.verbose
  @verbose = module_setting.nil? ? true : module_setting
end
312
+
299
313
  protected
300
314
 
301
315
  # Set each option on this instance using accessor methods. This will affect
@@ -319,7 +333,7 @@ module SitemapGenerator
319
333
  # change the public path.
320
334
  def options_for_group(opts)
321
335
  opts.delete(:public_path)
322
- opts.reverse_merge!(
336
+ SitemapGenerator::Utilities.reverse_merge!(opts,
323
337
  :include_index => false,
324
338
  :include_root => false,
325
339
  :sitemap_index => sitemap_index
@@ -340,7 +354,7 @@ module SitemapGenerator
340
354
  end
341
355
  hash
342
356
  end
343
- opts.reverse_merge!(current_settings)
357
+ SitemapGenerator::Utilities.reverse_merge!(opts, current_settings)
344
358
  opts
345
359
  end
346
360
 
@@ -371,7 +385,7 @@ module SitemapGenerator
371
385
  add_default_links if !@added_default_links && !@created_group
372
386
  return if sitemap.finalized? || sitemap.empty? && @created_group
373
387
  sitemap_index.add(sitemap)
374
- puts sitemap.summary if verbose
388
+ output(sitemap.summary)
375
389
  end
376
390
 
377
391
  # Finalize a sitemap index and output a summary line. Do nothing if it has already
@@ -379,7 +393,7 @@ module SitemapGenerator
379
393
  def finalize_sitemap_index!
380
394
  return if @protect_index || sitemap_index.finalized?
381
395
  sitemap_index.finalize!
382
- puts sitemap_index.summary if verbose
396
+ output(sitemap_index.summary)
383
397
  end
384
398
 
385
399
  # Return the interpreter linked to this instance.
@@ -397,6 +411,13 @@ module SitemapGenerator
397
411
  @added_default_links = false
398
412
  end
399
413
 
414
+ # Write the given string to STDOUT. Used so that the sitemap config can be
415
+ # evaluated and some info output to STDOUT in a lazy fashion.
416
def output(string)
  # Stay silent unless verbose output is enabled; returns nil either way.
  puts string if verbose
end
420
+
400
421
  module LocationHelpers
401
422
  public
402
423
 
@@ -522,7 +543,7 @@ module SitemapGenerator
522
543
  # Update the given attribute on the current sitemap index and sitemap file location objects.
523
544
  # But don't create the index or sitemap files yet if they are not already created.
524
545
  def update_location_info(attribute, value, opts={})
525
- opts.reverse_merge!(:include_index => !@protect_index)
546
+ SitemapGenerator::Utilities.reverse_merge!(opts, :include_index => !@protect_index)
526
547
  @sitemap_index.location[attribute] = value if opts[:include_index] && @sitemap_index && !@sitemap_index.finalized?
527
548
  @sitemap.location[attribute] = value if @sitemap && !@sitemap.finalized?
528
549
  end
@@ -1,7 +1,7 @@
1
1
  module SitemapGenerator
2
2
  class Railtie < Rails::Railtie
3
3
  rake_tasks do
4
- load File.expand_path('../../../tasks/sitemap_generator_tasks.rake', __FILE__)
4
+ require File.expand_path('../tasks', __FILE__)
5
5
  end
6
6
  end
7
- end
7
+ end
@@ -15,7 +15,7 @@ module SitemapGenerator
15
15
  # :extension - Default: '.xml.gz'. File extension to append.
16
16
  # :start - Default: 1. Index at which to start counting.
17
17
  def initialize(base, options={});
18
- @options = options.reverse_merge(
18
+ @options = SitemapGenerator::Utilities.reverse_merge(options,
19
19
  :extension => '.xml.gz',
20
20
  :start => 1
21
21
  )
@@ -1 +1,53 @@
1
- load File.expand_path(File.join(File.dirname(__FILE__), '../../tasks/sitemap_generator_tasks.rake'))
1
+ # require this file to load the tasks
2
+ require 'rake'
3
+
4
+ # Require sitemap_generator at runtime. If we don't do this the ActionView helpers are included
5
+ # before the Rails environment can be loaded by other Rake tasks, which causes problems
6
+ # for those tasks when rendering using ActionView.
7
+ namespace :sitemap do
8
+ # Require sitemap_generator only. When installed as a plugin the require will fail, so in
9
+ # that case, load the environment first.
10
+ task :require do
11
+ begin
12
+ require 'sitemap_generator'
13
+ rescue LoadError => e
14
+ if defined?(Rails)
15
+ Rake::Task['sitemap:require_environment'].invoke
16
+ else
17
+ raise e
18
+ end
19
+ end
20
+ end
21
+
22
+ # Require sitemap_generator after loading the Rails environment. We still need the require
23
+ # in case we are installed as a gem and are setup to not automatically be required.
24
+ task :require_environment do
25
+ if defined?(Rails)
26
+ Rake::Task['environment'].invoke
27
+ end
28
+ require 'sitemap_generator'
29
+ end
30
+
31
+ desc "Install a default config/sitemap.rb file"
32
+ task :install => ['sitemap:require'] do
33
+ SitemapGenerator::Utilities.install_sitemap_rb(verbose)
34
+ end
35
+
36
+ desc "Delete all Sitemap files in public/ directory"
37
+ task :clean => ['sitemap:require'] do
38
+ SitemapGenerator::Utilities.clean_files
39
+ end
40
+
41
+ desc "Generate sitemaps and ping search engines."
42
+ task :refresh => ['sitemap:create'] do
43
+ SitemapGenerator::Sitemap.ping_search_engines
44
+ end
45
+
46
+ desc "Generate sitemaps but don't ping search engines."
47
+ task 'refresh:no_ping' => ['sitemap:create']
48
+
49
+ desc "Generate sitemaps but don't ping search engines. Alias for refresh:no_ping."
50
+ task :create => ['sitemap:require_environment'] do
51
+ SitemapGenerator::Interpreter.run(:config_file => ENV["CONFIG_FILE"], :verbose => verbose)
52
+ end
53
+ end
@@ -33,5 +33,111 @@ module SitemapGenerator
33
33
  unknown_keys = hash.keys - [valid_keys].flatten
34
34
  raise(ArgumentError, "Unknown key(s): #{unknown_keys.join(", ")}") unless unknown_keys.empty?
35
35
  end
36
+
37
+ # Return a new hash with all keys converted to symbols, as long as
38
+ # they respond to +to_sym+.
39
def symbolize_keys(hash)
  # Work on a shallow copy so the caller's hash is left untouched.
  copy = hash.dup
  symbolize_keys!(copy)
end
42
+
43
+ # Destructively convert all keys to symbols, as long as they respond
44
+ # to +to_sym+.
45
def symbolize_keys!(hash)
  # Iterate over a snapshot of the keys because the hash is modified inside the loop.
  hash.keys.each do |original_key|
    converted = begin
      original_key.to_sym
    rescue StandardError
      # Keys that cannot be converted are kept as they are.
      original_key
    end
    # `|| original_key` covers a +to_sym+ that returns nil instead of raising.
    hash[converted || original_key] = hash.delete(original_key)
  end
  hash
end
51
+
52
+ # Rounds the float with the specified precision.
53
+ #
54
+ # x = 1.337
55
+ # x.round # => 1
56
+ # x.round(1) # => 1.3
57
+ # x.round(2) # => 1.34
58
def round(float, precision = nil)
  # Without a precision, defer to the number's own rounding.
  return float.round unless precision

  # Shift the wanted digits left of the decimal point, round, then shift back.
  scale = 10.0 ** precision
  (float * scale).round / scale
end
66
+
67
# Return a new hash made of every pair in +other_hash+ (the defaults) overlaid
# with the pairs in +hash+, so that when both contain a key the value from
# +hash+ wins. Neither argument is modified. Useful for filling in defaults
# on an options hash:
#
#   def setup(options = {})
#     options = SitemapGenerator::Utilities.reverse_merge(options, :size => 25, :velocity => 10)
#   end
#
# which gives the same result as:
#
#   def setup(options = {})
#     options = { :size => 25, :velocity => 10 }.merge(options)
#   end
#
# The default <tt>:size</tt> and <tt>:velocity</tt> only appear in the result
# when +options+ does not already have the respective key.
def reverse_merge(hash, other_hash)
  defaults = other_hash
  defaults.merge(hash)
end
85
+
86
# In-place variant of a reverse merge: every pair from +other_hash+ whose key
# is missing from +hash+ is copied into +hash+; keys +hash+ already has keep
# their current values. Modifies and returns +hash+.
def reverse_merge!(hash, other_hash)
  hash.update(other_hash) { |_key, existing, _default| existing }
end
91
+
92
+ # An object is blank if it's false, empty, or a whitespace string.
93
+ # For example, "", " ", +nil+, [], and {} are blank.
94
+ #
95
+ # This simplifies:
96
+ #
97
+ # if !address.nil? && !address.empty?
98
+ #
99
+ # ...to:
100
+ #
101
+ # if !address.blank?
102
def blank?(object)
  case object
  when nil, false then true
  when true, Numeric then false
  # A string is blank when it has no non-whitespace character.
  when String then (object =~ /\S/).nil?
  when Hash, Array then object.empty?
  when Object
    # Anything else is blank only if it reports itself as empty.
    return !object unless object.respond_to?(:empty?)
    object.empty?
  end
end
116
+
117
+ # An object is present if it's not blank.
118
def present?(object)
  # Presence is defined purely as the negation of blankness.
  is_blank = blank?(object)
  !is_blank
end
121
+
122
+ # Sets $VERBOSE for the duration of the block and back to its original value afterwards.
123
def with_warnings(flag)
  # Remember the current warning level so it can be restored afterwards.
  previous = $VERBOSE
  $VERBOSE = flag
  yield
ensure
  # Runs even when the block raises, so the setting never leaks.
  $VERBOSE = previous
end
129
+
130
# Return a title-cased copy of +string+: underscores become spaces and each
# word is capitalized (e.g. "sitemap_writer" => "Sitemap Writer").
#
# The argument is not modified, so frozen strings and strings still in use
# by the caller are safe to pass.
def titleize(string)
  # Split on (and keep) non-word characters so the separators survive the join.
  string.tr('_', ' ').split(/(\W)/).map(&:capitalize).join
end
134
+
135
# True when +value+ equals one of the accepted "on" spellings.
def truthy?(value)
  ['1', 1, 't', 'true', true].any? { |accepted| accepted == value }
end
138
+
139
# True when +value+ equals one of the accepted "off" spellings.
# Note that nil is not among them.
def falsy?(value)
  ['0', 0, 'f', 'false', false].any? { |accepted| accepted == value }
end
36
142
  end
37
- end
143
+ end
@@ -0,0 +1 @@
1
+ load(File.expand_path(File.join(File.dirname(__FILE__), '../sitemap_generator/tasks.rb')))
data/spec/blueprint.rb ADDED
@@ -0,0 +1,15 @@
1
+ require 'machinist/active_record'
2
+ require 'sham'
3
+
4
+ Sham.title { Time.now.to_i }
5
+ Content.blueprint do
6
+ title
7
+ end
8
+
9
module Blueprint
  # Seed spec data by calling Content.make once for each index 0..13, with
  # titles "Link 0" through "Link 13". Returns 14 (the result of +times+).
  def self.seed
    # The created object is not needed here, so it is not assigned to a local.
    14.times { |i| Content.make(:title => "Link #{i}") }
  end
end
@@ -0,0 +1,12 @@
1
+ SitemapGenerator::Sitemap.default_host = "http://www.example.com"
2
+
3
+ SitemapGenerator::Sitemap.create do
4
+ add '/contents', :priority => 0.7, :changefreq => 'daily'
5
+
6
+ # add all individual articles
7
+ (1..10).each do |i|
8
+ add "/content/#{i}"
9
+ end
10
+
11
+ add "/merchant_path", :host => "https://www.example.com"
12
+ end
@@ -0,0 +1,13 @@
1
+ SitemapGenerator::Sitemap.default_host = "http://www.example.com"
2
+ SitemapGenerator::Sitemap.yahoo_app_id = false
3
+
4
+ SitemapGenerator::Sitemap.add_links do |sitemap|
5
+ sitemap.add '/contents', :priority => 0.7, :changefreq => 'daily'
6
+
7
+ # add all individual articles
8
+ (1..10).each do |i|
9
+ sitemap.add "/content/#{i}"
10
+ end
11
+
12
+ sitemap.add "/merchant_path", :host => "https://www.example.com"
13
+ end
@@ -0,0 +1,37 @@
1
+ SitemapGenerator::Sitemap.default_host = "http://www.example.com"
2
+ SitemapGenerator::Sitemap.create(
3
+ :include_root => true, :include_index => true,
4
+ :filename => :new_sitemaps, :sitemaps_path => 'fr/') do
5
+
6
+ add('/one', :priority => 0.7, :changefreq => 'daily')
7
+
8
+ # Test a new location and filename and sitemaps host
9
+ group(:sitemaps_path => 'en/', :filename => :xxx,
10
+ :sitemaps_host => "http://newhost.com") do
11
+
12
+ add '/two'
13
+ add '/three'
14
+ end
15
+
16
+ # Test a namer
17
+ group(:sitemaps_namer => SitemapGenerator::SitemapNamer.new(:abc, :start => 3)) do
18
+ add '/four'
19
+ add '/five'
20
+ add '/six'
21
+ end
22
+
23
+ add '/seven'
24
+
25
+ # This should be in a file of its own
26
+ group(:sitemaps_host => "http://exceptional.com") do
27
+ add '/eight'
28
+ add '/nine'
29
+ end
30
+
31
+ add '/ten'
32
+
33
+ # This should have no effect. Already added default links.
34
+ group(:include_root => true, :include_index => true) {}
35
+
36
+ add "/merchant_path", :host => "https://www.merchanthost.com"
37
+ end
@@ -0,0 +1,69 @@
1
+ require 'spec_helper'
2
+
3
+ describe SitemapGenerator::Application do
4
+ before :all do
5
+ SitemapGenerator::Utilities.with_warnings(nil) do
6
+ Object.const_set(:Rails, Object.new)
7
+ end
8
+ end
9
+
10
+ after :all do
11
+ SitemapGenerator::Utilities.with_warnings(nil) do
12
+ Object.const_set(:Rails, nil)
13
+ end
14
+ end
15
+
16
+ before :each do
17
+ @app = SitemapGenerator::Application.new
18
+ end
19
+
20
+ describe "rails3?" do
21
+ tests = {
22
+ :nil => false,
23
+ '2.3.11' => false,
24
+ '3.0.1' => true,
25
+ '3.0.11' => true
26
+ }
27
+
28
+ it "should identify the rails version correctly" do
29
+ tests.each do |version, result|
30
+ Rails.expects(:version).returns(version)
31
+ @app.rails3?.should == result
32
+ end
33
+ end
34
+ end
35
+
36
+ describe "with Rails" do
37
+ before :each do
38
+ @root = '/test'
39
+ Rails.expects(:root).returns(@root).at_least_once
40
+ end
41
+
42
+ it "should use the Rails.root" do
43
+ @app.root.should be_a(Pathname)
44
+ @app.root.to_s.should == @root
45
+ (@app.root + 'public/').to_s.should == File.join(@root, 'public/')
46
+ end
47
+ end
48
+
49
+ describe "with no Rails" do
50
+ before :each do
51
+ @rails = Rails
52
+ Object.send(:remove_const, :Rails)
53
+ end
54
+
55
+ after :each do
56
+ Object::Rails = @rails
57
+ end
58
+
59
+ it "should not be Rails" do
60
+ @app.rails?.should be_false
61
+ end
62
+
63
+ it "should use the current working directory" do
64
+ @app.root.should be_a(Pathname)
65
+ @app.root.to_s.should == Dir.getwd
66
+ (@app.root + 'public/').to_s.should == File.join(Dir.getwd, 'public/')
67
+ end
68
+ end
69
+ end