sitemap_generator 2.2.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/Gemfile +9 -24
  2. data/Gemfile.lock +23 -58
  3. data/README.md +56 -75
  4. data/Rakefile +29 -117
  5. data/VERSION +1 -1
  6. data/lib/sitemap_generator.rb +24 -8
  7. data/lib/sitemap_generator/application.rb +31 -4
  8. data/lib/sitemap_generator/builder.rb +0 -6
  9. data/lib/sitemap_generator/builder/sitemap_file.rb +16 -6
  10. data/lib/sitemap_generator/builder/sitemap_index_file.rb +4 -3
  11. data/lib/sitemap_generator/builder/sitemap_index_url.rb +1 -1
  12. data/lib/sitemap_generator/builder/sitemap_url.rb +6 -8
  13. data/lib/sitemap_generator/core_ext.rb +3 -0
  14. data/lib/sitemap_generator/core_ext/big_decimal.rb +45 -0
  15. data/lib/sitemap_generator/core_ext/numeric.rb +48 -0
  16. data/lib/sitemap_generator/helpers/number_helper.rb +237 -0
  17. data/lib/sitemap_generator/interpreter.rb +1 -1
  18. data/lib/sitemap_generator/link_set.rb +39 -18
  19. data/lib/sitemap_generator/railtie.rb +2 -2
  20. data/lib/sitemap_generator/sitemap_namer.rb +1 -1
  21. data/lib/sitemap_generator/tasks.rb +53 -1
  22. data/lib/sitemap_generator/utilities.rb +107 -1
  23. data/lib/tasks/sitemap_generator_tasks.rake +1 -0
  24. data/spec/blueprint.rb +15 -0
  25. data/spec/files/sitemap.create.rb +12 -0
  26. data/spec/files/sitemap.deprecated.rb +13 -0
  27. data/spec/files/sitemap.groups.rb +37 -0
  28. data/spec/sitemap_generator/application_spec.rb +69 -0
  29. data/spec/sitemap_generator/builder/sitemap_file_spec.rb +77 -0
  30. data/spec/sitemap_generator/builder/sitemap_index_file_spec.rb +38 -0
  31. data/spec/sitemap_generator/builder/sitemap_index_url_spec.rb +16 -0
  32. data/spec/sitemap_generator/builder/sitemap_url_spec.rb +152 -0
  33. data/spec/sitemap_generator/core_ext/bigdecimal_spec.rb +20 -0
  34. data/spec/sitemap_generator/core_ext/numeric_spec.rb +43 -0
  35. data/spec/sitemap_generator/geo_sitemap_spec.rb +30 -0
  36. data/spec/sitemap_generator/helpers/number_helper_spec.rb +191 -0
  37. data/spec/sitemap_generator/interpreter_spec.rb +24 -0
  38. data/spec/sitemap_generator/link_set_spec.rb +606 -0
  39. data/spec/sitemap_generator/news_sitemap_spec.rb +42 -0
  40. data/spec/sitemap_generator/sitemap_generator_spec.rb +232 -0
  41. data/spec/sitemap_generator/sitemap_groups_spec.rb +133 -0
  42. data/spec/sitemap_generator/sitemap_location_spec.rb +124 -0
  43. data/spec/sitemap_generator/sitemap_namer_spec.rb +61 -0
  44. data/spec/sitemap_generator/templates_spec.rb +24 -0
  45. data/spec/sitemap_generator/utilities/existence_spec.rb +26 -0
  46. data/spec/sitemap_generator/utilities/hash_spec.rb +57 -0
  47. data/spec/sitemap_generator/utilities/rounding_spec.rb +31 -0
  48. data/spec/sitemap_generator/utilities_spec.rb +50 -0
  49. data/spec/sitemap_generator/video_sitemap_spec.rb +103 -0
  50. data/spec/spec_helper.rb +20 -0
  51. data/spec/support/file_macros.rb +39 -0
  52. data/spec/support/schemas/siteindex.xsd +73 -0
  53. data/spec/support/schemas/sitemap-geo.xsd +41 -0
  54. data/spec/support/schemas/sitemap-news.xsd +159 -0
  55. data/spec/support/schemas/sitemap-video.xsd +409 -0
  56. data/spec/support/schemas/sitemap.xsd +115 -0
  57. data/spec/support/xml_macros.rb +55 -0
  58. metadata +141 -122
  59. data/tasks/sitemap_generator_tasks.rake +0 -43
@@ -21,7 +21,7 @@ module SitemapGenerator
21
21
  #
22
22
  # All other options are passed to the LinkSet by setting them using accessor methods.
23
23
  def initialize(opts={}, &block)
24
- opts.reverse_merge!(:link_set => SitemapGenerator::Sitemap)
24
+ SitemapGenerator::Utilities.reverse_merge!(opts, :link_set => SitemapGenerator::Sitemap)
25
25
  @linkset = opts.delete :link_set
26
26
  @linkset.send(:set_options, opts)
27
27
  eval(&block) if block_given?
@@ -32,11 +32,14 @@ module SitemapGenerator
32
32
  def create(opts={}, &block)
33
33
  reset!
34
34
  set_options(opts)
35
- start_time = Time.now if @verbose
35
+ if verbose
36
+ start_time = Time.now
37
+ puts "In #{sitemap_index.location.public_path}"
38
+ end
36
39
  interpreter.eval(:yield_sitemap => @yield_sitemap || SitemapGenerator.yield_sitemap?, &block)
37
40
  finalize!
38
- end_time = Time.now if @verbose
39
- puts sitemap_index.stats_summary(:time_taken => end_time - start_time) if @verbose
41
+ end_time = Time.now if verbose
42
+ output(sitemap_index.stats_summary(:time_taken => end_time - start_time)) if verbose
40
43
  self
41
44
  end
42
45
 
@@ -96,18 +99,17 @@ module SitemapGenerator
96
99
  # * <tt>:verbose</tt> - If +true+, output a summary line for each sitemap and sitemap
97
100
  # index that is created. Default is +false+.
98
101
  def initialize(options={})
99
- options.reverse_merge!({
102
+ SitemapGenerator::Utilities.reverse_merge!(options,
100
103
  :include_root => true,
101
104
  :include_index => true,
102
105
  :filename => :sitemap,
103
- :verbose => false,
104
106
  :search_engines => {
105
107
  :google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=%s",
106
108
  :ask => "http://submissions.ask.com/ping?sitemap=%s",
107
109
  :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=%s",
108
110
  :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=%s"
109
111
  }
110
- })
112
+ )
111
113
  options.each_pair { |k, v| instance_variable_set("@#{k}".to_sym, v) }
112
114
 
113
115
  # If an index is passed in, protect it from modification.
@@ -125,7 +127,7 @@ module SitemapGenerator
125
127
  # host - host for the link, defaults to your <tt>default_host</tt>.
126
128
  def add(link, options={})
127
129
  add_default_links if !@added_default_links
128
- sitemap.add(link, options.reverse_merge!(:host => @default_host))
130
+ sitemap.add(link, SitemapGenerator::Utilities.reverse_merge!(options, :host => @default_host))
129
131
  rescue SitemapGenerator::SitemapFullError
130
132
  finalize_sitemap!
131
133
  retry
@@ -227,22 +229,24 @@ module SitemapGenerator
227
229
  #
228
230
  # SitemapGenerator::Sitemap.ping_search_engines('http://example.com/sitemap_index.xml.gz', :super_engine => 'http://superengine.com/ping?url=%s')
229
231
  def ping_search_engines(*args)
230
- engines = args.last.is_a?(Hash) ? args.pop : {}
231
- index_url = CGI.escape(args.shift || sitemap_index_url)
232
-
232
+ require 'cgi/session'
233
233
  require 'open-uri'
234
234
  require 'timeout'
235
235
 
236
- puts "\n" if verbose
236
+ engines = args.last.is_a?(Hash) ? args.pop : {}
237
+ index_url = CGI.escape(args.shift || sitemap_index_url)
238
+
239
+ output("\n")
237
240
  search_engines.merge(engines).each do |engine, link|
238
241
  link = link % index_url
242
+ name = Utilities.titleize(engine.to_s)
239
243
  begin
240
244
  Timeout::timeout(10) {
241
245
  open(link)
242
246
  }
243
- puts "Successful ping of #{engine.to_s.titleize}" if verbose
247
+ output("Successful ping of #{name}")
244
248
  rescue Timeout::Error, StandardError => e
245
- puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect} (URL #{link})" if verbose
249
+ output("Ping failed for #{name}: #{e.inspect} (URL #{link})")
246
250
  end
247
251
  end
248
252
  end
@@ -296,6 +300,16 @@ module SitemapGenerator
296
300
  !!@include_root
297
301
  end
298
302
 
303
# Return the verbose flag for this instance.
#
# An explicitly assigned <tt>@verbose</tt> (including +false+) is returned as is.
# When it is +nil+ the value is taken from <tt>SitemapGenerator.verbose</tt>, and
# if that is +nil+ as well the flag defaults to +true+. The resolved value is
# stored in <tt>@verbose</tt> so later calls return it directly.
#
# NOTE(review): the original comment says the flag can also be driven through
# ENV['VERBOSE'] and rake's <tt>-s</tt> option; presumably that is what feeds
# <tt>SitemapGenerator.verbose</tt> - confirm against its definition.
def verbose
  return @verbose unless @verbose.nil?

  # A nil module-level setting means "not configured", which counts as verbose.
  @verbose = SitemapGenerator.verbose.nil? || SitemapGenerator.verbose
end
312
+
299
313
  protected
300
314
 
301
315
  # Set each option on this instance using accessor methods. This will affect
@@ -319,7 +333,7 @@ module SitemapGenerator
319
333
  # change the public path.
320
334
  def options_for_group(opts)
321
335
  opts.delete(:public_path)
322
- opts.reverse_merge!(
336
+ SitemapGenerator::Utilities.reverse_merge!(opts,
323
337
  :include_index => false,
324
338
  :include_root => false,
325
339
  :sitemap_index => sitemap_index
@@ -340,7 +354,7 @@ module SitemapGenerator
340
354
  end
341
355
  hash
342
356
  end
343
- opts.reverse_merge!(current_settings)
357
+ SitemapGenerator::Utilities.reverse_merge!(opts, current_settings)
344
358
  opts
345
359
  end
346
360
 
@@ -371,7 +385,7 @@ module SitemapGenerator
371
385
  add_default_links if !@added_default_links && !@created_group
372
386
  return if sitemap.finalized? || sitemap.empty? && @created_group
373
387
  sitemap_index.add(sitemap)
374
- puts sitemap.summary if verbose
388
+ output(sitemap.summary)
375
389
  end
376
390
 
377
391
  # Finalize a sitemap index and output a summary line. Do nothing if it has already
@@ -379,7 +393,7 @@ module SitemapGenerator
379
393
  def finalize_sitemap_index!
380
394
  return if @protect_index || sitemap_index.finalized?
381
395
  sitemap_index.finalize!
382
- puts sitemap_index.summary if verbose
396
+ output(sitemap_index.summary)
383
397
  end
384
398
 
385
399
  # Return the interpreter linked to this instance.
@@ -397,6 +411,13 @@ module SitemapGenerator
397
411
  @added_default_links = false
398
412
  end
399
413
 
414
# Print +string+ with +puts+, but only when +verbose+ is truthy; otherwise do
# nothing. Always returns +nil+. Callers can pass informational messages here
# unconditionally and let the verbose setting decide whether they appear.
def output(string)
  puts string if verbose
end
420
+
400
421
  module LocationHelpers
401
422
  public
402
423
 
@@ -522,7 +543,7 @@ module SitemapGenerator
522
543
  # Update the given attribute on the current sitemap index and sitemap file location objects.
523
544
  # But don't create the index or sitemap files yet if they are not already created.
524
545
  def update_location_info(attribute, value, opts={})
525
- opts.reverse_merge!(:include_index => !@protect_index)
546
+ SitemapGenerator::Utilities.reverse_merge!(opts, :include_index => !@protect_index)
526
547
  @sitemap_index.location[attribute] = value if opts[:include_index] && @sitemap_index && !@sitemap_index.finalized?
527
548
  @sitemap.location[attribute] = value if @sitemap && !@sitemap.finalized?
528
549
  end
@@ -1,7 +1,7 @@
1
1
  module SitemapGenerator
2
2
  class Railtie < Rails::Railtie
3
3
  rake_tasks do
4
- load File.expand_path('../../../tasks/sitemap_generator_tasks.rake', __FILE__)
4
+ require File.expand_path('../tasks', __FILE__)
5
5
  end
6
6
  end
7
- end
7
+ end
@@ -15,7 +15,7 @@ module SitemapGenerator
15
15
  # :extension - Default: '.xml.gz'. File extension to append.
16
16
  # :start - Default: 1. Index at which to start counting.
17
17
  def initialize(base, options={});
18
- @options = options.reverse_merge(
18
+ @options = SitemapGenerator::Utilities.reverse_merge(options,
19
19
  :extension => '.xml.gz',
20
20
  :start => 1
21
21
  )
@@ -1 +1,53 @@
1
- load File.expand_path(File.join(File.dirname(__FILE__), '../../tasks/sitemap_generator_tasks.rake'))
1
+ # require this file to load the tasks
2
+ require 'rake'
3
+
4
# The sitemap_generator library is required lazily, from inside the tasks.
# Per the original note, requiring it up front pulls in the ActionView helpers
# before other Rake tasks have loaded the Rails environment, which breaks
# ActionView rendering in those tasks.
namespace :sitemap do
  # Require sitemap_generator on its own. If the require fails (the original
  # note attributes this to plugin installs) and Rails is defined, fall back to
  # the task that loads the environment first; otherwise re-raise the LoadError.
  task :require do
    begin
      require 'sitemap_generator'
    rescue LoadError
      raise unless defined?(Rails)
      Rake::Task['sitemap:require_environment'].invoke
    end
  end

  # Load the Rails environment when Rails is defined, then require
  # sitemap_generator explicitly in case it was not required automatically.
  task :require_environment do
    Rake::Task['environment'].invoke if defined?(Rails)
    require 'sitemap_generator'
  end

  desc "Install a default config/sitemap.rb file"
  task :install => %w[sitemap:require] do
    # NOTE(review): +verbose+ is not defined in this file; presumably Rake's
    # own verbose flag - confirm.
    SitemapGenerator::Utilities.install_sitemap_rb(verbose)
  end

  desc "Delete all Sitemap files in public/ directory"
  task :clean => %w[sitemap:require] do
    SitemapGenerator::Utilities.clean_files
  end

  desc "Generate sitemaps and ping search engines."
  task :refresh => %w[sitemap:create] do
    SitemapGenerator::Sitemap.ping_search_engines
  end

  desc "Generate sitemaps but don't ping search engines."
  task 'refresh:no_ping' => %w[sitemap:create]

  desc "Generate sitemaps but don't ping search engines. Alias for refresh:no_ping."
  task :create => %w[sitemap:require_environment] do
    # CONFIG_FILE is passed through as-is (nil when the variable is unset).
    SitemapGenerator::Interpreter.run(:config_file => ENV["CONFIG_FILE"], :verbose => verbose)
  end
end
@@ -33,5 +33,111 @@ module SitemapGenerator
33
33
  unknown_keys = hash.keys - [valid_keys].flatten
34
34
  raise(ArgumentError, "Unknown key(s): #{unknown_keys.join(", ")}") unless unknown_keys.empty?
35
35
  end
36
+
37
# Return a copy of +hash+ with its keys converted to symbols wherever the
# conversion is possible. The hash passed in is not modified: the work is
# done on a shallow duplicate by <tt>symbolize_keys!</tt>.
def symbolize_keys(hash)
  duplicate = hash.dup
  symbolize_keys!(duplicate)
end
42
+
43
# Convert the keys of +hash+ to symbols in place and return that same hash.
# A key is left unchanged when calling +to_sym+ on it raises (for instance
# because the key has no such method) or returns a falsy value.
def symbolize_keys!(hash)
  # Iterate over a snapshot of the keys because the hash is modified below.
  hash.keys.each do |original_key|
    converted = begin
      original_key.to_sym
    rescue StandardError
      original_key
    end
    hash[converted || original_key] = hash.delete(original_key)
  end
  hash
end
51
+
52
# Round +float+, optionally keeping +precision+ decimal places.
#
#   round(1.337)     # => 1
#   round(1.337, 1)  # => 1.3
#   round(1.337, 2)  # => 1.34
#
# Without a precision the result is whatever <tt>float.round</tt> returns.
# With a precision the value is scaled by a power of ten, rounded, and scaled
# back, so the result is a Float.
def round(float, precision = nil)
  return float.round unless precision

  scale = 10.0 ** precision
  (float * scale).round / scale
end
66
+
67
# Merge two hashes so that entries in +hash+ win over entries in +other_hash+,
# returning a new hash and leaving both arguments untouched. Handy for filling
# an options hash with defaults:
#
#   def setup(options = {})
#     options = reverse_merge(options, :size => 25, :velocity => 10)
#   end
#
# which is the same as writing:
#
#   def setup(options = {})
#     options = { :size => 25, :velocity => 10 }.merge(options)
#   end
#
# <tt>:size</tt> and <tt>:velocity</tt> only take their default values when
# +options+ does not already contain the respective key.
def reverse_merge(hash, other_hash)
  other_hash.merge(hash) { |_key, _default, supplied| supplied }
end
85
+
86
# In-place counterpart of <tt>reverse_merge</tt>: copy into +hash+ every entry
# of +other_hash+ whose key +hash+ does not have yet. Existing entries of
# +hash+ are never overwritten. Returns +hash+ itself.
def reverse_merge!(hash, other_hash)
  # On a key collision keep the value that is already present.
  hash.update(other_hash) { |_key, current, _default| current }
end
91
+
92
# Tell whether +object+ is blank: +nil+, +false+, a string that is empty or
# all whitespace, or an empty collection. For example "", " ", +nil+, [] and
# {} are blank, whereas +true+ and every number are not.
#
# It lets you replace
#
#   if !address.nil? && !address.empty?
#
# with
#
#   if !blank?(address)
#
# Any other object is blank when it responds to +empty?+ and reports itself
# empty; without +empty?+ the result is <tt>!object</tt>.
def blank?(object)
  case object
  when nil, false
    true
  when true, Numeric
    false
  when String
    # Blank unless at least one non-whitespace character is present.
    !(object =~ /\S/)
  when Hash, Array
    object.empty?
  when Object
    return !object unless object.respond_to?(:empty?)

    object.empty?
  end
end
116
+
117
# Inverse of <tt>blank?</tt>: +true+ when +object+ is not blank, +false+ otherwise.
def present?(object)
  blank?(object) ? false : true
end
121
+
122
# Run the given block with <tt>$VERBOSE</tt> set to +flag+ and put the previous
# value back afterwards, even when the block raises. Returns the block's value.
def with_warnings(flag)
  previous = $VERBOSE
  $VERBOSE = flag
  yield
ensure
  $VERBOSE = previous
end
129
+
130
# Turn an identifier such as "sitemap_writer" into a display name such as
# "Sitemap Writer": underscores become spaces and every word is capitalized
# (first letter upper case, the rest lower case).
#
# Returns a new string. The argument is never modified, so frozen strings
# are accepted and callers keep their original value.
def titleize(string)
  # The capture group keeps the non-word separators so join restores them.
  string.tr('_', ' ').split(/(\W)/).map(&:capitalize).join
end
134
+
135
# Tell whether +value+ is one of the accepted spellings of "true":
# +true+, the integer 1, or the strings '1', 't' and 'true'.
# Comparison uses <tt>==</tt>, so matching is case-sensitive.
def truthy?(value)
  accepted = ['1', 1, 't', 'true', true]
  accepted.any? { |candidate| candidate == value }
end
138
+
139
# Tell whether +value+ is one of the accepted spellings of "false":
# +false+, the integer 0, or the strings '0', 'f' and 'false'.
# Comparison uses <tt>==</tt>, so matching is case-sensitive; +nil+ is not
# in the list.
def falsy?(value)
  accepted = ['0', 0, 'f', 'false', false]
  accepted.any? { |candidate| candidate == value }
end
36
142
  end
37
- end
143
+ end
@@ -0,0 +1 @@
1
+ load(File.expand_path(File.join(File.dirname(__FILE__), '../sitemap_generator/tasks.rb')))
data/spec/blueprint.rb ADDED
@@ -0,0 +1,15 @@
1
+ require 'machinist/active_record'
2
+ require 'sham'
3
+
4
+ Sham.title { Time.now.to_i }
5
+ Content.blueprint do
6
+ title
7
+ end
8
+
9
+ module Blueprint
10
+ def self.seed
11
+ 14.times do |i|
12
+ content = Content.make(:title => "Link #{i}")
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,12 @@
1
+ SitemapGenerator::Sitemap.default_host = "http://www.example.com"
2
+
3
+ SitemapGenerator::Sitemap.create do
4
+ add '/contents', :priority => 0.7, :changefreq => 'daily'
5
+
6
+ # add all individual articles
7
+ (1..10).each do |i|
8
+ add "/content/#{i}"
9
+ end
10
+
11
+ add "/merchant_path", :host => "https://www.example.com"
12
+ end
@@ -0,0 +1,13 @@
1
+ SitemapGenerator::Sitemap.default_host = "http://www.example.com"
2
+ SitemapGenerator::Sitemap.yahoo_app_id = false
3
+
4
+ SitemapGenerator::Sitemap.add_links do |sitemap|
5
+ sitemap.add '/contents', :priority => 0.7, :changefreq => 'daily'
6
+
7
+ # add all individual articles
8
+ (1..10).each do |i|
9
+ sitemap.add "/content/#{i}"
10
+ end
11
+
12
+ sitemap.add "/merchant_path", :host => "https://www.example.com"
13
+ end
@@ -0,0 +1,37 @@
1
+ SitemapGenerator::Sitemap.default_host = "http://www.example.com"
2
+ SitemapGenerator::Sitemap.create(
3
+ :include_root => true, :include_index => true,
4
+ :filename => :new_sitemaps, :sitemaps_path => 'fr/') do
5
+
6
+ add('/one', :priority => 0.7, :changefreq => 'daily')
7
+
8
+ # Test a new location and filename and sitemaps host
9
+ group(:sitemaps_path => 'en/', :filename => :xxx,
10
+ :sitemaps_host => "http://newhost.com") do
11
+
12
+ add '/two'
13
+ add '/three'
14
+ end
15
+
16
+ # Test a namer
17
+ group(:sitemaps_namer => SitemapGenerator::SitemapNamer.new(:abc, :start => 3)) do
18
+ add '/four'
19
+ add '/five'
20
+ add '/six'
21
+ end
22
+
23
+ add '/seven'
24
+
25
+ # This should be in a file of its own
26
+ group(:sitemaps_host => "http://exceptional.com") do
27
+ add '/eight'
28
+ add '/nine'
29
+ end
30
+
31
+ add '/ten'
32
+
33
+ # This should have no effect. Already added default links.
34
+ group(:include_root => true, :include_index => true) {}
35
+
36
+ add "/merchant_path", :host => "https://www.merchanthost.com"
37
+ end
@@ -0,0 +1,69 @@
1
+ require 'spec_helper'
2
+
3
+ describe SitemapGenerator::Application do
4
+ before :all do
5
+ SitemapGenerator::Utilities.with_warnings(nil) do
6
+ Object.const_set(:Rails, Object.new)
7
+ end
8
+ end
9
+
10
+ after :all do
11
+ SitemapGenerator::Utilities.with_warnings(nil) do
12
+ Object.const_set(:Rails, nil)
13
+ end
14
+ end
15
+
16
+ before :each do
17
+ @app = SitemapGenerator::Application.new
18
+ end
19
+
20
+ describe "rails3?" do
21
+ tests = {
22
+ :nil => false,
23
+ '2.3.11' => false,
24
+ '3.0.1' => true,
25
+ '3.0.11' => true
26
+ }
27
+
28
+ it "should identify the rails version correctly" do
29
+ tests.each do |version, result|
30
+ Rails.expects(:version).returns(version)
31
+ @app.rails3?.should == result
32
+ end
33
+ end
34
+ end
35
+
36
+ describe "with Rails" do
37
+ before :each do
38
+ @root = '/test'
39
+ Rails.expects(:root).returns(@root).at_least_once
40
+ end
41
+
42
+ it "should use the Rails.root" do
43
+ @app.root.should be_a(Pathname)
44
+ @app.root.to_s.should == @root
45
+ (@app.root + 'public/').to_s.should == File.join(@root, 'public/')
46
+ end
47
+ end
48
+
49
+ describe "with no Rails" do
50
+ before :each do
51
+ @rails = Rails
52
+ Object.send(:remove_const, :Rails)
53
+ end
54
+
55
+ after :each do
56
+ Object::Rails = @rails
57
+ end
58
+
59
+ it "should not be Rails" do
60
+ @app.rails?.should be_false
61
+ end
62
+
63
+ it "should use the current working directory" do
64
+ @app.root.should be_a(Pathname)
65
+ @app.root.to_s.should == Dir.getwd
66
+ (@app.root + 'public/').to_s.should == File.join(Dir.getwd, 'public/')
67
+ end
68
+ end
69
+ end