sitemap_generator 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. data/.autotest +36 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +131 -0
  4. data/Rakefile +29 -0
  5. data/VERSION +1 -0
  6. data/init.rb +2 -0
  7. data/install.rb +13 -0
  8. data/lib/sitemap_generator.rb +10 -0
  9. data/lib/sitemap_generator/helper.rb +59 -0
  10. data/lib/sitemap_generator/link.rb +19 -0
  11. data/lib/sitemap_generator/link_set.rb +28 -0
  12. data/lib/sitemap_generator/mapper.rb +16 -0
  13. data/lib/sitemap_generator/tasks.rb +1 -0
  14. data/tasks/sitemap_generator_tasks.rake +74 -0
  15. data/templates/sitemap.rb +30 -0
  16. data/templates/sitemap_index.builder +22 -0
  17. data/templates/xml_sitemap.builder +15 -0
  18. data/test/mock_app/.gitignore +3 -0
  19. data/test/mock_app/README +243 -0
  20. data/test/mock_app/Rakefile +10 -0
  21. data/test/mock_app/app/controllers/application_controller.rb +10 -0
  22. data/test/mock_app/app/controllers/contents_controller.rb +85 -0
  23. data/test/mock_app/app/models/content.rb +2 -0
  24. data/test/mock_app/config/boot.rb +110 -0
  25. data/test/mock_app/config/database.yml +5 -0
  26. data/test/mock_app/config/environment.rb +41 -0
  27. data/test/mock_app/config/environments/development.rb +17 -0
  28. data/test/mock_app/config/environments/production.rb +28 -0
  29. data/test/mock_app/config/environments/test.rb +28 -0
  30. data/test/mock_app/config/initializers/backtrace_silencers.rb +7 -0
  31. data/test/mock_app/config/initializers/inflections.rb +10 -0
  32. data/test/mock_app/config/initializers/mime_types.rb +5 -0
  33. data/test/mock_app/config/initializers/new_rails_defaults.rb +19 -0
  34. data/test/mock_app/config/initializers/session_store.rb +15 -0
  35. data/test/mock_app/config/locales/en.yml +5 -0
  36. data/test/mock_app/config/routes.rb +45 -0
  37. data/test/mock_app/config/sitemap.rb +13 -0
  38. data/test/mock_app/db/migrate/20090826121911_create_contents.rb +12 -0
  39. data/test/mock_app/db/schema.rb +20 -0
  40. data/test/mock_app/db/test.sqlite3 +0 -0
  41. data/test/mock_app/public/index.html +275 -0
  42. data/test/mock_app/script/console +3 -0
  43. data/test/sitemap_generator_test.rb +19 -0
  44. data/test/test_helper.rb +11 -0
  45. data/uninstall.rb +4 -0
  46. metadata +117 -0
data/.autotest ADDED
@@ -0,0 +1,36 @@
class Autotest
  ##
  # Derives a test class name from a file path given as a string, s.
  #
  # A leading "test/" directory and a trailing ".rb" extension are dropped.
  # Each remaining path segment is CamelCased (split on underscores and
  # around runs of digits), gets "Test" appended unless it already ends in
  # "Test", and the segments are joined with "::".

  def path_to_classname(s)
    separator = File::SEPARATOR
    trimmed = s.sub(/^test#{separator}/, '').sub(/\.rb$/, '')

    class_names = trimmed.split(separator).map do |segment|
      camelized = segment.split(/_|(\d+)/).map { |word| word.capitalize }.join
      camelized =~ /Test$/ ? camelized : "#{camelized}Test"
    end

    class_names.join('::')
  end
end
14
+
# Configure Autotest when it starts: honour command-line arguments and map
# library files onto the test files that should be re-run for them.
Autotest.add_hook :initialize do |at|
  # With "-d" as the first argument the remaining arguments become the
  # directories to search; any other arguments become the extra files to
  # watch and the directory search list is emptied.
  unless ARGV.empty?
    if ARGV[0] == '-d'
      at.find_directories = ARGV[1..-1].dup
    else
      at.find_directories = []
      at.extra_files = ARGV.dup
    end
  end

  # doesn't seem to work
  # at.clear_mappings

  # A change to lib/**/foo_bar.rb maps to test/**/*foo_bar_test.rb, with the
  # underscores of the library name optional in the test file name.
  at.add_mapping(/^lib\/.*\.rb$/) do |filename, _|
    # Strip the whole ".rb" extension. Passing 'rb' would leave a trailing
    # "." in the pattern, which then demands an extra character before
    # "_test.rb" and never matches.
    possible = File.basename(filename, '.rb').gsub '_', '_?'
    # This block is defined at the top level, so files_matching has to be
    # sent to the Autotest instance explicitly.
    at.files_matching %r%^test/.*#{possible}_test\.rb$%
  end

  # A changed test file maps to itself.
  at.add_mapping(/^test.*\/.*test\.rb$/) do |filename, _|
    filename
  end
end
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Adam Salter
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,131 @@
1
+ SitemapGenerator
2
+ ================
3
+
4
+ This plugin enables ['enterprise-class'][enterprise_class] Google Sitemaps to be easily generated for a Rails site as a rake task, using a simple 'Rails Routes'-like DSL.
5
+
6
+ Raison d'être
7
+ -------
8
+
9
+ Most of the Sitemap plugins out there seem to try to recreate the Sitemap links by iterating over the Rails routes. In some cases this is possible, but in a great many cases it isn't.
10
+
11
+ a) There are probably quite a few routes in your routes file that don't need inclusion in the Sitemap. (AJAX routes I'm looking at you.)
12
+
13
+ and
14
+
15
+ b) How would you infer the correct series of links for the following route?
16
+
17
+ map.zipcode 'location/:state/:city/:zipcode', :controller => 'zipcode', :action => 'index'
18
+
19
+ Don't tell me it's trivial, because it isn't. It just looks trivial.
20
+
21
+ So my idea is to have another file similar to 'routes.rb' called 'sitemap.rb', where you can define what goes into the Sitemap.
22
+
23
+ Here's my solution:
24
+
25
+ Zipcode.find(:all, :include => :city).each do |z|
26
+ sitemap.add zipcode_path(:state => z.city.state, :city => z.city, :zipcode => z)
27
+ end
28
+
29
+ Easy hey?
30
+
31
+ Other Sitemap settings for the link, like `lastmod`, `priority`, `changefreq` and `host` are entered automatically, although you can override them if you need to.
32
+
33
+ Other "difficult" Sitemap issues, solved by this plugin:
34
+
35
+ - Support for more than 50,000 urls (using a Sitemap Index file)
36
+ - Gzip of Sitemap files
37
+ - Variable priority of links
38
+ - Paging/sorting links (e.g. my_list?page=3)
39
+ - SSL host links (e.g. https:)
40
+ - Rails apps which are installed on a sub-path (e.g. example.com/blog_app/)
41
+
42
+ Installation
43
+ =======
44
+
45
+ 1. Install plugin as normal
46
+
47
+ <code>./script/plugin install git://github.com/adamsalter/sitemap_generator-plugin.git</code>
48
+
49
+ 2. Installation should create a 'config/sitemap.rb' file which will contain your logic for generation of the Sitemap files. (If you want to recreate this file manually run `rake sitemap:install`)
50
+
51
+ 3. Run `rake sitemap:refresh` as needed to create Sitemap files. This will also ping all the ['major'][sitemap_engines] search engines. (To disable all non-essential output, run the rake task as `rake -s sitemap:refresh SILENT=true`.)
52
+
53
+ Sitemaps with many urls (100,000+) take quite a long time to generate, so if you need to refresh your Sitemaps regularly you can set the rake task up as a cron job. Most cron agents will only send you an email if there is output from the cron task.
54
+
55
+ 4. Finally, and optionally, add the following to your robots.txt file.
56
+
57
+ <code>Sitemap: &lt;hostname&gt;/sitemap_index.xml.gz</code>
58
+
59
+ The robots.txt Sitemap URL should be the complete URL to the Sitemap Index, such as: `http://www.example.org/sitemap_index.xml.gz`
60
+
61
+ Example 'config/sitemap.rb'
62
+ ==========
63
+
64
+ # Set the host name for URL creation
65
+ SitemapGenerator::Sitemap.default_host = "http://www.example.com"
66
+
67
+ SitemapGenerator::Sitemap.add_links do |sitemap|
68
+ # Put links creation logic here.
69
+ #
70
+ # The Root Path ('/') and Sitemap Index file are added automatically.
71
+ # Links are added to the Sitemap output in the order they are specified.
72
+ #
73
+ # Usage: sitemap.add path, options
74
+ # (default options are used if you don't specify them)
75
+ #
76
+ # Defaults: :priority => 0.5, :changefreq => 'weekly',
77
+ # :lastmod => Time.now, :host => default_host
78
+
79
+
80
+ # Examples:
81
+
82
+ # add '/articles'
83
+ sitemap.add articles_path, :priority => 0.7, :changefreq => 'daily'
84
+
85
+ # add all individual articles
86
+ Article.find(:all).each do |a|
87
+ sitemap.add article_path(a), :lastmod => a.updated_at
88
+ end
89
+
90
+ # add merchant path
91
+ sitemap.add '/purchase', :priority => 0.7, :host => "https://www.example.com"
92
+
93
+ end
94
+
95
+ Notes
96
+ =======
97
+
98
+ 1) Tested and working on Rails 1.x through 2.x; no guarantees are made for Rails 3.0.
99
+
100
+ 2) For large sitemaps it may be useful to split your generation into batches to avoid running out of memory. E.g.:
101
+
102
+ # add movies
103
+ Movie.find_in_batches(:batch_size => 1000) do |movies|
104
+ movies.each do |movie|
105
+ sitemap.add "/movies/show/#{movie.to_param}", :lastmod => movie.updated_at, :changefreq => 'weekly'
106
+ end
107
+ end
108
+
109
+
110
+ Known Bugs
111
+ ========
112
+
113
+ - Sitemaps.org [states][sitemaps_org] that no Sitemap XML file should be more than 10MB uncompressed. The plugin will warn you about this, but does nothing to avoid it (such as moving some URLs into a later file).
114
+ - There's no check on the size of a URL which [isn't supposed to exceed 2,048 bytes][sitemaps_xml].
115
+ - Currently only supports one Sitemap Index file, which can contain 50,000 Sitemap files which can each contain 50,000 urls, so it _only_ supports up to 2,500,000,000 (2.5 billion) urls. I personally have no need of support for more urls, but the plugin could be improved to support this.
116
+
117
+ Follow me on:
118
+ ---------
119
+
120
+ > Twitter: [twitter.com/adamsalter](http://twitter.com/adamsalter)
121
+ > Github: [github.com/adamsalter](http://github.com/adamsalter)
122
+
123
+ Copyright (c) 2009 Adam @ [Codebright.net][cb], released under the MIT license
124
+
125
+ [enterprise_class]:https://twitter.com/dhh/status/1631034662 "I use enterprise in the same sense the Phusion guys do - i.e. Enterprise Ruby. Please don't look down on my use of the word 'enterprise' to represent being a cut above. It doesn't mean you ever have to work for a company the size of IBM. Or constantly fight inertia, writing crappy software, adhering to change management practices and spending hours in meetings... Not that there's anything wrong with that - Wait, what?"
126
+ [sitemap_engines]:http://en.wikipedia.org/wiki/Sitemap_index "http://en.wikipedia.org/wiki/Sitemap_index"
127
+ [sitemaps_org]:http://www.sitemaps.org/protocol.php "http://www.sitemaps.org/protocol.php"
128
+ [sitemaps_xml]:http://www.sitemaps.org/protocol.php#xmlTagDefinitions "XML Tag Definitions"
129
+ [sitemap_generator_usage]:http://wiki.github.com/adamsalter/sitemap_generator-plugin/sitemapgenerator-usage "http://wiki.github.com/adamsalter/sitemap_generator-plugin/sitemapgenerator-usage"
130
+ [boost_juice]:http://www.boostjuice.com.au/ "Mmmm, sweet, sweet Boost Juice."
131
+ [cb]:http://codebright.net "http://codebright.net"
data/Rakefile ADDED
@@ -0,0 +1,29 @@
require 'rake/testtask'
require 'find'

# Gem packaging tasks are provided by Jeweler. When Jeweler cannot be loaded
# a hint is printed and the rest of the Rakefile (the test tasks) still works.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "sitemap_generator"
    gem.summary = %Q{This plugin enables 'enterprise-class' Google Sitemaps to be easily generated for a Rails site as a rake task}
    gem.description = %Q{This plugin enables 'enterprise-class' Google Sitemaps to be easily generated for a Rails site as a rake task}
    # No trailing whitespace: this string ends up verbatim in the gem metadata.
    gem.email = "adam@salter.net"
    gem.homepage = "http://github.com/adamsalter/sitemap_generator-plugin"
    gem.authors = ["Adam Salter"]
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

desc 'Default: run unit tests.'
task :default => :test

# Runs every test/**/*_test.rb file with lib/ on the load path.
desc 'Test the sitemap_generator plugin.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end
29
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/init.rb ADDED
@@ -0,0 +1,2 @@
1
+ # Include hook code here
2
+
data/install.rb ADDED
@@ -0,0 +1,13 @@
# Install hook code here

# Copy templates/sitemap.rb (next to this file) to the application's
# config/sitemap.rb. An existing config/sitemap.rb is never overwritten.
require 'fileutils'
current_dir = File.dirname(__FILE__)
sitemap_template = File.join(current_dir, 'templates/sitemap.rb')
new_sitemap = File.join(RAILS_ROOT, 'config/sitemap.rb')
if File.exist?(new_sitemap)
  puts "already exists: config/sitemap.rb, file not copied"
else
  # Copy before reporting, so "created" is only printed when the copy
  # did not raise.
  FileUtils.cp(sitemap_template, new_sitemap)
  puts "created: config/sitemap.rb"
end
@@ -0,0 +1,10 @@
1
+ require 'sitemap_generator/mapper'
2
+ require 'sitemap_generator/link'
3
+ require 'sitemap_generator/link_set'
4
+ require 'sitemap_generator/helper'
5
+ require 'sitemap_generator/tasks'
6
+
7
+ module SitemapGenerator
8
+ Sitemap = LinkSet.new # one LinkSet created at load time; the helper and rake task code reach it as SitemapGenerator::Sitemap
9
+ end
10
+
@@ -0,0 +1,59 @@
1
+ require 'action_controller'
2
+ require 'action_controller/test_process'
3
+ begin
4
+ require 'application_controller'
5
+ rescue LoadError
6
+ # Rails < 2.3
7
+ require 'application'
8
+ end
9
+
module SitemapGenerator
  # Helper methods used by the sitemap rake tasks: loading config/sitemap.rb,
  # building absolute URLs, formatting dates and pinging search engines.
  module Helper
    # Evaluates the application's config/sitemap.rb with a freshly built
    # ApplicationController instance as self (a test request and empty
    # params are attached to it first).
    def load_sitemap_rb
      controller = ApplicationController.new
      controller.request = ActionController::TestRequest.new
      controller.params = {}
      controller.send(:initialize_current_url)
      b = controller.instance_eval { binding }
      sitemap_mapper_file = File.join(RAILS_ROOT, 'config/sitemap.rb')
      # File.read closes the file again; passing the path to eval makes
      # errors raised inside sitemap.rb point at that file.
      eval(File.read(sitemap_mapper_file), b, sitemap_mapper_file)
    end

    # Joins +path+ onto the configured default host and returns the result
    # as a String.
    def url_with_hostname(path)
      URI.join(SitemapGenerator::Sitemap.default_host, path).to_s
    end

    # Formats +date+ (converted to UTC) as "YYYY-MM-DDTHH:MM:SS+00:00".
    def w3c_date(date)
      date.utc.strftime("%Y-%m-%dT%H:%M:%S+00:00")
    end

    # Requests the ping URL of each known search engine for the sitemap index
    # at +sitemap_index+ (a path relative to the default host).
    #
    # Setting SitemapGenerator::Sitemap.yahoo_app_id to false skips the Yahoo
    # ping only; all other engines are still pinged. A failed ping is
    # reported with puts and does not stop the remaining pings.
    def ping_search_engines(sitemap_index)
      require 'cgi'
      require 'open-uri'
      index_location = CGI.escape(url_with_hostname(sitemap_index))
      # engines list from http://en.wikipedia.org/wiki/Sitemap_index
      yahoo_app_id = SitemapGenerator::Sitemap.yahoo_app_id
      engines = {
        :google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{index_location}",
        :yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{index_location}&appid=#{yahoo_app_id}",
        :ask => "http://submissions.ask.com/ping?sitemap=#{index_location}",
        :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{index_location}",
        :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{index_location}"
      }
      # Only an explicit false opts out (nil means "no app id configured"),
      # and it opts out of Yahoo alone rather than of every engine.
      engines.delete(:yahoo) if yahoo_app_id == false

      engines.each do |engine, link|
        begin
          # Block form so the response is closed as soon as the ping returns.
          open(link) { |_response| }
          puts "Successful ping of #{engine.to_s.titleize}" unless ENV['SILENT'].present?
        rescue StandardError => e
          puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect}"
          puts <<-END if engine == :yahoo
  Yahoo requires an 'AppID' for more than one ping per "timeframe", you can either:
    - remove yahoo from the ping list (config/sitemap.rb):
        SitemapGenerator::Sitemap.yahoo_app_id = false
    - or add your Yahoo AppID to the generator (config/sitemap.rb):
        SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"
  For more information: http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html
          END
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+
module SitemapGenerator
  # Builds the hash that describes one sitemap entry.
  class Link
    class << self
      # Returns a hash with the keys :path, :priority, :changefreq, :lastmod,
      # :host and :loc for +path+.
      #
      # +options+ may contain :priority, :changefreq, :lastmod and :host
      # (checked with assert_valid_keys). Missing options default to 0.5,
      # 'weekly', Time.now and Sitemap.default_host. :loc is +path+ joined
      # onto the host.
      def generate(path, options = {})
        options.assert_valid_keys(:priority, :changefreq, :lastmod, :host)
        # Non-destructive merge: the caller's hash must not be filled in with
        # our defaults (it may be reused for other links).
        options = options.reverse_merge(:priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now, :host => Sitemap.default_host)
        {
          :path => path,
          :priority => options[:priority],
          :changefreq => options[:changefreq],
          :lastmod => options[:lastmod],
          :host => options[:host],
          :loc => URI.join(options[:host], path).to_s
        }
      end
    end
  end
end
@@ -0,0 +1,28 @@
module SitemapGenerator
  # Holds the sitemap settings (default host, Yahoo app id) and the ordered
  # list of links collected for the sitemap.
  class LinkSet
    # default_host has a custom writer below, so only its reader is generated.
    attr_reader :default_host
    attr_accessor :yahoo_app_id, :links

    def initialize
      @links = []
      @default_links = []
    end

    # Sets the default host and appends the default links.
    #
    # Default links appended by an earlier assignment are removed first, so
    # assigning the host more than once does not leave duplicate entries
    # behind.
    def default_host=(host)
      @default_host = host
      # Identity comparison: never drop an equal-looking link the user added.
      @links.reject! { |link| @default_links.any? { |stale| stale.equal?(link) } }
      add_default_links
    end

    # Appends the two default links: '/' and '/sitemap_index.xml.gz'.
    def add_default_links
      @default_links = [
        Link.generate('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0),
        Link.generate('/sitemap_index.xml.gz', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
      ]
      @links.concat(@default_links)
    end

    # Yields a Mapper wrapping this set, through which links are added.
    def add_links
      yield Mapper.new(self)
    end

    # Appends an already generated link to the set.
    def add_link(link)
      @links << link
    end
  end
end
@@ -0,0 +1,16 @@
1
+
2
+ module SitemapGenerator
3
+ # Mapper instances are used to build links.
4
+ # The object passed to the add_links block in config/sitemap.rb is a Mapper instance.
5
+ class Mapper
6
+ attr_accessor :set # the link set that receives the generated links
7
+
8
+ def initialize(set)
9
+ @set = set
10
+ end
11
+
12
+ def add(loc, options = {}) # build a link for loc via Link.generate and append it to the set
13
+ set.add_link Link.generate(loc, options)
14
+ end
15
+ end
16
+ end
@@ -0,0 +1 @@
1
+ load "#{File.dirname(__FILE__)}/../../tasks/sitemap_generator_tasks.rake" # loads the rake task file from the tasks/ directory two levels up
@@ -0,0 +1,74 @@
1
+ require 'zlib'
2
+
# Rake tasks for installing the sitemap config and for generating, cleaning
# and announcing the gzipped Sitemap files under public/.
namespace :sitemap do

  desc "Install a default config/sitemap.rb file"
  task :install do
    load File.expand_path(File.join(File.dirname(__FILE__), "..", "install.rb"))
  end

  desc "Delete all Sitemap files in public/ directory"
  task :clean do
    sitemap_files = Dir[File.join(RAILS_ROOT, 'public/sitemap*.xml.gz')]
    FileUtils.rm sitemap_files
  end

  # A single description: stacking two desc calls on one task makes rake
  # show both texts for it.
  desc "Create Sitemap XML files in public/ directory (set SILENT=true for no output)"
  task :refresh => ['sitemap:create'] do
    # ping_search_engines comes from SitemapGenerator::Helper, which the
    # sitemap:create prerequisite includes before this body runs.
    ping_search_engines("sitemap_index.xml.gz")
  end

  desc "Create Sitemap XML files (don't ping search engines)"
  task 'refresh:no_ping' => ['sitemap:create'] do
  end

  # Generates public/sitemapN.xml.gz (50,000 links per file) and
  # public/sitemap_index.xml.gz from the links declared in config/sitemap.rb.
  task :create => [:environment] do
    include SitemapGenerator::Helper
    include ActionView::Helpers::NumberHelper

    start_time = Time.now

    # update links from config/sitemap.rb
    load_sitemap_rb

    raise(ArgumentError, "Default hostname not defined") if SitemapGenerator::Sitemap.default_host.blank?

    links_grps = SitemapGenerator::Sitemap.links.in_groups_of(50000, false)
    raise(ArgumentError, "TOO MANY LINKS!! I really thought 2,500,000,000 links would be enough for anybody!") if links_grps.length > 50000

    Rake::Task['sitemap:clean'].invoke

    # render individual sitemaps
    #
    # The builder templates are evaluated against this binding, so the local
    # variable names in scope here (links, index, xml, buffer, sitemap_files,
    # ...) must not be renamed without checking the templates.
    sitemap_files = []
    xml_sitemap_template = File.join(File.dirname(__FILE__), '../templates/xml_sitemap.builder')
    links_grps.each_with_index do |links, index|
      buffer = ''
      xml = Builder::XmlMarkup.new(:target=>buffer)
      # File.read closes the template again; the path argument makes template
      # errors point at the template file.
      eval(File.read(xml_sitemap_template), binding, xml_sitemap_template)
      filename = File.join(RAILS_ROOT, "public/sitemap#{index+1}.xml.gz")
      Zlib::GzipWriter.open(filename) do |gz|
        gz.write buffer
      end
      puts "+ #{filename}" unless ENV['SILENT'].present?
      # NOTE(review): String#size counts characters, not bytes, on Ruby 1.9+,
      # so this can under-report for multibyte content - confirm target Ruby.
      puts "** Sitemap too big! The uncompressed size exceeds 10Mb" if (buffer.size > 10 * 1024 * 1024) && ENV['SILENT'].blank?
      sitemap_files << filename
    end

    # render index
    sitemap_index_template = File.join(File.dirname(__FILE__), '../templates/sitemap_index.builder')
    buffer = ''
    xml = Builder::XmlMarkup.new(:target=>buffer)
    eval(File.read(sitemap_index_template), binding, sitemap_index_template)
    filename = File.join(RAILS_ROOT, "public/sitemap_index.xml.gz")
    Zlib::GzipWriter.open(filename) do |gz|
      gz.write buffer
    end
    puts "+ #{filename}" unless ENV['SILENT'].present?
    puts "** Sitemap Index too big! The uncompressed size exceeds 10Mb" if (buffer.size > 10 * 1024 * 1024) && ENV['SILENT'].blank?

    stop_time = Time.now
    puts "Sitemap stats: #{number_with_delimiter(SitemapGenerator::Sitemap.links.length)} links, " + ("%dm%02ds" % (stop_time - start_time).divmod(60)) unless ENV['SILENT'].present?

  end
end