sitemap_generator 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/.autotest +36 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +131 -0
  4. data/Rakefile +29 -0
  5. data/VERSION +1 -0
  6. data/init.rb +2 -0
  7. data/install.rb +13 -0
  8. data/lib/sitemap_generator.rb +10 -0
  9. data/lib/sitemap_generator/helper.rb +59 -0
  10. data/lib/sitemap_generator/link.rb +19 -0
  11. data/lib/sitemap_generator/link_set.rb +28 -0
  12. data/lib/sitemap_generator/mapper.rb +16 -0
  13. data/lib/sitemap_generator/tasks.rb +1 -0
  14. data/tasks/sitemap_generator_tasks.rake +74 -0
  15. data/templates/sitemap.rb +30 -0
  16. data/templates/sitemap_index.builder +22 -0
  17. data/templates/xml_sitemap.builder +15 -0
  18. data/test/mock_app/.gitignore +3 -0
  19. data/test/mock_app/README +243 -0
  20. data/test/mock_app/Rakefile +10 -0
  21. data/test/mock_app/app/controllers/application_controller.rb +10 -0
  22. data/test/mock_app/app/controllers/contents_controller.rb +85 -0
  23. data/test/mock_app/app/models/content.rb +2 -0
  24. data/test/mock_app/config/boot.rb +110 -0
  25. data/test/mock_app/config/database.yml +5 -0
  26. data/test/mock_app/config/environment.rb +41 -0
  27. data/test/mock_app/config/environments/development.rb +17 -0
  28. data/test/mock_app/config/environments/production.rb +28 -0
  29. data/test/mock_app/config/environments/test.rb +28 -0
  30. data/test/mock_app/config/initializers/backtrace_silencers.rb +7 -0
  31. data/test/mock_app/config/initializers/inflections.rb +10 -0
  32. data/test/mock_app/config/initializers/mime_types.rb +5 -0
  33. data/test/mock_app/config/initializers/new_rails_defaults.rb +19 -0
  34. data/test/mock_app/config/initializers/session_store.rb +15 -0
  35. data/test/mock_app/config/locales/en.yml +5 -0
  36. data/test/mock_app/config/routes.rb +45 -0
  37. data/test/mock_app/config/sitemap.rb +13 -0
  38. data/test/mock_app/db/migrate/20090826121911_create_contents.rb +12 -0
  39. data/test/mock_app/db/schema.rb +20 -0
  40. data/test/mock_app/db/test.sqlite3 +0 -0
  41. data/test/mock_app/public/index.html +275 -0
  42. data/test/mock_app/script/console +3 -0
  43. data/test/sitemap_generator_test.rb +19 -0
  44. data/test/test_helper.rb +11 -0
  45. data/uninstall.rb +4 -0
  46. metadata +117 -0
data/.autotest ADDED
@@ -0,0 +1,36 @@
class Autotest
  ##
  # Convert a path in a string, s, into a class name, changing
  # underscores to CamelCase, etc.
  #
  # A leading "test/" directory and a trailing ".rb" extension are dropped,
  # every remaining path segment is CamelCased (splitting on underscores and
  # runs of digits), "Test" is appended to any segment that does not already
  # end with it, and the segments are joined with "::".
  #
  #   path_to_classname("test/unit/link_set.rb") # => "UnitTest::LinkSetTest"

  def path_to_classname(s)
    # Anchor on the whole string (\A / \z) rather than on line boundaries, and
    # escape the separator so it is always matched literally.
    sep = Regexp.escape(File::SEPARATOR)
    segments = s.sub(/\Atest#{sep}/, '').sub(/\.rb\z/, '').split(File::SEPARATOR)
    # The capture group keeps digit runs as their own pieces ("v2" -> "V", "2").
    segments = segments.map { |segment| segment.split(/_|(\d+)/).map { |piece| piece.capitalize }.join }
    segments = segments.map { |name| name =~ /Test\z/ ? name : "#{name}Test" }
    segments.join('::')
  end
end
14
+
# Configure autotest when it starts up:
#   - `-d dir1 dir2 ...` limits the directories that are scanned;
#   - any other arguments are treated as extra files to watch, with
#     directory scanning turned off;
#   - changes under lib/ re-run the matching test/**/*_test.rb file, and
#     changes to a test file re-run that file.
Autotest.add_hook :initialize do |at|
  unless ARGV.empty?
    if ARGV[0] == '-d'
      at.find_directories = ARGV[1..-1].dup
    else
      at.find_directories = []
      at.extra_files = ARGV.dup
    end
  end

  # doesn't seem to work
  # at.clear_mappings

  at.add_mapping(/^lib\/.*\.rb$/) do |filename, _|
    # The suffix must include the dot: File.basename("foo.rb", "rb") returns
    # "foo.", which would leave a stray "." in the pattern below and stop it
    # from ever matching "foo_test.rb".
    possible = File.basename(filename, '.rb').gsub '_', '_?'
    # NOTE(review): called on `at` explicitly so the lookup does not depend on
    # what `self` is when autotest invokes this block - confirm against the
    # installed ZenTest version.
    at.files_matching %r%^test/.*#{possible}_test\.rb$%
  end

  at.add_mapping(/^test.*\/.*test\.rb$/) do |filename, _|
    filename
  end
end
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Adam Salter
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,131 @@
1
+ SitemapGenerator
2
+ ================
3
+
4
+ This plugin enables ['enterprise-class'][enterprise_class] Google Sitemaps to be easily generated for a Rails site as a rake task, using a simple 'Rails Routes'-like DSL.
5
+
6
+ Raison d'être
7
+ -------
8
+
9
+ Most of the Sitemap plugins out there seem to try to recreate the Sitemap links by iterating over the Rails routes. In some cases this is possible, but in a great many cases it isn't.
10
+
11
+ a) There are probably quite a few routes in your routes file that don't need inclusion in the Sitemap. (AJAX routes I'm looking at you.)
12
+
13
+ and
14
+
15
+ b) How would you infer the correct series of links for the following route?
16
+
17
+ map.zipcode 'location/:state/:city/:zipcode', :controller => 'zipcode', :action => 'index'
18
+
19
+ Don't tell me it's trivial, because it isn't. It just looks trivial.
20
+
21
+ So my idea is to have another file similar to 'routes.rb' called 'sitemap.rb', where you can define what goes into the Sitemap.
22
+
23
+ Here's my solution:
24
+
25
+ Zipcode.find(:all, :include => :city).each do |z|
26
+ sitemap.add zipcode_path(:state => z.city.state, :city => z.city, :zipcode => z)
27
+ end
28
+
29
+ Easy hey?
30
+
31
+ Other Sitemap settings for the link, like `lastmod`, `priority`, `changefreq` and `host` are entered automatically, although you can override them if you need to.
32
+
33
+ Other "difficult" Sitemap issues, solved by this plugin:
34
+
35
+ - Support for more than 50,000 urls (using a Sitemap Index file)
36
+ - Gzip of Sitemap files
37
+ - Variable priority of links
38
+ - Paging/sorting links (e.g. my_list?page=3)
39
+ - SSL host links (e.g. https:)
40
+ - Rails apps which are installed on a sub-path (e.g. example.com/blog_app/)
41
+
42
+ Installation
43
+ =======
44
+
45
+ 1. Install plugin as normal
46
+
47
+ <code>./script/plugin install git://github.com/adamsalter/sitemap_generator-plugin.git</code>
48
+
49
+ 2. Installation should create a 'config/sitemap.rb' file which will contain your logic for generation of the Sitemap files. (If you want to recreate this file manually run `rake sitemap:install`)
50
+
51
+ 3. Run `rake sitemap:refresh` as needed to create Sitemap files. This will also ping all the ['major'][sitemap_engines] search engines. (If you want to disable all non-essential output, run the rake task like this: `rake -s sitemap:refresh SILENT=true`.)
52
+
53
+ Sitemaps with many urls (100,000+) take quite a long time to generate, so if you need to refresh your Sitemaps regularly you can set the rake task up as a cron job. Most cron agents will only send you an email if there is output from the cron task.
54
+
55
+ 4. Finally, and optionally, add the following to your robots.txt file.
56
+
57
+ <code>Sitemap: &lt;hostname>/sitemap_index.xml.gz</code>
58
+
59
+ The robots.txt Sitemap URL should be the complete URL to the Sitemap Index, such as: `http://www.example.org/sitemap_index.xml.gz`
60
+
61
+ Example 'config/sitemap.rb'
62
+ ==========
63
+
64
+ # Set the host name for URL creation
65
+ SitemapGenerator::Sitemap.default_host = "http://www.example.com"
66
+
67
+ SitemapGenerator::Sitemap.add_links do |sitemap|
68
+ # Put links creation logic here.
69
+ #
70
+ # The Root Path ('/') and Sitemap Index file are added automatically.
71
+ # Links are added to the Sitemap output in the order they are specified.
72
+ #
73
+ # Usage: sitemap.add path, options
74
+ # (default options are used if you don't specify them)
75
+ #
76
+ # Defaults: :priority => 0.5, :changefreq => 'weekly',
77
+ # :lastmod => Time.now, :host => default_host
78
+
79
+
80
+ # Examples:
81
+
82
+ # add '/articles'
83
+ sitemap.add articles_path, :priority => 0.7, :changefreq => 'daily'
84
+
85
+ # add all individual articles
86
+ Article.find(:all).each do |a|
87
+ sitemap.add article_path(a), :lastmod => a.updated_at
88
+ end
89
+
90
+ # add merchant path
91
+ sitemap.add '/purchase', :priority => 0.7, :host => "https://www.example.com"
92
+
93
+ end
94
+
95
+ Notes
96
+ =======
97
+
98
+ 1) Tested and working on Rails 1.x.x through 2.x.x; no guarantees are made for Rails 3.0.
99
+
100
+ 2) For large sitemaps it may be useful to split your generation into batches to avoid running out of memory. E.g.:
101
+
102
+ # add movies
103
+ Movie.find_in_batches(:batch_size => 1000) do |movies|
104
+ movies.each do |movie|
105
+ sitemap.add "/movies/show/#{movie.to_param}", :lastmod => movie.updated_at, :changefreq => 'weekly'
106
+ end
107
+ end
108
+
109
+
110
+ Known Bugs
111
+ ========
112
+
113
+ - Sitemaps.org [states][sitemaps_org] that no Sitemap XML file should be more than 10Mb uncompressed. The plugin will warn you about this, but does nothing to avoid it (like move some URLs into a later file).
114
+ - There's no check on the size of a URL which [isn't supposed to exceed 2,048 bytes][sitemaps_xml].
115
+ - Currently only supports one Sitemap Index file, which can contain 50,000 Sitemap files which can each contain 50,000 urls, so it _only_ supports up to 2,500,000,000 (2.5 billion) urls. I personally have no need of support for more urls, but the plugin could be improved to support this.
116
+
117
+ Follow me on:
118
+ ---------
119
+
120
+ > Twitter: [twitter.com/adamsalter](http://twitter.com/adamsalter)
121
+ > Github: [github.com/adamsalter](http://github.com/adamsalter)
122
+
123
+ Copyright (c) 2009 Adam @ [Codebright.net][cb], released under the MIT license
124
+
125
+ [enterprise_class]:https://twitter.com/dhh/status/1631034662 "I use enterprise in the same sense the Phusion guys do - i.e. Enterprise Ruby. Please don't look down on my use of the word 'enterprise' to represent being a cut above. It doesn't mean you ever have to work for a company the size of IBM. Or constantly fight inertia, writing crappy software, adhering to change management practices and spending hours in meetings... Not that there's anything wrong with that - Wait, what?"
126
+ [sitemap_engines]:http://en.wikipedia.org/wiki/Sitemap_index "http://en.wikipedia.org/wiki/Sitemap_index"
127
+ [sitemaps_org]:http://www.sitemaps.org/protocol.php "http://www.sitemaps.org/protocol.php"
128
+ [sitemaps_xml]:http://www.sitemaps.org/protocol.php#xmlTagDefinitions "XML Tag Definitions"
129
+ [sitemap_generator_usage]:http://wiki.github.com/adamsalter/sitemap_generator-plugin/sitemapgenerator-usage "http://wiki.github.com/adamsalter/sitemap_generator-plugin/sitemapgenerator-usage"
130
+ [boost_juice]:http://www.boostjuice.com.au/ "Mmmm, sweet, sweet Boost Juice."
131
+ [cb]:http://codebright.net "http://codebright.net"
data/Rakefile ADDED
@@ -0,0 +1,29 @@
# Build and test tasks for the sitemap_generator plugin/gem.
require 'rake/testtask'
require 'find'

# Gem packaging tasks are optional: they are only defined when Jeweler is
# installed, otherwise a hint is printed and the remaining tasks still work.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "sitemap_generator"
    gem.summary = %Q{This plugin enables 'enterprise-class' Google Sitemaps to be easily generated for a Rails site as a rake task}
    gem.description = %Q{This plugin enables 'enterprise-class' Google Sitemaps to be easily generated for a Rails site as a rake task}
    gem.email = "adam@salter.net"
    gem.homepage = "http://github.com/adamsalter/sitemap_generator-plugin"
    gem.authors = ["Adam Salter"]
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

desc 'Default: run unit tests.'
task :default => :test

desc 'Test the sitemap_generator plugin.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end
29
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/init.rb ADDED
@@ -0,0 +1,2 @@
1
+ # Include hook code here
2
+
data/install.rb ADDED
@@ -0,0 +1,13 @@
# Install hook code here

# Copy templates/sitemap.rb to config/sitemap.rb, unless the application
# already has a config/sitemap.rb (an existing file is never overwritten).
require 'fileutils'
current_dir = File.dirname(__FILE__)
sitemap_template = File.join(current_dir, 'templates/sitemap.rb')
new_sitemap = File.join(RAILS_ROOT, 'config/sitemap.rb')
if File.exist?(new_sitemap)
  puts "already exists: config/sitemap.rb, file not copied"
else
  # Copy first and report afterwards, so that a failed copy is not announced
  # as a success.
  FileUtils.cp(sitemap_template, new_sitemap)
  puts "created: config/sitemap.rb"
end
@@ -0,0 +1,10 @@
1
+ require 'sitemap_generator/mapper'
2
+ require 'sitemap_generator/link'
3
+ require 'sitemap_generator/link_set'
4
+ require 'sitemap_generator/helper'
5
+ require 'sitemap_generator/tasks'
6
+
module SitemapGenerator
  # The shared LinkSet instance: config/sitemap.rb sets its default_host and
  # adds links to it, and the sitemap rake tasks read them back from it.
  Sitemap = SitemapGenerator::LinkSet.new
end
10
+
@@ -0,0 +1,59 @@
1
+ require 'action_controller'
2
+ require 'action_controller/test_process'
3
+ begin
4
+ require 'application_controller'
5
+ rescue LoadError
6
+ # Rails < 2.3
7
+ require 'application'
8
+ end
9
+
module SitemapGenerator
  # Helper methods used by the sitemap rake tasks.
  module Helper
    # Evaluates config/sitemap.rb in the context of a freshly built
    # ApplicationController (backed by a test request), presumably so that
    # URL helpers such as `articles_path` resolve inside that file.
    #
    # Returns the value of the last expression in config/sitemap.rb.
    def load_sitemap_rb
      controller = ApplicationController.new
      controller.request = ActionController::TestRequest.new
      controller.params = {}
      controller.send(:initialize_current_url)
      b = controller.instance_eval { binding }
      sitemap_mapper_file = File.join(RAILS_ROOT, 'config/sitemap.rb')
      # File.read closes the file handle; passing the path to eval makes
      # errors raised from config/sitemap.rb point at that file.
      eval(File.read(sitemap_mapper_file), b, sitemap_mapper_file)
    end

    # Joins +path+ onto SitemapGenerator::Sitemap.default_host and returns
    # the absolute URL as a String.
    def url_with_hostname(path)
      URI.join(Sitemap.default_host, path).to_s
    end

    # Formats +date+ (anything responding to #utc, e.g. a Time) as a UTC
    # W3C datetime string such as "2009-08-26T12:19:11+00:00".
    def w3c_date(date)
      date.utc.strftime("%Y-%m-%dT%H:%M:%S+00:00")
    end

    # Notifies each search engine that the sitemap index at +sitemap_index+
    # (a path relative to the default host) has been refreshed.
    #
    # A failed ping is reported on stdout and does not stop the remaining
    # pings. Yahoo alone is skipped when
    # SitemapGenerator::Sitemap.yahoo_app_id is set to false.
    def ping_search_engines(sitemap_index)
      require 'open-uri'
      index_location = CGI.escape(url_with_hostname(sitemap_index))
      # engines list from http://en.wikipedia.org/wiki/Sitemap_index
      yahoo_app_id = SitemapGenerator::Sitemap.yahoo_app_id
      {:google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{index_location}",
       :yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{index_location}&appid=#{yahoo_app_id}",
       :ask => "http://submissions.ask.com/ping?sitemap=#{index_location}",
       :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{index_location}",
       :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{index_location}"}.each do |engine, link|
        # An explicit `false` (not merely nil/unset) opts out of the Yahoo
        # ping only; every other engine is still notified.
        next if engine == :yahoo && yahoo_app_id == false
        begin
          open(link)
          puts "Successful ping of #{engine.to_s.titleize}" unless ENV['SILENT'].present?
        rescue StandardError => e
          puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect}"
          puts <<-END if engine == :yahoo
Yahoo requires an 'AppID' for more than one ping per "timeframe", you can either:
- remove yahoo from the ping list (config/sitemap.rb):
SitemapGenerator::Sitemap.yahoo_app_id = false
- or add your Yahoo AppID to the generator (config/sitemap.rb):
SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"
For more information: http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html
          END
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+
module SitemapGenerator
  # Builds the hash that describes a single sitemap entry.
  class Link
    class << self
      # Returns a Hash describing the link at +path+, with the keys :path,
      # :priority, :changefreq, :lastmod, :host and :loc (the absolute URL,
      # i.e. +path+ joined onto the host).
      #
      # +options+ may contain :priority, :changefreq, :lastmod and :host;
      # any other key raises ArgumentError (via assert_valid_keys). Missing
      # options default to 0.5, 'weekly', Time.now and Sitemap.default_host.
      def generate(path, options = {})
        options.assert_valid_keys(:priority, :changefreq, :lastmod, :host)
        # Non-destructive merge: the caller's hash is left untouched, so an
        # options hash reused for several links does not silently pick up
        # the defaults (e.g. a frozen :lastmod) from the first call.
        options = options.reverse_merge(:priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now, :host => Sitemap.default_host)
        {
          :path => path,
          :priority => options[:priority],
          :changefreq => options[:changefreq],
          :lastmod => options[:lastmod],
          :host => options[:host],
          :loc => URI.join(options[:host], path).to_s
        }
      end
    end
  end
end
@@ -0,0 +1,28 @@
module SitemapGenerator
  # Holds the list of links that make up the sitemap, together with the
  # settings (default host, Yahoo AppID) used while generating it.
  class LinkSet
    # default_host has a hand-written writer below, so only the reader is
    # generated here.
    attr_reader :default_host
    attr_accessor :yahoo_app_id, :links

    def initialize
      @links = []
      @default_links = nil
    end

    # Sets the host used to build absolute URLs and (re)generates the
    # default links for it.
    def default_host=(host)
      @default_host = host
      add_default_links
    end

    # Adds the two default links: the root path and the sitemap index file.
    #
    # The first call appends them to the link list. Later calls (e.g. when
    # default_host is assigned again) replace the previously generated pair
    # in place instead of appending duplicates.
    #
    # Returns the link list.
    def add_default_links
      fresh = [
        Link.generate('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0),
        Link.generate('/sitemap_index.xml.gz', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
      ]

      if @default_links
        @default_links.zip(fresh).each do |stale, replacement|
          # Match by identity so user-added links that merely look the same
          # are never touched.
          position = @links.index { |link| link.equal?(stale) }
          if position
            @links[position] = replacement
          else
            @links << replacement
          end
        end
      else
        @links.concat(fresh)
      end

      @default_links = fresh
      @links
    end

    # Yields a Mapper bound to this set; links added through the mapper end
    # up in #links.
    def add_links
      yield Mapper.new(self)
    end

    # Appends an already generated link to the set.
    def add_link(link)
      @links << link
    end
  end
end
@@ -0,0 +1,16 @@
1
+
module SitemapGenerator
  # Mapper instances are used to build links.
  # The object yielded to the add_links block in config/sitemap.rb is a
  # Mapper wrapping the link set the links are added to.
  class Mapper
    attr_accessor :set

    # +set+ is the link set that receives every link added via #add.
    def initialize(set)
      @set = set
    end

    # Generates a link for +loc+ with the given +options+ and adds it to
    # the wrapped set.
    def add(loc, options = {})
      generated = Link.generate(loc, options)
      set.add_link(generated)
    end
  end
end
@@ -0,0 +1 @@
1
+ load "#{File.dirname(__FILE__)}/../../tasks/sitemap_generator_tasks.rake"
@@ -0,0 +1,74 @@
1
+ require 'zlib'
2
+
# Rake tasks for installing, generating and cleaning up the Sitemap files.
namespace :sitemap do

  desc "Install a default config/sitemap.rb file"
  task :install do
    load File.expand_path(File.join(File.dirname(__FILE__), "..", "install.rb"))
  end

  desc "Delete all Sitemap files in public/ directory"
  task :clean do
    sitemap_files = Dir[File.join(RAILS_ROOT, 'public/sitemap*.xml.gz')]
    FileUtils.rm sitemap_files
  end

  desc "Create Sitemap XML files in public/ directory (set SILENT=true for no output)"
  task :refresh => ['sitemap:create'] do
    ping_search_engines("sitemap_index.xml.gz")
  end

  desc "Create Sitemap XML files (don't ping search engines)"
  task 'refresh:no_ping' => ['sitemap:create'] do
  end

  # Renders the links collected from config/sitemap.rb into gzipped Sitemap
  # files (at most 50,000 links each) plus one gzipped Sitemap Index file,
  # all written to public/.
  task :create => [:environment] do
    include SitemapGenerator::Helper
    include ActionView::Helpers::NumberHelper

    start_time = Time.now

    # update links from config/sitemap.rb
    load_sitemap_rb

    raise(ArgumentError, "Default hostname not defined") if SitemapGenerator::Sitemap.default_host.blank?

    links_grps = SitemapGenerator::Sitemap.links.in_groups_of(50000, false)
    raise(ArgumentError, "TOO MANY LINKS!! I really thought 2,500,000,000 links would be enough for anybody!") if links_grps.length > 50000

    Rake::Task['sitemap:clean'].invoke

    # render individual sitemaps
    # NOTE: the local names below (links, index, xml, sitemap_files, ...) are
    # visible to the builder templates through `binding`; the templates are
    # not shown here, so these names must not be changed.
    sitemap_files = []
    xml_sitemap_template = File.join(File.dirname(__FILE__), '../templates/xml_sitemap.builder')
    # Read the template once (File.read also closes the file handle).
    xml_sitemap_source = File.read(xml_sitemap_template)
    links_grps.each_with_index do |links, index|
      buffer = ''
      xml = Builder::XmlMarkup.new(:target=>buffer)
      eval(xml_sitemap_source, binding, xml_sitemap_template)
      filename = File.join(RAILS_ROOT, "public/sitemap#{index+1}.xml.gz")
      Zlib::GzipWriter.open(filename) do |gz|
        gz.write buffer
      end
      puts "+ #{filename}" unless ENV['SILENT'].present?
      # The 10Mb limit is measured in bytes, not characters.
      buffer_bytes = buffer.respond_to?(:bytesize) ? buffer.bytesize : buffer.size
      puts "** Sitemap too big! The uncompressed size exceeds 10Mb" if (buffer_bytes > 10 * 1024 * 1024) && ENV['SILENT'].blank?
      sitemap_files << filename
    end

    # render index
    sitemap_index_template = File.join(File.dirname(__FILE__), '../templates/sitemap_index.builder')
    buffer = ''
    xml = Builder::XmlMarkup.new(:target=>buffer)
    eval(File.read(sitemap_index_template), binding, sitemap_index_template)
    filename = File.join(RAILS_ROOT, "public/sitemap_index.xml.gz")
    Zlib::GzipWriter.open(filename) do |gz|
      gz.write buffer
    end
    puts "+ #{filename}" unless ENV['SILENT'].present?
    buffer_bytes = buffer.respond_to?(:bytesize) ? buffer.bytesize : buffer.size
    puts "** Sitemap Index too big! The uncompressed size exceeds 10Mb" if (buffer_bytes > 10 * 1024 * 1024) && ENV['SILENT'].blank?

    stop_time = Time.now
    puts "Sitemap stats: #{number_with_delimiter(SitemapGenerator::Sitemap.links.length)} links, " + ("%dm%02ds" % (stop_time - start_time).divmod(60)) unless ENV['SILENT'].present?

  end
end