sitemap_generator 1.5.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/README.md +355 -186
- data/VERSION +1 -1
- data/lib/sitemap_generator.rb +15 -2
- data/lib/sitemap_generator/builder/sitemap_file.rb +17 -31
- data/lib/sitemap_generator/builder/sitemap_index_file.rb +14 -14
- data/lib/sitemap_generator/interpreter.rb +46 -18
- data/lib/sitemap_generator/link_set.rb +306 -126
- data/lib/sitemap_generator/sitemap_location.rb +60 -26
- data/lib/sitemap_generator/sitemap_namer.rb +41 -12
- data/tasks/sitemap_generator_tasks.rake +1 -2
- data/templates/sitemap.rb +10 -11
- metadata +5 -7
- data/README.md.orig +0 -374
@@ -1,7 +1,7 @@
|
|
1
1
|
module SitemapGenerator
|
2
2
|
class SitemapLocation < Hash
|
3
3
|
|
4
|
-
[:
|
4
|
+
[:host].each do |method|
|
5
5
|
define_method(method) do
|
6
6
|
raise SitemapGenerator::SitemapError, "No value set for #{method}" unless self[method]
|
7
7
|
self[method]
|
@@ -10,31 +10,28 @@ module SitemapGenerator
|
|
10
10
|
|
11
11
|
[:public_path, :sitemaps_path].each do |method|
|
12
12
|
define_method(method) do
|
13
|
-
Pathname.new(self[method].nil? ? '' : self[method])
|
13
|
+
Pathname.new(self[method].nil? ? '' : self[method].to_s)
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
-
#
|
18
|
-
#
|
17
|
+
# If no +filename+ or +namer+ is provided, the default namer is used. For sitemap
|
18
|
+
# files this generates names like <tt>sitemap1.xml.gz</tt>, <tt>sitemap2.xml.gz</tt> and so on,
|
19
19
|
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
20
|
+
# === Options
|
21
|
+
# * <tt>public_path</tt> - path to the "public" directory, or the directory you want to
|
22
|
+
# write sitemaps in. Default is a directory <tt>public/</tt>
|
23
|
+
# in the current working directory, or relative to the Rails root
|
24
|
+
# directory if running under Rails.
|
25
|
+
# * <tt>sitemaps_path</tt> - gives the path relative to the <tt>public_path</tt> in which to
|
26
|
+
# write sitemaps e.g. <tt>sitemaps/</tt>.
|
27
|
+
# * <tt>host</tt> - host name for URLs. The full URL to the file is then constructed from
|
28
|
+
# the <tt>host</tt>, <tt>sitemaps_path</tt> and <tt>filename</tt>
|
29
|
+
# * <tt>filename</tt> - full name of the file e.g. <tt>'sitemap1.xml.gz'</tt>
|
30
|
+
# * <tt>namer</tt> - a SitemapGenerator::SitemapNamer instance. Can be passed instead of +filename+.
|
30
31
|
def initialize(opts={})
|
31
|
-
SitemapGenerator::Utilities.assert_valid_keys(opts, [:public_path, :sitemaps_path, :host, :filename])
|
32
|
-
opts.
|
33
|
-
|
34
|
-
:public_path => SitemapGenerator.app.root + 'public/',
|
35
|
-
:host => nil,
|
36
|
-
:filename => nil
|
37
|
-
)
|
32
|
+
SitemapGenerator::Utilities.assert_valid_keys(opts, [:public_path, :sitemaps_path, :host, :filename, :namer])
|
33
|
+
opts[:public_path] ||= SitemapGenerator.app.root + 'public/'
|
34
|
+
opts[:namer] = SitemapGenerator::SitemapNamer.new(:sitemap) if !opts[:filename] && !opts[:namer]
|
38
35
|
self.merge!(opts)
|
39
36
|
end
|
40
37
|
|
@@ -45,27 +42,64 @@ module SitemapGenerator
|
|
45
42
|
|
46
43
|
# Full path to the directory of the file.
|
47
44
|
def directory
|
48
|
-
(public_path + sitemaps_path).to_s
|
45
|
+
(public_path + sitemaps_path).expand_path.to_s
|
49
46
|
end
|
50
47
|
|
51
48
|
# Full path of the file including the filename.
|
52
49
|
def path
|
53
|
-
(public_path + sitemaps_path + filename).to_s
|
50
|
+
(public_path + sitemaps_path + filename).expand_path.to_s
|
54
51
|
end
|
55
52
|
|
56
53
|
# Relative path of the file (including the filename) relative to <tt>public_path</tt>
|
57
54
|
def path_in_public
|
58
55
|
(sitemaps_path + filename).to_s
|
59
56
|
end
|
60
|
-
|
57
|
+
|
61
58
|
# Full URL of the file.
|
62
59
|
def url
|
63
60
|
URI.join(host, sitemaps_path.to_s, filename.to_s).to_s
|
64
61
|
end
|
65
|
-
|
62
|
+
|
66
63
|
# Return the size of the file at <tt>path</tt>
|
67
64
|
def filesize
|
68
65
|
File.size?(path)
|
69
66
|
end
|
67
|
+
|
68
|
+
# Return the filename. Raises an exception if no filename or namer is set.
|
69
|
+
# If using a namer once the filename has been retrieved from the namer its
|
70
|
+
# value is locked so that it is unaffected by further changes to the namer.
|
71
|
+
def filename
|
72
|
+
raise SitemapGenerator::SitemapError, "No filename or namer set" unless self[:filename] || self[:namer]
|
73
|
+
unless self[:filename]
|
74
|
+
self.send(:[]=, :filename, self[:namer].to_s, :super => true)
|
75
|
+
end
|
76
|
+
self[:filename]
|
77
|
+
end
|
78
|
+
|
79
|
+
def namer
|
80
|
+
self[:namer]
|
81
|
+
end
|
82
|
+
|
83
|
+
# If you set the filename, clear the namer and vice versa.
|
84
|
+
def []=(key, value, opts={})
|
85
|
+
if !opts[:super]
|
86
|
+
case key
|
87
|
+
when :namer
|
88
|
+
super(:filename, nil)
|
89
|
+
when :filename
|
90
|
+
super(:namer, nil)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
super(key, value)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
class SitemapIndexLocation < SitemapLocation
|
98
|
+
def initialize(opts={})
|
99
|
+
if !opts[:filename] && !opts[:namer]
|
100
|
+
opts[:namer] = SitemapGenerator::SitemapIndexNamer.new(:sitemap_index)
|
101
|
+
end
|
102
|
+
super(opts)
|
103
|
+
end
|
70
104
|
end
|
71
|
-
end
|
105
|
+
end
|
@@ -1,30 +1,59 @@
|
|
1
1
|
module SitemapGenerator
|
2
|
-
# A
|
3
|
-
# Return an object with a method `next` that generates sitemaps with the given name
|
4
|
-
# and an index appended.
|
2
|
+
# A class for generating sitemap names given the base for the filename.
|
5
3
|
#
|
6
|
-
#
|
7
|
-
#
|
4
|
+
# === Example
|
5
|
+
# namer = SitemapNamer.new(:sitemap)
|
6
|
+
# namer.to_s => 'sitemap1.xml.gz'
|
7
|
+
# namer.next.to_s => 'sitemap2.xml.gz'
|
8
8
|
class SitemapNamer
|
9
|
+
NameError = Class.new(StandardError)
|
10
|
+
|
9
11
|
# Params:
|
10
|
-
#
|
12
|
+
# base - string or symbol that forms the base of the generated filename
|
11
13
|
#
|
12
14
|
# Options include:
|
13
15
|
# :extension - Default: '.xml.gz'. File extension to append.
|
14
16
|
# :start - Default: 1. Index at which to start counting.
|
15
|
-
def initialize(
|
17
|
+
def initialize(base, options={});
|
16
18
|
@options = options.reverse_merge(
|
17
19
|
:extension => '.xml.gz',
|
18
20
|
:start => 1
|
19
21
|
)
|
20
|
-
@
|
21
|
-
|
22
|
+
@base = base
|
23
|
+
reset
|
24
|
+
end
|
25
|
+
|
26
|
+
def to_s
|
27
|
+
"#{@base}#{@count}#{@options[:extension]}"
|
22
28
|
end
|
23
29
|
|
30
|
+
# Increment count and return self
|
24
31
|
def next
|
25
|
-
"#{@name}#{@count}#{@options[:extension]}"
|
26
|
-
ensure
|
27
32
|
@count += 1
|
33
|
+
self
|
34
|
+
end
|
35
|
+
|
36
|
+
# Decrement count and return self
|
37
|
+
def previous
|
38
|
+
raise NameError, "Already at the start of the series" if start?
|
39
|
+
@count -= 1
|
40
|
+
self
|
41
|
+
end
|
42
|
+
|
43
|
+
# Reset count to the starting index
|
44
|
+
def reset
|
45
|
+
@count = @options[:start]
|
46
|
+
end
|
47
|
+
|
48
|
+
def start?
|
49
|
+
@count <= @options[:start]
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
# A Namer for Sitemap Indexes. The name never changes.
|
54
|
+
class SitemapIndexNamer < SitemapNamer
|
55
|
+
def to_s
|
56
|
+
"#{@base}#{@options[:extension]}"
|
28
57
|
end
|
29
58
|
end
|
30
|
-
end
|
59
|
+
end
|
@@ -37,7 +37,6 @@ namespace :sitemap do
|
|
37
37
|
task 'refresh:no_ping' => ['sitemap:create']
|
38
38
|
|
39
39
|
task :create => ['sitemap:require_environment'] do
|
40
|
-
SitemapGenerator::
|
41
|
-
SitemapGenerator::Sitemap.create(ENV["CONFIG_FILE"])
|
40
|
+
SitemapGenerator::Interpreter.run(:config_file => ENV["CONFIG_FILE"], :verbose => verbose)
|
42
41
|
end
|
43
42
|
end
|
data/templates/sitemap.rb
CHANGED
@@ -1,28 +1,27 @@
|
|
1
1
|
# Set the host name for URL creation
|
2
2
|
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
3
3
|
|
4
|
-
SitemapGenerator::Sitemap.
|
4
|
+
SitemapGenerator::Sitemap.create do
|
5
5
|
# Put links creation logic here.
|
6
6
|
#
|
7
|
-
# The root path '/' and sitemap index file are added automatically.
|
7
|
+
# The root path '/' and sitemap index file are added automatically for you.
|
8
8
|
# Links are added to the Sitemap in the order they are specified.
|
9
9
|
#
|
10
|
-
# Usage:
|
10
|
+
# Usage: add(path, options={})
|
11
11
|
# (default options are used if you don't specify)
|
12
12
|
#
|
13
13
|
# Defaults: :priority => 0.5, :changefreq => 'weekly',
|
14
14
|
# :lastmod => Time.now, :host => default_host
|
15
|
-
#
|
16
|
-
#
|
15
|
+
#
|
17
16
|
# Examples:
|
18
|
-
#
|
17
|
+
#
|
19
18
|
# Add '/articles'
|
20
|
-
#
|
21
|
-
# sitemap.add articles_path, :priority => 0.7, :changefreq => 'daily'
|
22
19
|
#
|
23
|
-
#
|
20
|
+
# add articles_path, :priority => 0.7, :changefreq => 'daily'
|
21
|
+
#
|
22
|
+
# Add all articles:
|
24
23
|
#
|
25
24
|
# Article.find_each do |article|
|
26
|
-
#
|
25
|
+
# add article_path(article), :lastmod => article.updated_at
|
27
26
|
# end
|
28
|
-
end
|
27
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemap_generator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
|
-
- 1
|
8
|
-
- 5
|
9
7
|
- 2
|
10
|
-
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 2.0.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Karl Varga
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-
|
19
|
+
date: 2011-05-20 00:00:00 -07:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
@@ -111,13 +111,11 @@ extensions: []
|
|
111
111
|
|
112
112
|
extra_rdoc_files:
|
113
113
|
- README.md
|
114
|
-
- README.md.orig
|
115
114
|
files:
|
116
115
|
- Gemfile
|
117
116
|
- Gemfile.lock
|
118
117
|
- MIT-LICENSE
|
119
118
|
- README.md
|
120
|
-
- README.md.orig
|
121
119
|
- Rakefile
|
122
120
|
- VERSION
|
123
121
|
- lib/sitemap_generator.rb
|
data/README.md.orig
DELETED
@@ -1,374 +0,0 @@
|
|
1
|
-
SitemapGenerator
|
2
|
-
================
|
3
|
-
|
4
|
-
SitemapGenerator generates Sitemaps for your Rails application. The Sitemaps adhere to the [Sitemap 0.9 protocol][sitemap_protocol] specification. You specify the contents of your Sitemap using a configuration file, à la Rails Routes. A set of rake tasks is included to help you manage your Sitemaps.
|
5
|
-
|
6
|
-
Features
|
7
|
-
-------
|
8
|
-
|
9
|
-
- Supports [Video sitemaps][sitemap_video], [Image sitemaps][sitemap_images], and [Geo sitemaps][geo_tags]
|
10
|
-
- Rails 2.x and 3.x compatible
|
11
|
-
- Adheres to the [Sitemap 0.9 protocol][sitemap_protocol]
|
12
|
-
- Handles millions of links
|
13
|
-
- Compresses Sitemaps using GZip
|
14
|
-
- Notifies Search Engines (Google, Yahoo, Bing, Ask, SitemapWriter) of new sitemaps
|
15
|
-
- Ensures your old Sitemaps stay in place if the new Sitemap fails to generate
|
16
|
-
- You set the hostname (and protocol) of the links in your Sitemap
|
17
|
-
|
18
|
-
Changelog
|
19
|
-
-------
|
20
|
-
|
21
|
-
- v1.4.0: [Geo sitemap][geo_tags] support, support for generating multiple sitemap sets with different filenames
|
22
|
-
- v1.3.0: Support setting the sitemaps path
|
23
|
-
- v1.2.0: Verified working with Rails 3 stable release
|
24
|
-
- v1.1.0: [Video sitemap][sitemap_video] support
|
25
|
-
- v0.2.6: [Image Sitemap][sitemap_images] support
|
26
|
-
- v0.2.5: Rails 3 prerelease support (beta)
|
27
|
-
|
28
|
-
Foreword
|
29
|
-
-------
|
30
|
-
|
31
|
-
Adam Salter first created SitemapGenerator while we were working together in Sydney, Australia. Unfortunately, he passed away in 2009. Since then I have taken over development of SitemapGenerator.
|
32
|
-
|
33
|
-
Those who knew him know what an amazing guy he was, and what an excellent Rails programmer he was. His passing is a great loss to the Rails community.
|
34
|
-
|
35
|
-
The canonical repository is now: [http://github.com/kjvarga/sitemap_generator][canonical_repo]
|
36
|
-
|
37
|
-
Install
|
38
|
-
=======
|
39
|
-
|
40
|
-
**Rails 3:**
|
41
|
-
|
42
|
-
1. Add the gem to your `Gemfile`
|
43
|
-
|
44
|
-
gem 'sitemap_generator'
|
45
|
-
|
46
|
-
2. `$ rake sitemap:install`
|
47
|
-
|
48
|
-
You don't need to include the tasks in your `Rakefile` because the tasks are loaded for you.
|
49
|
-
|
50
|
-
**Pre Rails 3: As a gem**
|
51
|
-
|
52
|
-
1. Add the gem as a dependency in your <tt>config/environment.rb</tt>
|
53
|
-
|
54
|
-
config.gem 'sitemap_generator', :lib => false
|
55
|
-
|
56
|
-
2. `$ rake gems:install`
|
57
|
-
|
58
|
-
3. Add the following to your `Rakefile`
|
59
|
-
|
60
|
-
begin
|
61
|
-
require 'sitemap_generator/tasks'
|
62
|
-
rescue Exception => e
|
63
|
-
puts "Warning, couldn't load gem tasks: #{e.message}! Skipping..."
|
64
|
-
end
|
65
|
-
|
66
|
-
4. `$ rake sitemap:install`
|
67
|
-
|
68
|
-
**Pre Rails 3: As a plugin**
|
69
|
-
|
70
|
-
1. `$ ./script/plugin install git://github.com/kjvarga/sitemap_generator.git`
|
71
|
-
|
72
|
-
Usage
|
73
|
-
======
|
74
|
-
|
75
|
-
<code>rake sitemap:install</code> creates a <tt>config/sitemap.rb</tt> file which contains your logic for generating the Sitemap files.
|
76
|
-
|
77
|
-
Once you have configured your sitemap in <tt>config/sitemap.rb</tt> (see Configuration below) run <code>rake sitemap:refresh</code> as needed to create/rebuild your Sitemap files. Sitemaps are generated into the <tt>public/</tt> folder and are named <tt>sitemap_index.xml.gz</tt>, <tt>sitemap1.xml.gz</tt>, <tt>sitemap2.xml.gz</tt>, etc.
|
78
|
-
|
79
|
-
Using <code>rake sitemap:refresh</code> will notify major search engines to let them know that a new Sitemap is available (Google, Yahoo, Bing, Ask, SitemapWriter). To generate new Sitemaps without notifying search engines (for example when running in a local environment) use <code>rake sitemap:refresh:no_ping</code>.
|
80
|
-
|
81
|
-
To ping Yahoo you will need to set your Yahoo AppID in <tt>config/sitemap.rb</tt>. For example: <code>SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"</code>
|
82
|
-
|
83
|
-
To disable all non-essential output (only errors will be displayed) run the rake tasks with the <code>-s</code> option. For example <code>rake -s sitemap:refresh</code>.
|
84
|
-
|
85
|
-
Cron
|
86
|
-
-----
|
87
|
-
|
88
|
-
To keep your Sitemaps up-to-date, set up a cron job. Make sure to pass the <code>-s</code> option to silence rake. That way you will only get email when the sitemap build fails.
|
89
|
-
|
90
|
-
If you're using Whenever, your schedule would look something like the following:
|
91
|
-
|
92
|
-
# config/schedule.rb
|
93
|
-
every 1.day, :at => '5:00 am' do
|
94
|
-
rake "-s sitemap:refresh"
|
95
|
-
end
|
96
|
-
|
97
|
-
Robots.txt
|
98
|
-
----------
|
99
|
-
|
100
|
-
You should add the Sitemap index file to <code>public/robots.txt</code> to help search engines find your Sitemaps. The URL should be the complete URL to the Sitemap index file. For example:
|
101
|
-
|
102
|
-
Sitemap: http://www.example.org/sitemap_index.xml.gz
|
103
|
-
|
104
|
-
Image Sitemaps
|
105
|
-
-----------
|
106
|
-
|
107
|
-
Images can be added to a sitemap URL by passing an <tt>:images</tt> array to <tt>add()</tt>. Each item in the array must be a Hash containing tags defined by the [Image Sitemap][image_tags] specification. For example:
|
108
|
-
|
109
|
-
sitemap.add('/index.html', :images => [{ :loc => 'http://www.example.com/image.png', :title => 'Image' }])
|
110
|
-
|
111
|
-
Supported image options include:
|
112
|
-
|
113
|
-
* `loc` Required, location of the image
|
114
|
-
* `caption`
|
115
|
-
* `geo_location`
|
116
|
-
* `title`
|
117
|
-
* `license`
|
118
|
-
|
119
|
-
Video Sitemaps
|
120
|
-
-----------
|
121
|
-
|
122
|
-
A video can be added to a sitemap URL by passing a <tt>:video</tt> Hash to <tt>add()</tt>. The Hash can contain tags defined by the [Video Sitemap specification][video_tags]. To associate more than one <tt>tag</tt> with a video, pass the tags as an array with the key <tt>:tags</tt>.
|
123
|
-
|
124
|
-
sitemap.add('/index.html', :video => { :thumbnail_loc => 'http://www.example.com/video1_thumbnail.png', :title => 'Title', :description => 'Description', :content_loc => 'http://www.example.com/cool_video.mpg', :tags => %w[one two three], :category => 'Category' })
|
125
|
-
|
126
|
-
Supported video options include:
|
127
|
-
|
128
|
-
* `thumbnail_loc` Required
|
129
|
-
* `title` Required
|
130
|
-
* `description` Required
|
131
|
-
* `content_loc` Depends. At least one of `player_loc` or `content_loc` is required
|
132
|
-
* `player_loc` Depends. At least one of `player_loc` or `content_loc` is required
|
133
|
-
* `expiration_date` Recommended
|
134
|
-
* `duration` Recommended
|
135
|
-
* `rating`
|
136
|
-
* `view_count`
|
137
|
-
* `publication_date`
|
138
|
-
* `family_friendly`
|
139
|
-
* `tags` A list of tags if more than one tag.
|
140
|
-
* `tag` A single tag. See `tags`
|
141
|
-
* `category`
|
142
|
-
* `gallery_loc`
|
143
|
-
* `uploader` (use `uploader_info` to set the info attribute)
|
144
|
-
|
145
|
-
Geo Sitemaps
|
146
|
-
-----------
|
147
|
-
|
148
|
-
Pages with geo data can be added by passing a <tt>:geo</tt> Hash to <tt>add()</tt>. The Hash only supports one tag of <tt>:format</tt>. Google provides an [example of a geo sitemap link here][geo_tags]. Note that the sitemap does not actually contain your KML or GeoRSS. It merely links to a page that has this content.
|
149
|
-
|
150
|
-
sitemap.add('/restaurants/1234.kml', :geo => { :format => 'kml' })
|
151
|
-
|
152
|
-
Supported geo options include:
|
153
|
-
|
154
|
-
* `format` Required, either 'kml' or 'georss'
|
155
|
-
|
156
|
-
Configuration
|
157
|
-
======
|
158
|
-
|
159
|
-
The sitemap configuration file can be found in <tt>config/sitemap.rb</tt>. When you run a rake task to refresh your sitemaps this file is evaluated. It contains all your configuration settings, as well as your sitemap definition.
|
160
|
-
|
161
|
-
Sitemap Links
|
162
|
-
----------
|
163
|
-
|
164
|
-
The Root Path <tt>/</tt> and Sitemap Index file are automatically added to your sitemap. Links are added to the Sitemap output in the order they are specified. Add links to your sitemap by calling <tt>add_links</tt>, passing a block which receives the sitemap object. Then call <tt>add(path, options)</tt> on the sitemap to add a link.
|
165
|
-
|
166
|
-
For Example:
|
167
|
-
|
168
|
-
SitemapGenerator::Sitemap.add_links do |sitemap|
|
169
|
-
sitemap.add '/reports'
|
170
|
-
end
|
171
|
-
|
172
|
-
The Rails URL helpers are automatically included for you if Rails is detected. So in your call to <tt>add</tt> you can use them to generate paths for your active records, e.g.:
|
173
|
-
|
174
|
-
Article.find_each do |article|
|
175
|
-
sitemap.add article_path(article), :lastmod => article.updated_at
|
176
|
-
end
|
177
|
-
|
178
|
-
For large sitemaps it is advisable to iterate through your Active Records in batches to avoid loading all records into memory at once. As of Rails 2.3.2 you can use <tt>ActiveRecord::Base#find_each</tt> or <tt>ActiveRecord::Base#find_in_batches</tt> to do batched finds, which can significantly improve sitemap performance.
|
179
|
-
|
180
|
-
Valid [options to <tt>add</tt>](http://sitemaps.org/protocol.php#xmlTagDefinitions) are:
|
181
|
-
|
182
|
-
* `priority` The priority of this URL relative to other URLs on your site. Valid values range from 0.0 to 1.0. Default _0.5_
|
183
|
-
* `changefreq` One of: always, hourly, daily, weekly, monthly, yearly, never. Default _weekly_
|
184
|
-
* `lastmod` Time instance. The date of last modification. Default `Time.now`
|
185
|
-
* `host` Optional host for the link's URL. Defaults to `default_host`
|
186
|
-
|
187
|
-
Sitemaps Path
|
188
|
-
----------
|
189
|
-
|
190
|
-
By default sitemaps are generated into <tt>public/</tt>. You can customize the location for your generated sitemaps by setting <tt>sitemaps_path</tt> to a path relative to your public directory. The directory will be created for you if it does not already exist.
|
191
|
-
|
192
|
-
For example:
|
193
|
-
|
194
|
-
SitemapGenerator::Sitemap.sitemaps_path = 'sitemaps/'
|
195
|
-
|
196
|
-
Will generate sitemaps into the `public/sitemaps/` directory. If you want your sitemaps to be findable by robots, you need to specify the location of your sitemap index file in your <tt>public/robots.txt</tt>.
|
197
|
-
|
198
|
-
Sitemaps Host
|
199
|
-
----------
|
200
|
-
|
201
|
-
You must set the <tt>default_host</tt> that is to be used when adding links to your sitemap. The hostname should match the host that the sitemaps are going to be served from. For example:
|
202
|
-
|
203
|
-
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
204
|
-
|
205
|
-
The hostname must include the full protocol.
|
206
|
-
|
207
|
-
Sitemap Filenames
|
208
|
-
----------
|
209
|
-
|
210
|
-
By default sitemaps have the name <tt>sitemap1.xml.gz</tt>, <tt>sitemap2.xml.gz</tt>, etc with the sitemap index having name <tt>sitemap_index.xml.gz</tt>.
|
211
|
-
|
212
|
-
If you want to change the <tt>sitemap</tt> portion of the name you can set it as shown below. The surrounding structure of numbers, extensions, and _index will stay the same. For example:
|
213
|
-
|
214
|
-
SitemapGenerator::Sitemap.filename = "geo_sitemap"
|
215
|
-
|
216
|
-
Example Configuration File
|
217
|
-
---------
|
218
|
-
|
219
|
-
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
220
|
-
SitemapGenerator::Sitemap.yahoo_app_id = nil # Set to your Yahoo AppID to ping Yahoo
|
221
|
-
|
222
|
-
SitemapGenerator::Sitemap.add_links do |sitemap|
|
223
|
-
# Put links creation logic here.
|
224
|
-
#
|
225
|
-
# The Root Path ('/') and Sitemap Index file are added automatically.
|
226
|
-
# Links are added to the Sitemap output in the order they are specified.
|
227
|
-
#
|
228
|
-
# Usage: sitemap.add path, options
|
229
|
-
# (default options are used if you don't specify them)
|
230
|
-
#
|
231
|
-
# Defaults: :priority => 0.5, :changefreq => 'weekly',
|
232
|
-
# :lastmod => Time.now, :host => default_host
|
233
|
-
|
234
|
-
# add '/articles'
|
235
|
-
sitemap.add articles_path, :priority => 0.7, :changefreq => 'daily'
|
236
|
-
|
237
|
-
# add all articles
|
238
|
-
Article.all.each do |a|
|
239
|
-
sitemap.add article_path(a), :lastmod => a.updated_at
|
240
|
-
end
|
241
|
-
|
242
|
-
# add news page with images
|
243
|
-
News.all.each do |news|
|
244
|
-
images = news.images.collect do |image|
|
245
|
-
{ :loc => image.url, :title => image.name }
|
246
|
-
end
|
247
|
-
sitemap.add news_path(news), :images => images
|
248
|
-
end
|
249
|
-
end
|
250
|
-
|
251
|
-
Generating Multiple Sets Of Sitemaps
|
252
|
-
----------
|
253
|
-
|
254
|
-
To generate multiple sets of sitemaps you can create multiple configuration files. Each should contain a different <tt>SitemapGenerator::Sitemap.filename</tt> to avoid overwriting the previous set. (Of course you can keep the default name of 'sitemap' in one of them.) You can then build each set with a separate rake task. For example:
|
255
|
-
|
256
|
-
rake sitemap:refresh
|
257
|
-
rake sitemap:refresh CONFIG_FILE="config/geo_sitemap.rb"
|
258
|
-
<<<<<<< HEAD
|
259
|
-
|
260
|
-
The first one uses the default config file at <tt>config/sitemap.rb</tt>. Your two config files might look like this:
|
261
|
-
=======
|
262
|
-
|
263
|
-
The first one uses the default config file at <tt>config/sitemap.rb</tt>. Your first config file might look like this:
|
264
|
-
>>>>>>> lets you build multiple sitemap sets
|
265
|
-
|
266
|
-
# config/sitemap.rb
|
267
|
-
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
268
|
-
SitemapGenerator::Sitemap.add_links do |sitemap|
|
269
|
-
Store.each do |store|
|
270
|
-
sitemap.add store_path(store)
|
271
|
-
end
|
272
|
-
end
|
273
|
-
|
274
|
-
<<<<<<< HEAD
|
275
|
-
=======
|
276
|
-
And the second:
|
277
|
-
>>>>>>> lets you build multiple sitemap sets
|
278
|
-
|
279
|
-
# config/geo_sitemap.rb
|
280
|
-
SitemapGenerator::Sitemap.filename = "geo_sitemap"
|
281
|
-
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
282
|
-
SitemapGenerator::Sitemap.add_links do |sitemap|
|
283
|
-
Store.each do |store|
|
284
|
-
sitemap.add store_path(store, :format => :kml), :geo => { :format => 'kml' }
|
285
|
-
end
|
286
|
-
end
|
287
|
-
|
288
|
-
Raison d'être
|
289
|
-
-------
|
290
|
-
|
291
|
-
Most of the Sitemap plugins out there seem to try to recreate the Sitemap links by iterating the Rails routes. In some cases this is possible, but for a great deal of cases it isn't.
|
292
|
-
|
293
|
-
a) There are probably quite a few routes in your routes file that don't need inclusion in the Sitemap. (AJAX routes I'm looking at you.)
|
294
|
-
|
295
|
-
and
|
296
|
-
|
297
|
-
b) How would you infer the correct series of links for the following route?
|
298
|
-
|
299
|
-
map.zipcode 'location/:state/:city/:zipcode', :controller => 'zipcode', :action => 'index'
|
300
|
-
|
301
|
-
Don't tell me it's trivial, because it isn't. It just looks trivial.
|
302
|
-
|
303
|
-
So my idea is to have another file similar to 'routes.rb' called 'sitemap.rb', where you can define what goes into the Sitemap.
|
304
|
-
|
305
|
-
Here's my solution:
|
306
|
-
|
307
|
-
Zipcode.find(:all, :include => :city).each do |z|
|
308
|
-
sitemap.add zipcode_path(:state => z.city.state, :city => z.city, :zipcode => z)
|
309
|
-
end
|
310
|
-
|
311
|
-
Easy hey?
|
312
|
-
|
313
|
-
Other Sitemap settings for the link, like `lastmod`, `priority`, `changefreq` and `host` are entered automatically, although you can override them if you need to.
|
314
|
-
|
315
|
-
Compatibility
|
316
|
-
=======
|
317
|
-
|
318
|
-
Tested and working on:
|
319
|
-
|
320
|
-
- **Rails** 3.0.0
|
321
|
-
- **Rails** 1.x - 2.3.8
|
322
|
-
- **Ruby** 1.8.6, 1.8.7, 1.8.7 Enterprise Edition, 1.9.1
|
323
|
-
|
324
|
-
Notes
|
325
|
-
=======
|
326
|
-
|
327
|
-
1) New Capistrano deploys will remove your Sitemap files, unless you run `rake sitemap:refresh`. The way around this is to create a cap task to copy the sitemaps from the previous deploy:
|
328
|
-
|
329
|
-
after "deploy:update_code", "deploy:copy_old_sitemap"
|
330
|
-
|
331
|
-
namespace :deploy do
|
332
|
-
task :copy_old_sitemap do
|
333
|
-
run "if [ -e #{previous_release}/public/sitemap_index.xml.gz ]; then cp #{previous_release}/public/sitemap* #{current_release}/public/; fi"
|
334
|
-
end
|
335
|
-
end
|
336
|
-
|
337
|
-
Known Bugs
|
338
|
-
========
|
339
|
-
|
340
|
-
- There's no check on the size of a URL which [isn't supposed to exceed 2,048 bytes][sitemaps_xml].
|
341
|
-
- Currently only supports one Sitemap Index file, which can contain 50,000 Sitemap files which can each contain 50,000 urls, so it _only_ supports up to 2,500,000,000 (2.5 billion) urls. I personally have no need of support for more urls, but the plugin could be improved to support this.
|
342
|
-
|
343
|
-
Wishlist & Coming Soon
|
344
|
-
========
|
345
|
-
|
346
|
-
- Support for read-only filesystems
|
347
|
-
- Support for plain Ruby and Merb sitemaps
|
348
|
-
|
349
|
-
Thanks (in no particular order)
|
350
|
-
========
|
351
|
-
|
352
|
-
- [Alex Soto](http://github.com/apsoto) for video sitemaps
|
353
|
-
- [Alexandre Bini](http://github.com/alexandrebini) for image sitemaps
|
354
|
-
- [Dan Pickett](http://github.com/dpickett)
|
355
|
-
- [Rob Biedenharn](http://github.com/rab)
|
356
|
-
- [Richie Vos](http://github.com/jerryvos)
|
357
|
-
- [Adrian Mugnolo](http://github.com/xymbol)
|
358
|
-
- [Jason Weathered](http://github.com/jasoncodes)
|
359
|
-
- [Andy Stewart](http://github.com/airblade)
|
360
|
-
- [Brian Armstrong](https://github.com/barmstrong) for geo sitemaps
|
361
|
-
|
362
|
-
Copyright (c) 2009 Karl Varga released under the MIT license
|
363
|
-
|
364
|
-
[canonical_repo]:http://github.com/kjvarga/sitemap_generator
|
365
|
-
[enterprise_class]:https://twitter.com/dhh/status/1631034662 "I use enterprise in the same sense the Phusion guys do - i.e. Enterprise Ruby. Please don't look down on my use of the word 'enterprise' to represent being a cut above. It doesn't mean you ever have to work for a company the size of IBM. Or constantly fight inertia, writing crappy software, adhering to change management practices and spending hours in meetings... Not that there's anything wrong with that - Wait, what?"
|
366
|
-
[sitemaps_org]:http://www.sitemaps.org/protocol.php "http://www.sitemaps.org/protocol.php"
|
367
|
-
[sitemaps_xml]:http://www.sitemaps.org/protocol.php#xmlTagDefinitions "XML Tag Definitions"
|
368
|
-
[sitemap_generator_usage]:http://wiki.github.com/adamsalter/sitemap_generator/sitemapgenerator-usage "http://wiki.github.com/adamsalter/sitemap_generator/sitemapgenerator-usage"
|
369
|
-
[sitemap_images]:http://www.google.com/support/webmasters/bin/answer.py?answer=178636
|
370
|
-
[sitemap_video]:http://www.google.com/support/webmasters/bin/topic.py?topic=10079
|
371
|
-
[sitemap_protocol]:http://sitemaps.org/protocol.php
|
372
|
-
[video_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=80472#4
|
373
|
-
[image_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=178636
|
374
|
-
[geo_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=94555
|