sitemap_generator 1.5.2 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/README.md +355 -186
- data/VERSION +1 -1
- data/lib/sitemap_generator.rb +15 -2
- data/lib/sitemap_generator/builder/sitemap_file.rb +17 -31
- data/lib/sitemap_generator/builder/sitemap_index_file.rb +14 -14
- data/lib/sitemap_generator/interpreter.rb +46 -18
- data/lib/sitemap_generator/link_set.rb +306 -126
- data/lib/sitemap_generator/sitemap_location.rb +60 -26
- data/lib/sitemap_generator/sitemap_namer.rb +41 -12
- data/tasks/sitemap_generator_tasks.rake +1 -2
- data/templates/sitemap.rb +10 -11
- metadata +5 -7
- data/README.md.orig +0 -374
@@ -1,7 +1,7 @@
|
|
1
1
|
module SitemapGenerator
|
2
2
|
class SitemapLocation < Hash
|
3
3
|
|
4
|
-
[:
|
4
|
+
[:host].each do |method|
|
5
5
|
define_method(method) do
|
6
6
|
raise SitemapGenerator::SitemapError, "No value set for #{method}" unless self[method]
|
7
7
|
self[method]
|
@@ -10,31 +10,28 @@ module SitemapGenerator
|
|
10
10
|
|
11
11
|
[:public_path, :sitemaps_path].each do |method|
|
12
12
|
define_method(method) do
|
13
|
-
Pathname.new(self[method].nil? ? '' : self[method])
|
13
|
+
Pathname.new(self[method].nil? ? '' : self[method].to_s)
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
-
#
|
18
|
-
#
|
17
|
+
# If no +filename+ or +namer+ is provided, the default namer is used. For sitemap
|
18
|
+
# files this generates names like <tt>sitemap1.xml.gz</tt>, <tt>sitemap2.xml.gz</tt> and so on,
|
19
19
|
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
20
|
+
# === Options
|
21
|
+
# * <tt>public_path</tt> - path to the "public" directory, or the directory you want to
|
22
|
+
# write sitemaps in. Default is a directory <tt>public/</tt>
|
23
|
+
# in the current working directory, or relative to the Rails root
|
24
|
+
# directory if running under Rails.
|
25
|
+
# * <tt>sitemaps_path</tt> - gives the path relative to the <tt>public_path</tt> in which to
|
26
|
+
# write sitemaps e.g. <tt>sitemaps/</tt>.
|
27
|
+
# * <tt>host</tt> - host name for URLs. The full URL to the file is then constructed from
|
28
|
+
# the <tt>host</tt>, <tt>sitemaps_path</tt> and <tt>filename</tt>
|
29
|
+
# * <tt>filename</tt> - full name of the file e.g. <tt>'sitemap1.xml.gz'</tt>
|
30
|
+
# * <tt>namer</tt> - a SitemapGenerator::SitemapNamer instance. Can be passed instead of +filename+.
|
30
31
|
def initialize(opts={})
|
31
|
-
SitemapGenerator::Utilities.assert_valid_keys(opts, [:public_path, :sitemaps_path, :host, :filename])
|
32
|
-
opts.
|
33
|
-
|
34
|
-
:public_path => SitemapGenerator.app.root + 'public/',
|
35
|
-
:host => nil,
|
36
|
-
:filename => nil
|
37
|
-
)
|
32
|
+
SitemapGenerator::Utilities.assert_valid_keys(opts, [:public_path, :sitemaps_path, :host, :filename, :namer])
|
33
|
+
opts[:public_path] ||= SitemapGenerator.app.root + 'public/'
|
34
|
+
opts[:namer] = SitemapGenerator::SitemapNamer.new(:sitemap) if !opts[:filename] && !opts[:namer]
|
38
35
|
self.merge!(opts)
|
39
36
|
end
|
40
37
|
|
@@ -45,27 +42,64 @@ module SitemapGenerator
|
|
45
42
|
|
46
43
|
# Full path to the directory of the file.
|
47
44
|
def directory
|
48
|
-
(public_path + sitemaps_path).to_s
|
45
|
+
(public_path + sitemaps_path).expand_path.to_s
|
49
46
|
end
|
50
47
|
|
51
48
|
# Full path of the file including the filename.
|
52
49
|
def path
|
53
|
-
(public_path + sitemaps_path + filename).to_s
|
50
|
+
(public_path + sitemaps_path + filename).expand_path.to_s
|
54
51
|
end
|
55
52
|
|
56
53
|
# Relative path of the file (including the filename) relative to <tt>public_path</tt>
|
57
54
|
def path_in_public
|
58
55
|
(sitemaps_path + filename).to_s
|
59
56
|
end
|
60
|
-
|
57
|
+
|
61
58
|
# Full URL of the file.
|
62
59
|
def url
|
63
60
|
URI.join(host, sitemaps_path.to_s, filename.to_s).to_s
|
64
61
|
end
|
65
|
-
|
62
|
+
|
66
63
|
# Return the size of the file at <tt>path</tt>
|
67
64
|
def filesize
|
68
65
|
File.size?(path)
|
69
66
|
end
|
67
|
+
|
68
|
+
# Return the filename. Raises an exception if no filename or namer is set.
|
69
|
+
# If using a namer once the filename has been retrieved from the namer its
|
70
|
+
# value is locked so that it is unaffected by further changes to the namer.
|
71
|
+
def filename
|
72
|
+
raise SitemapGenerator::SitemapError, "No filename or namer set" unless self[:filename] || self[:namer]
|
73
|
+
unless self[:filename]
|
74
|
+
self.send(:[]=, :filename, self[:namer].to_s, :super => true)
|
75
|
+
end
|
76
|
+
self[:filename]
|
77
|
+
end
|
78
|
+
|
79
|
+
def namer
|
80
|
+
self[:namer]
|
81
|
+
end
|
82
|
+
|
83
|
+
# If you set the filename, clear the namer and vice versa.
|
84
|
+
def []=(key, value, opts={})
|
85
|
+
if !opts[:super]
|
86
|
+
case key
|
87
|
+
when :namer
|
88
|
+
super(:filename, nil)
|
89
|
+
when :filename
|
90
|
+
super(:namer, nil)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
super(key, value)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
class SitemapIndexLocation < SitemapLocation
|
98
|
+
def initialize(opts={})
|
99
|
+
if !opts[:filename] && !opts[:namer]
|
100
|
+
opts[:namer] = SitemapGenerator::SitemapIndexNamer.new(:sitemap_index)
|
101
|
+
end
|
102
|
+
super(opts)
|
103
|
+
end
|
70
104
|
end
|
71
|
-
end
|
105
|
+
end
|
@@ -1,30 +1,59 @@
|
|
1
1
|
module SitemapGenerator
|
2
|
-
# A
|
3
|
-
# Return an object with a method `next` that generates sitemaps with the given name
|
4
|
-
# and an index appended.
|
2
|
+
# A class for generating sitemap names given the base for the filename.
|
5
3
|
#
|
6
|
-
#
|
7
|
-
#
|
4
|
+
# === Example
|
5
|
+
# namer = SitemapNamer.new(:sitemap)
|
6
|
+
# namer.to_s => 'sitemap1.xml.gz'
|
7
|
+
# namer.next.to_s => 'sitemap2.xml.gz'
|
8
8
|
class SitemapNamer
|
9
|
+
NameError = Class.new(StandardError)
|
10
|
+
|
9
11
|
# Params:
|
10
|
-
#
|
12
|
+
# base - string or symbol that forms the base of the generated filename
|
11
13
|
#
|
12
14
|
# Options include:
|
13
15
|
# :extension - Default: '.xml.gz'. File extension to append.
|
14
16
|
# :start - Default: 1. Index at which to start counting.
|
15
|
-
def initialize(
|
17
|
+
def initialize(base, options={});
|
16
18
|
@options = options.reverse_merge(
|
17
19
|
:extension => '.xml.gz',
|
18
20
|
:start => 1
|
19
21
|
)
|
20
|
-
@
|
21
|
-
|
22
|
+
@base = base
|
23
|
+
reset
|
24
|
+
end
|
25
|
+
|
26
|
+
def to_s
|
27
|
+
"#{@base}#{@count}#{@options[:extension]}"
|
22
28
|
end
|
23
29
|
|
30
|
+
# Increment count and return self
|
24
31
|
def next
|
25
|
-
"#{@name}#{@count}#{@options[:extension]}"
|
26
|
-
ensure
|
27
32
|
@count += 1
|
33
|
+
self
|
34
|
+
end
|
35
|
+
|
36
|
+
# Decrement count and return self
|
37
|
+
def previous
|
38
|
+
raise NameError, "Already at the start of the series" if start?
|
39
|
+
@count -= 1
|
40
|
+
self
|
41
|
+
end
|
42
|
+
|
43
|
+
# Reset count to the starting index
|
44
|
+
def reset
|
45
|
+
@count = @options[:start]
|
46
|
+
end
|
47
|
+
|
48
|
+
def start?
|
49
|
+
@count <= @options[:start]
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
# A Namer for Sitemap Indexes. The name never changes.
|
54
|
+
class SitemapIndexNamer < SitemapNamer
|
55
|
+
def to_s
|
56
|
+
"#{@base}#{@options[:extension]}"
|
28
57
|
end
|
29
58
|
end
|
30
|
-
end
|
59
|
+
end
|
@@ -37,7 +37,6 @@ namespace :sitemap do
|
|
37
37
|
task 'refresh:no_ping' => ['sitemap:create']
|
38
38
|
|
39
39
|
task :create => ['sitemap:require_environment'] do
|
40
|
-
SitemapGenerator::
|
41
|
-
SitemapGenerator::Sitemap.create(ENV["CONFIG_FILE"])
|
40
|
+
SitemapGenerator::Interpreter.run(:config_file => ENV["CONFIG_FILE"], :verbose => verbose)
|
42
41
|
end
|
43
42
|
end
|
data/templates/sitemap.rb
CHANGED
@@ -1,28 +1,27 @@
|
|
1
1
|
# Set the host name for URL creation
|
2
2
|
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
3
3
|
|
4
|
-
SitemapGenerator::Sitemap.
|
4
|
+
SitemapGenerator::Sitemap.create do
|
5
5
|
# Put links creation logic here.
|
6
6
|
#
|
7
|
-
# The root path '/' and sitemap index file are added automatically.
|
7
|
+
# The root path '/' and sitemap index file are added automatically for you.
|
8
8
|
# Links are added to the Sitemap in the order they are specified.
|
9
9
|
#
|
10
|
-
# Usage:
|
10
|
+
# Usage: add(path, options={})
|
11
11
|
# (default options are used if you don't specify)
|
12
12
|
#
|
13
13
|
# Defaults: :priority => 0.5, :changefreq => 'weekly',
|
14
14
|
# :lastmod => Time.now, :host => default_host
|
15
|
-
#
|
16
|
-
#
|
15
|
+
#
|
17
16
|
# Examples:
|
18
|
-
#
|
17
|
+
#
|
19
18
|
# Add '/articles'
|
20
|
-
#
|
21
|
-
# sitemap.add articles_path, :priority => 0.7, :changefreq => 'daily'
|
22
19
|
#
|
23
|
-
#
|
20
|
+
# add articles_path, :priority => 0.7, :changefreq => 'daily'
|
21
|
+
#
|
22
|
+
# Add all articles:
|
24
23
|
#
|
25
24
|
# Article.find_each do |article|
|
26
|
-
#
|
25
|
+
# add article_path(article), :lastmod => article.updated_at
|
27
26
|
# end
|
28
|
-
end
|
27
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemap_generator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
|
-
- 1
|
8
|
-
- 5
|
9
7
|
- 2
|
10
|
-
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 2.0.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Karl Varga
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-
|
19
|
+
date: 2011-05-20 00:00:00 -07:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
@@ -111,13 +111,11 @@ extensions: []
|
|
111
111
|
|
112
112
|
extra_rdoc_files:
|
113
113
|
- README.md
|
114
|
-
- README.md.orig
|
115
114
|
files:
|
116
115
|
- Gemfile
|
117
116
|
- Gemfile.lock
|
118
117
|
- MIT-LICENSE
|
119
118
|
- README.md
|
120
|
-
- README.md.orig
|
121
119
|
- Rakefile
|
122
120
|
- VERSION
|
123
121
|
- lib/sitemap_generator.rb
|
data/README.md.orig
DELETED
@@ -1,374 +0,0 @@
|
|
1
|
-
SitemapGenerator
|
2
|
-
================
|
3
|
-
|
4
|
-
SitemapGenerator generates Sitemaps for your Rails application. The Sitemaps adhere to the [Sitemap 0.9 protocol][sitemap_protocol] specification. You specify the contents of your Sitemap using a configuration file, à la Rails Routes. A set of rake tasks is included to help you manage your Sitemaps.
|
5
|
-
|
6
|
-
Features
|
7
|
-
-------
|
8
|
-
|
9
|
-
- Supports [Video sitemaps][sitemap_video], [Image sitemaps][sitemap_images], and [Geo sitemaps][geo_tags]
|
10
|
-
- Rails 2.x and 3.x compatible
|
11
|
-
- Adheres to the [Sitemap 0.9 protocol][sitemap_protocol]
|
12
|
-
- Handles millions of links
|
13
|
-
- Compresses Sitemaps using GZip
|
14
|
-
- Notifies Search Engines (Google, Yahoo, Bing, Ask, SitemapWriter) of new sitemaps
|
15
|
-
- Ensures your old Sitemaps stay in place if the new Sitemap fails to generate
|
16
|
-
- You set the hostname (and protocol) of the links in your Sitemap
|
17
|
-
|
18
|
-
Changelog
|
19
|
-
-------
|
20
|
-
|
21
|
-
- v1.4.0: [Geo sitemap][geo_tags] support, support for generating multiple sitemap sets with different filenames
|
22
|
-
- v1.3.0: Support setting the sitemaps path
|
23
|
-
- v1.2.0: Verified working with Rails 3 stable release
|
24
|
-
- v1.1.0: [Video sitemap][sitemap_video] support
|
25
|
-
- v0.2.6: [Image Sitemap][sitemap_images] support
|
26
|
-
- v0.2.5: Rails 3 prerelease support (beta)
|
27
|
-
|
28
|
-
Foreword
|
29
|
-
-------
|
30
|
-
|
31
|
-
Adam Salter first created SitemapGenerator while we were working together in Sydney, Australia. Unfortunately, he passed away in 2009. Since then I have taken over development of SitemapGenerator.
|
32
|
-
|
33
|
-
Those who knew him know what an amazing guy he was, and what an excellent Rails programmer he was. His passing is a great loss to the Rails community.
|
34
|
-
|
35
|
-
The canonical repository is now: [http://github.com/kjvarga/sitemap_generator][canonical_repo]
|
36
|
-
|
37
|
-
Install
|
38
|
-
=======
|
39
|
-
|
40
|
-
**Rails 3:**
|
41
|
-
|
42
|
-
1. Add the gem to your `Gemfile`
|
43
|
-
|
44
|
-
gem 'sitemap_generator'
|
45
|
-
|
46
|
-
2. `$ rake sitemap:install`
|
47
|
-
|
48
|
-
You don't need to include the tasks in your `Rakefile` because the tasks are loaded for you.
|
49
|
-
|
50
|
-
**Pre Rails 3: As a gem**
|
51
|
-
|
52
|
-
1. Add the gem as a dependency in your <tt>config/environment.rb</tt>
|
53
|
-
|
54
|
-
config.gem 'sitemap_generator', :lib => false
|
55
|
-
|
56
|
-
2. `$ rake gems:install`
|
57
|
-
|
58
|
-
3. Add the following to your `Rakefile`
|
59
|
-
|
60
|
-
begin
|
61
|
-
require 'sitemap_generator/tasks'
|
62
|
-
rescue Exception => e
|
63
|
-
puts "Warning, couldn't load gem tasks: #{e.message}! Skipping..."
|
64
|
-
end
|
65
|
-
|
66
|
-
4. `$ rake sitemap:install`
|
67
|
-
|
68
|
-
**Pre Rails 3: As a plugin**
|
69
|
-
|
70
|
-
1. `$ ./script/plugin install git://github.com/kjvarga/sitemap_generator.git`
|
71
|
-
|
72
|
-
Usage
|
73
|
-
======
|
74
|
-
|
75
|
-
<code>rake sitemap:install</code> creates a <tt>config/sitemap.rb</tt> file which contains your logic for generating the Sitemap files.
|
76
|
-
|
77
|
-
Once you have configured your sitemap in <tt>config/sitemap.rb</tt> (see Configuration below) run <code>rake sitemap:refresh</code> as needed to create/rebuild your Sitemap files. Sitemaps are generated into the <tt>public/</tt> folder and are named <tt>sitemap_index.xml.gz</tt>, <tt>sitemap1.xml.gz</tt>, <tt>sitemap2.xml.gz</tt>, etc.
|
78
|
-
|
79
|
-
Using <code>rake sitemap:refresh</code> will notify major search engines to let them know that a new Sitemap is available (Google, Yahoo, Bing, Ask, SitemapWriter). To generate new Sitemaps without notifying search engines (for example when running in a local environment) use <code>rake sitemap:refresh:no_ping</code>.
|
80
|
-
|
81
|
-
To ping Yahoo you will need to set your Yahoo AppID in <tt>config/sitemap.rb</tt>. For example: <code>SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"</code>
|
82
|
-
|
83
|
-
To disable all non-essential output (only errors will be displayed) run the rake tasks with the <code>-s</code> option. For example <code>rake -s sitemap:refresh</code>.
|
84
|
-
|
85
|
-
Cron
|
86
|
-
-----
|
87
|
-
|
88
|
-
To keep your Sitemaps up-to-date, setup a cron job. Make sure to pass the <code>-s</code> option to silence rake. That way you will only get email when the sitemap build fails.
|
89
|
-
|
90
|
-
If you're using Whenever, your schedule would look something like the following:
|
91
|
-
|
92
|
-
# config/schedule.rb
|
93
|
-
every 1.day, :at => '5:00 am' do
|
94
|
-
rake "-s sitemap:refresh"
|
95
|
-
end
|
96
|
-
|
97
|
-
Robots.txt
|
98
|
-
----------
|
99
|
-
|
100
|
-
You should add the Sitemap index file to <code>public/robots.txt</code> to help search engines find your Sitemaps. The URL should be the complete URL to the Sitemap index file. For example:
|
101
|
-
|
102
|
-
Sitemap: http://www.example.org/sitemap_index.xml.gz
|
103
|
-
|
104
|
-
Image Sitemaps
|
105
|
-
-----------
|
106
|
-
|
107
|
-
Images can be added to a sitemap URL by passing an <tt>:images</tt> array to <tt>add()</tt>. Each item in the array must be a Hash containing tags defined by the [Image Sitemap][image_tags] specification. For example:
|
108
|
-
|
109
|
-
sitemap.add('/index.html', :images => [{ :loc => 'http://www.example.com/image.png', :title => 'Image' }])
|
110
|
-
|
111
|
-
Supported image options include:
|
112
|
-
|
113
|
-
* `loc` Required, location of the image
|
114
|
-
* `caption`
|
115
|
-
* `geo_location`
|
116
|
-
* `title`
|
117
|
-
* `license`
|
118
|
-
|
119
|
-
Video Sitemaps
|
120
|
-
-----------
|
121
|
-
|
122
|
-
A video can be added to a sitemap URL by passing a <tt>:video</tt> Hash to <tt>add()</tt>. The Hash can contain tags defined by the [Video Sitemap specification][video_tags]. To associate more than one <tt>tag</tt> with a video, pass the tags as an array with the key <tt>:tags</tt>.
|
123
|
-
|
124
|
-
sitemap.add('/index.html', :video => { :thumbnail_loc => 'http://www.example.com/video1_thumbnail.png', :title => 'Title', :description => 'Description', :content_loc => 'http://www.example.com/cool_video.mpg', :tags => %w[one two three], :category => 'Category' })
|
125
|
-
|
126
|
-
Supported video options include:
|
127
|
-
|
128
|
-
* `thumbnail_loc` Required
|
129
|
-
* `title` Required
|
130
|
-
* `description` Required
|
131
|
-
* `content_loc` Depends. At least one of `player_loc` or `content_loc` is required
|
132
|
-
* `player_loc` Depends. At least one of `player_loc` or `content_loc` is required
|
133
|
-
* `expiration_date` Recommended
|
134
|
-
* `duration` Recommended
|
135
|
-
* `rating`
|
136
|
-
* `view_count`
|
137
|
-
* `publication_date`
|
138
|
-
* `family_friendly`
|
139
|
-
* `tags` A list of tags if more than one tag.
|
140
|
-
* `tag` A single tag. See `tags`
|
141
|
-
* `category`
|
142
|
-
* `gallery_loc`
|
143
|
-
* `uploader` (use `uploader_info` to set the info attribute)
|
144
|
-
|
145
|
-
Geo Sitemaps
|
146
|
-
-----------
|
147
|
-
|
148
|
-
Pages with geo data can be added by passing a <tt>:geo</tt> Hash to <tt>add()</tt>. The Hash only supports one tag of <tt>:format</tt>. Google provides an [example of a geo sitemap link here][geo_tags]. Note that the sitemap does not actually contain your KML or GeoRSS. It merely links to a page that has this content.
|
149
|
-
|
150
|
-
sitemap.add('/restaurants/1234.kml', :geo => { :format => 'kml' })
|
151
|
-
|
152
|
-
Supported geo options include:
|
153
|
-
|
154
|
-
* `format` Required, either 'kml' or 'georss'
|
155
|
-
|
156
|
-
Configuration
|
157
|
-
======
|
158
|
-
|
159
|
-
The sitemap configuration file can be found in <tt>config/sitemap.rb</tt>. When you run a rake task to refresh your sitemaps this file is evaluated. It contains all your configuration settings, as well as your sitemap definition.
|
160
|
-
|
161
|
-
Sitemap Links
|
162
|
-
----------
|
163
|
-
|
164
|
-
The Root Path <tt>/</tt> and Sitemap Index file are automatically added to your sitemap. Links are added to the Sitemap output in the order they are specified. Add links to your sitemap by calling <tt>add_links</tt>, passing a block which receives the sitemap object. Then call <tt>add(path, options)</tt> on the sitemap to add a link.
|
165
|
-
|
166
|
-
For Example:
|
167
|
-
|
168
|
-
SitemapGenerator::Sitemap.add_links do |sitemap|
|
169
|
-
sitemap.add '/reports'
|
170
|
-
end
|
171
|
-
|
172
|
-
The Rails URL helpers are automatically included for you if Rails is detected. So in your call to <tt>add</tt> you can use them to generate paths for your active records, e.g.:
|
173
|
-
|
174
|
-
Article.find_each do |article|
|
175
|
-
sitemap.add article_path(article), :lastmod => article.updated_at
|
176
|
-
end
|
177
|
-
|
178
|
-
For large sitemaps it is advisable to iterate through your Active Records in batches to avoid loading all records into memory at once. As of Rails 2.3.2 you can use <tt>ActiveRecord::Base#find_each</tt> or <tt>ActiveRecord::Base#find_in_batches</tt> to do batched finds, which can significantly improve sitemap performance.
|
179
|
-
|
180
|
-
Valid [options to <tt>add</tt>](http://sitemaps.org/protocol.php#xmlTagDefinitions) are:
|
181
|
-
|
182
|
-
* `priority` The priority of this URL relative to other URLs on your site. Valid values range from 0.0 to 1.0. Default _0.5_
|
183
|
-
* `changefreq` One of: always, hourly, daily, weekly, monthly, yearly, never. Default _weekly_
|
184
|
-
* `lastmod` Time instance. The date of last modification. Default `Time.now`
|
185
|
-
* `host` Optional host for the link's URL. Defaults to `default_host`
|
186
|
-
|
187
|
-
Sitemaps Path
|
188
|
-
----------
|
189
|
-
|
190
|
-
By default sitemaps are generated into <tt>public/</tt>. You can customize the location for your generated sitemaps by setting <tt>sitemaps_path</tt> to a path relative to your public directory. The directory will be created for you if it does not already exist.
|
191
|
-
|
192
|
-
For example:
|
193
|
-
|
194
|
-
SitemapGenerator::Sitemap.sitemaps_path = 'sitemaps/'
|
195
|
-
|
196
|
-
Will generate sitemaps into the `public/sitemaps/` directory. If you want your sitemaps to be findable by robots, you need to specify the location of your sitemap index file in your <tt>public/robots.txt</tt>.
|
197
|
-
|
198
|
-
Sitemaps Host
|
199
|
-
----------
|
200
|
-
|
201
|
-
You must set the <tt>default_host</tt> that is to be used when adding links to your sitemap. The hostname should match the host that the sitemaps are going to be served from. For example:
|
202
|
-
|
203
|
-
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
204
|
-
|
205
|
-
The hostname must include the full protocol.
|
206
|
-
|
207
|
-
Sitemap Filenames
|
208
|
-
----------
|
209
|
-
|
210
|
-
By default sitemaps have the name <tt>sitemap1.xml.gz</tt>, <tt>sitemap2.xml.gz</tt>, etc with the sitemap index having name <tt>sitemap_index.xml.gz</tt>.
|
211
|
-
|
212
|
-
If you want to change the <tt>sitemap</tt> portion of the name you can set it as shown below. The surrounding structure of numbers, extensions, and _index will stay the same. For example:
|
213
|
-
|
214
|
-
SitemapGenerator::Sitemap.filename = "geo_sitemap"
|
215
|
-
|
216
|
-
Example Configuration File
|
217
|
-
---------
|
218
|
-
|
219
|
-
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
220
|
-
SitemapGenerator::Sitemap.yahoo_app_id = nil # Set to your Yahoo AppID to ping Yahoo
|
221
|
-
|
222
|
-
SitemapGenerator::Sitemap.add_links do |sitemap|
|
223
|
-
# Put links creation logic here.
|
224
|
-
#
|
225
|
-
# The Root Path ('/') and Sitemap Index file are added automatically.
|
226
|
-
# Links are added to the Sitemap output in the order they are specified.
|
227
|
-
#
|
228
|
-
# Usage: sitemap.add path, options
|
229
|
-
# (default options are used if you don't specify them)
|
230
|
-
#
|
231
|
-
# Defaults: :priority => 0.5, :changefreq => 'weekly',
|
232
|
-
# :lastmod => Time.now, :host => default_host
|
233
|
-
|
234
|
-
# add '/articles'
|
235
|
-
sitemap.add articles_path, :priority => 0.7, :changefreq => 'daily'
|
236
|
-
|
237
|
-
# add all articles
|
238
|
-
Article.all.each do |a|
|
239
|
-
sitemap.add article_path(a), :lastmod => a.updated_at
|
240
|
-
end
|
241
|
-
|
242
|
-
# add news page with images
|
243
|
-
News.all.each do |news|
|
244
|
-
images = news.images.collect do |image|
|
245
|
-
{ :loc => image.url, :title => image.name }
|
246
|
-
end
|
247
|
-
sitemap.add news_path(news), :images => images
|
248
|
-
end
|
249
|
-
end
|
250
|
-
|
251
|
-
Generating Multiple Sets Of Sitemaps
|
252
|
-
----------
|
253
|
-
|
254
|
-
To generate multiple sets of sitemaps you can create multiple configuration files. Each should contain a different <tt>SitemapGenerator::Sitemap.filename</tt> to avoid overwriting the previous set. (Of course you can keep the default name of 'sitemap' in one of them.) You can then build each set with a separate rake task. For example:
|
255
|
-
|
256
|
-
rake sitemap:refresh
|
257
|
-
rake sitemap:refresh CONFIG_FILE="config/geo_sitemap.rb"
|
258
|
-
<<<<<<< HEAD
|
259
|
-
|
260
|
-
The first one uses the default config file at <tt>config/sitemap.rb</tt>. Your two config files might look like this:
|
261
|
-
=======
|
262
|
-
|
263
|
-
The first one uses the default config file at <tt>config/sitemap.rb</tt>. Your first config file might look like this:
|
264
|
-
>>>>>>> lets you build multiple sitemap sets
|
265
|
-
|
266
|
-
# config/sitemap.rb
|
267
|
-
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
268
|
-
SitemapGenerator::Sitemap.add_links do |sitemap|
|
269
|
-
Store.each do |store|
|
270
|
-
sitemap.add store_path(store)
|
271
|
-
end
|
272
|
-
end
|
273
|
-
|
274
|
-
<<<<<<< HEAD
|
275
|
-
=======
|
276
|
-
And the second:
|
277
|
-
>>>>>>> lets you build multiple sitemap sets
|
278
|
-
|
279
|
-
# config/geo_sitemap.rb
|
280
|
-
SitemapGenerator::Sitemap.filename = "geo_sitemap"
|
281
|
-
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
282
|
-
SitemapGenerator::Sitemap.add_links do |sitemap|
|
283
|
-
Store.each do |store|
|
284
|
-
sitemap.add store_path(store, :format => :kml), :geo => { :format => 'kml' }
|
285
|
-
end
|
286
|
-
end
|
287
|
-
|
288
|
-
Raison d'être
|
289
|
-
-------
|
290
|
-
|
291
|
-
Most of the Sitemap plugins out there seem to try to recreate the Sitemap links by iterating the Rails routes. In some cases this is possible, but for a great deal of cases it isn't.
|
292
|
-
|
293
|
-
a) There are probably quite a few routes in your routes file that don't need inclusion in the Sitemap. (AJAX routes I'm looking at you.)
|
294
|
-
|
295
|
-
and
|
296
|
-
|
297
|
-
b) How would you infer the correct series of links for the following route?
|
298
|
-
|
299
|
-
map.zipcode 'location/:state/:city/:zipcode', :controller => 'zipcode', :action => 'index'
|
300
|
-
|
301
|
-
Don't tell me it's trivial, because it isn't. It just looks trivial.
|
302
|
-
|
303
|
-
So my idea is to have another file similar to 'routes.rb' called 'sitemap.rb', where you can define what goes into the Sitemap.
|
304
|
-
|
305
|
-
Here's my solution:
|
306
|
-
|
307
|
-
Zipcode.find(:all, :include => :city).each do |z|
|
308
|
-
sitemap.add zipcode_path(:state => z.city.state, :city => z.city, :zipcode => z)
|
309
|
-
end
|
310
|
-
|
311
|
-
Easy hey?
|
312
|
-
|
313
|
-
Other Sitemap settings for the link, like `lastmod`, `priority`, `changefreq` and `host` are entered automatically, although you can override them if you need to.
|
314
|
-
|
315
|
-
Compatibility
|
316
|
-
=======
|
317
|
-
|
318
|
-
Tested and working on:
|
319
|
-
|
320
|
-
- **Rails** 3.0.0
|
321
|
-
- **Rails** 1.x - 2.3.8
|
322
|
-
- **Ruby** 1.8.6, 1.8.7, 1.8.7 Enterprise Edition, 1.9.1
|
323
|
-
|
324
|
-
Notes
|
325
|
-
=======
|
326
|
-
|
327
|
-
1) New Capistrano deploys will remove your Sitemap files, unless you run `rake sitemap:refresh`. The way around this is to create a cap task to copy the sitemaps from the previous deploy:
|
328
|
-
|
329
|
-
after "deploy:update_code", "deploy:copy_old_sitemap"
|
330
|
-
|
331
|
-
namespace :deploy do
|
332
|
-
task :copy_old_sitemap do
|
333
|
-
run "if [ -e #{previous_release}/public/sitemap_index.xml.gz ]; then cp #{previous_release}/public/sitemap* #{current_release}/public/; fi"
|
334
|
-
end
|
335
|
-
end
|
336
|
-
|
337
|
-
Known Bugs
|
338
|
-
========
|
339
|
-
|
340
|
-
- There's no check on the size of a URL which [isn't supposed to exceed 2,048 bytes][sitemaps_xml].
|
341
|
-
- Currently only supports one Sitemap Index file, which can contain 50,000 Sitemap files which can each contain 50,000 urls, so it _only_ supports up to 2,500,000,000 (2.5 billion) urls. I personally have no need of support for more urls, but plugin could be improved to support this.
|
342
|
-
|
343
|
-
Wishlist & Coming Soon
|
344
|
-
========
|
345
|
-
|
346
|
-
- Support for read-only filesystems
|
347
|
-
- Support for plain Ruby and Merb sitemaps
|
348
|
-
|
349
|
-
Thanks (in no particular order)
|
350
|
-
========
|
351
|
-
|
352
|
-
- [Alex Soto](http://github.com/apsoto) for video sitemaps
|
353
|
-
- [Alexandre Bini](http://github.com/alexandrebini) for image sitemaps
|
354
|
-
- [Dan Pickett](http://github.com/dpickett)
|
355
|
-
- [Rob Biedenharn](http://github.com/rab)
|
356
|
-
- [Richie Vos](http://github.com/jerryvos)
|
357
|
-
- [Adrian Mugnolo](http://github.com/xymbol)
|
358
|
-
- [Jason Weathered](http://github.com/jasoncodes)
|
359
|
-
- [Andy Stewart](http://github.com/airblade)
|
360
|
-
- [Brian Armstrong](https://github.com/barmstrong) for geo sitemaps
|
361
|
-
|
362
|
-
Copyright (c) 2009 Karl Varga released under the MIT license
|
363
|
-
|
364
|
-
[canonical_repo]:http://github.com/kjvarga/sitemap_generator
|
365
|
-
[enterprise_class]:https://twitter.com/dhh/status/1631034662 "I use enterprise in the same sense the Phusion guys do - i.e. Enterprise Ruby. Please don't look down on my use of the word 'enterprise' to represent being a cut above. It doesn't mean you ever have to work for a company the size of IBM. Or constantly fight inertia, writing crappy software, adhering to change management practices and spending hours in meetings... Not that there's anything wrong with that - Wait, what?"
|
366
|
-
[sitemaps_org]:http://www.sitemaps.org/protocol.php "http://www.sitemaps.org/protocol.php"
|
367
|
-
[sitemaps_xml]:http://www.sitemaps.org/protocol.php#xmlTagDefinitions "XML Tag Definitions"
|
368
|
-
[sitemap_generator_usage]:http://wiki.github.com/adamsalter/sitemap_generator/sitemapgenerator-usage "http://wiki.github.com/adamsalter/sitemap_generator/sitemapgenerator-usage"
|
369
|
-
[sitemap_images]:http://www.google.com/support/webmasters/bin/answer.py?answer=178636
|
370
|
-
[sitemap_video]:http://www.google.com/support/webmasters/bin/topic.py?topic=10079
|
371
|
-
[sitemap_protocol]:http://sitemaps.org/protocol.php
|
372
|
-
[video_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=80472#4
|
373
|
-
[image_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=178636
|
374
|
-
[geo_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=94555
|