sitemap_generator 1.3.9 → 1.3.10
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +5 -5
- data/README.md +61 -2
- data/README.md.orig +374 -0
- data/VERSION +1 -1
- data/lib/sitemap_generator.rb +1 -0
- data/lib/sitemap_generator/builder/sitemap_file.rb +1 -0
- data/lib/sitemap_generator/builder/sitemap_url.rb +10 -2
- data/lib/sitemap_generator/link_set.rb +16 -6
- data/tasks/sitemap_generator_tasks.rake +1 -1
- metadata +6 -4
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: ./
|
3
3
|
specs:
|
4
|
-
sitemap_generator (1.3.
|
4
|
+
sitemap_generator (1.3.9)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: http://rubygems.org/
|
@@ -28,13 +28,13 @@ GEM
|
|
28
28
|
gemcutter (>= 0.1.0)
|
29
29
|
git (>= 1.2.5)
|
30
30
|
rubyforge (>= 2.0.0)
|
31
|
-
json (1.
|
32
|
-
json_pure (1.
|
31
|
+
json (1.5.1)
|
32
|
+
json_pure (1.5.1)
|
33
33
|
linecache (0.43)
|
34
34
|
mocha (0.9.10)
|
35
35
|
rake
|
36
36
|
nokogiri (1.4.4)
|
37
|
-
rack (1.1.
|
37
|
+
rack (1.1.1)
|
38
38
|
rails (2.3.8)
|
39
39
|
actionmailer (= 2.3.8)
|
40
40
|
actionpack (= 2.3.8)
|
@@ -57,7 +57,7 @@ GEM
|
|
57
57
|
sqlite3-ruby (1.3.1)
|
58
58
|
text-format (1.0.0)
|
59
59
|
text-hyphen (~> 1.0.0)
|
60
|
-
text-hyphen (1.0.
|
60
|
+
text-hyphen (1.0.2)
|
61
61
|
|
62
62
|
PLATFORMS
|
63
63
|
ruby
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@ SitemapGenerator generates Sitemaps for your Rails application. The Sitemaps ad
|
|
6
6
|
Features
|
7
7
|
-------
|
8
8
|
|
9
|
-
- Supports [Video sitemaps][sitemap_video]
|
9
|
+
- Supports [Video sitemaps][sitemap_video], [Image sitemaps][sitemap_images], and [Geo sitemaps][geo_tags]
|
10
10
|
- Rails 2.x and 3.x compatible
|
11
11
|
- Adheres to the [Sitemap 0.9 protocol][sitemap_protocol]
|
12
12
|
- Handles millions of links
|
@@ -18,6 +18,7 @@ Features
|
|
18
18
|
Changelog
|
19
19
|
-------
|
20
20
|
|
21
|
+
- v1.4.0: [Geo sitemap][geo_tags] support, support for generating multiple sitemap sets with different filenames
|
21
22
|
- v1.3.0: Support setting the sitemaps path
|
22
23
|
- v1.2.0: Verified working with Rails 3 stable release
|
23
24
|
- v1.1.0: [Video sitemap][sitemap_video] support
|
@@ -140,6 +141,17 @@ Supported video options include:
|
|
140
141
|
* `category`
|
141
142
|
* `gallery_loc`
|
142
143
|
* `uploader` (use `uploader_info` to set the info attribute)
|
144
|
+
|
145
|
+
Geo Sitemaps
|
146
|
+
-----------
|
147
|
+
|
148
|
+
Pages with geo data can be added by passing a <tt>:geo</tt> Hash to <tt>add()</tt>. The Hash supports only one tag, <tt>:format</tt>. Google provides an [example of a geo sitemap link here][geo_tags]. Note that the sitemap does not actually contain your KML or GeoRSS. It merely links to a page that has this content.
|
149
|
+
|
150
|
+
sitemap.add('/stores/1234.xml', :geo => { :format => 'kml' })
|
151
|
+
|
152
|
+
Supported geo options include:
|
153
|
+
|
154
|
+
* `format` Required, either 'kml' or 'georss'
|
143
155
|
|
144
156
|
Configuration
|
145
157
|
======
|
@@ -192,6 +204,15 @@ You must set the <tt>default_host</tt> that is to be used when adding links to y
|
|
192
204
|
|
193
205
|
The hostname must include the full protocol.
|
194
206
|
|
207
|
+
Sitemap Filenames
|
208
|
+
----------
|
209
|
+
|
210
|
+
By default sitemaps have the name <tt>sitemap1.xml.gz</tt>, <tt>sitemap2.xml.gz</tt>, etc., with the sitemap index named <tt>sitemap_index.xml.gz</tt>.
|
211
|
+
|
212
|
+
If you want to change the <tt>sitemap</tt> portion of the name you can set it as shown below. The surrounding structure of numbers, extensions, and _index will stay the same. For example:
|
213
|
+
|
214
|
+
SitemapGenerator::Sitemap.filename = "geo_sitemap"
|
215
|
+
|
195
216
|
Example Configuration File
|
196
217
|
---------
|
197
218
|
|
@@ -227,6 +248,42 @@ Example Configuration File
|
|
227
248
|
end
|
228
249
|
end
|
229
250
|
|
251
|
+
Generating Multiple Sets Of Sitemaps
|
252
|
+
----------
|
253
|
+
|
254
|
+
To generate multiple sets of sitemaps you can create multiple configuration files. Each should contain a different <tt>SitemapGenerator::Sitemap.filename</tt> to avoid overwriting the previous set. (Of course you can keep the default name of 'sitemap' in one of them.) You can then build each set with a separate rake task. For example:
|
255
|
+
|
256
|
+
rake sitemap:refresh
|
257
|
+
rake sitemap:refresh CONFIG_FILE="config/geo_sitemap.rb"
|
258
|
+
|
259
|
+
The first one uses the default config file at <tt>config/sitemap.rb</tt>. Your first config file might look like this:
|
260
|
+
|
261
|
+
# config/sitemap.rb
|
262
|
+
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
263
|
+
SitemapGenerator::Sitemap.add_links do |sitemap|
|
264
|
+
Store.each do |store|
|
265
|
+
sitemap.add store_path(store)
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
And the second:
|
270
|
+
|
271
|
+
# config/geo_sitemap.rb
|
272
|
+
SitemapGenerator::Sitemap.filename = "geo_sitemap"
|
273
|
+
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
274
|
+
SitemapGenerator::Sitemap.add_links do |sitemap|
|
275
|
+
Store.each do |store|
|
276
|
+
sitemap.add "stores/#{store.id}.xml", :geo => { :format => 'kml' }
|
277
|
+
end
|
278
|
+
end
|
279
|
+
|
280
|
+
After running both rake tasks you'll have the following files in your <tt>public</tt> directory (or wherever you set the sitemaps_path):
|
281
|
+
|
282
|
+
geo_sitemap_index.xml.gz
|
283
|
+
geo_sitemap1.xml.gz
|
284
|
+
sitemap_index.xml.gz
|
285
|
+
sitemap1.xml.gz
|
286
|
+
|
230
287
|
Raison d'être
|
231
288
|
-------
|
232
289
|
|
@@ -299,6 +356,7 @@ Thanks (in no particular order)
|
|
299
356
|
- [Adrian Mugnolo](http://github.com/xymbol)
|
300
357
|
- [Jason Weathered](http://github.com/jasoncodes)
|
301
358
|
- [Andy Stewart](http://github.com/airblade)
|
359
|
+
- [Brian Armstrong](https://github.com/barmstrong) for geo sitemaps
|
302
360
|
|
303
361
|
Copyright (c) 2009 Karl Varga released under the MIT license
|
304
362
|
|
@@ -311,4 +369,5 @@ Copyright (c) 2009 Karl Varga released under the MIT license
|
|
311
369
|
[sitemap_video]:http://www.google.com/support/webmasters/bin/topic.py?topic=10079
|
312
370
|
[sitemap_protocol]:http://sitemaps.org/protocol.php
|
313
371
|
[video_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=80472#4
|
314
|
-
[image_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=178636
|
372
|
+
[image_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=178636
|
373
|
+
[geo_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=94555
|
data/README.md.orig
ADDED
@@ -0,0 +1,374 @@
|
|
1
|
+
SitemapGenerator
|
2
|
+
================
|
3
|
+
|
4
|
+
SitemapGenerator generates Sitemaps for your Rails application. The Sitemaps adhere to the [Sitemap 0.9 protocol][sitemap_protocol] specification. You specify the contents of your Sitemap using a configuration file, à la Rails Routes. A set of rake tasks is included to help you manage your Sitemaps.
|
5
|
+
|
6
|
+
Features
|
7
|
+
-------
|
8
|
+
|
9
|
+
- Supports [Video sitemaps][sitemap_video], [Image sitemaps][sitemap_images], and [Geo sitemaps][geo_tags]
|
10
|
+
- Rails 2.x and 3.x compatible
|
11
|
+
- Adheres to the [Sitemap 0.9 protocol][sitemap_protocol]
|
12
|
+
- Handles millions of links
|
13
|
+
- Compresses Sitemaps using GZip
|
14
|
+
- Notifies Search Engines (Google, Yahoo, Bing, Ask, SitemapWriter) of new sitemaps
|
15
|
+
- Ensures your old Sitemaps stay in place if the new Sitemap fails to generate
|
16
|
+
- You set the hostname (and protocol) of the links in your Sitemap
|
17
|
+
|
18
|
+
Changelog
|
19
|
+
-------
|
20
|
+
|
21
|
+
- v1.4.0: [Geo sitemap][geo_tags] support, support for generating multiple sitemap sets with different filenames
|
22
|
+
- v1.3.0: Support setting the sitemaps path
|
23
|
+
- v1.2.0: Verified working with Rails 3 stable release
|
24
|
+
- v1.1.0: [Video sitemap][sitemap_video] support
|
25
|
+
- v0.2.6: [Image Sitemap][sitemap_images] support
|
26
|
+
- v0.2.5: Rails 3 prerelease support (beta)
|
27
|
+
|
28
|
+
Foreword
|
29
|
+
-------
|
30
|
+
|
31
|
+
Adam Salter first created SitemapGenerator while we were working together in Sydney, Australia. Unfortunately, he passed away in 2009. Since then I have taken over development of SitemapGenerator.
|
32
|
+
|
33
|
+
Those who knew him know what an amazing guy he was, and what an excellent Rails programmer he was. His passing is a great loss to the Rails community.
|
34
|
+
|
35
|
+
The canonical repository is now: [http://github.com/kjvarga/sitemap_generator][canonical_repo]
|
36
|
+
|
37
|
+
Install
|
38
|
+
=======
|
39
|
+
|
40
|
+
**Rails 3:**
|
41
|
+
|
42
|
+
1. Add the gem to your `Gemfile`
|
43
|
+
|
44
|
+
gem 'sitemap_generator'
|
45
|
+
|
46
|
+
2. `$ rake sitemap:install`
|
47
|
+
|
48
|
+
You don't need to include the tasks in your `Rakefile` because the tasks are loaded for you.
|
49
|
+
|
50
|
+
**Pre Rails 3: As a gem**
|
51
|
+
|
52
|
+
1. Add the gem as a dependency in your <tt>config/environment.rb</tt>
|
53
|
+
|
54
|
+
config.gem 'sitemap_generator', :lib => false
|
55
|
+
|
56
|
+
2. `$ rake gems:install`
|
57
|
+
|
58
|
+
3. Add the following to your `Rakefile`
|
59
|
+
|
60
|
+
begin
|
61
|
+
require 'sitemap_generator/tasks'
|
62
|
+
rescue Exception => e
|
63
|
+
puts "Warning, couldn't load gem tasks: #{e.message}! Skipping..."
|
64
|
+
end
|
65
|
+
|
66
|
+
4. `$ rake sitemap:install`
|
67
|
+
|
68
|
+
**Pre Rails 3: As a plugin**
|
69
|
+
|
70
|
+
1. `$ ./script/plugin install git://github.com/kjvarga/sitemap_generator.git`
|
71
|
+
|
72
|
+
Usage
|
73
|
+
======
|
74
|
+
|
75
|
+
<code>rake sitemap:install</code> creates a <tt>config/sitemap.rb</tt> file which contains your logic for generating the Sitemap files.
|
76
|
+
|
77
|
+
Once you have configured your sitemap in <tt>config/sitemap.rb</tt> (see Configuration below) run <code>rake sitemap:refresh</code> as needed to create/rebuild your Sitemap files. Sitemaps are generated into the <tt>public/</tt> folder and are named <tt>sitemap_index.xml.gz</tt>, <tt>sitemap1.xml.gz</tt>, <tt>sitemap2.xml.gz</tt>, etc.
|
78
|
+
|
79
|
+
Using <code>rake sitemap:refresh</code> will notify major search engines to let them know that a new Sitemap is available (Google, Yahoo, Bing, Ask, SitemapWriter). To generate new Sitemaps without notifying search engines (for example when running in a local environment) use <code>rake sitemap:refresh:no_ping</code>.
|
80
|
+
|
81
|
+
To ping Yahoo you will need to set your Yahoo AppID in <tt>config/sitemap.rb</tt>. For example: <code>SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"</code>
|
82
|
+
|
83
|
+
To disable all non-essential output (only errors will be displayed) run the rake tasks with the <code>-s</code> option. For example <code>rake -s sitemap:refresh</code>.
|
84
|
+
|
85
|
+
Cron
|
86
|
+
-----
|
87
|
+
|
88
|
+
To keep your Sitemaps up-to-date, setup a cron job. Make sure to pass the <code>-s</code> option to silence rake. That way you will only get email when the sitemap build fails.
|
89
|
+
|
90
|
+
If you're using Whenever, your schedule would look something like the following:
|
91
|
+
|
92
|
+
# config/schedule.rb
|
93
|
+
every 1.day, :at => '5:00 am' do
|
94
|
+
rake "-s sitemap:refresh"
|
95
|
+
end
|
96
|
+
|
97
|
+
Robots.txt
|
98
|
+
----------
|
99
|
+
|
100
|
+
You should add the Sitemap index file to <code>public/robots.txt</code> to help search engines find your Sitemaps. The URL should be the complete URL to the Sitemap index file. For example:
|
101
|
+
|
102
|
+
Sitemap: http://www.example.org/sitemap_index.xml.gz
|
103
|
+
|
104
|
+
Image Sitemaps
|
105
|
+
-----------
|
106
|
+
|
107
|
+
Images can be added to a sitemap URL by passing an <tt>:images</tt> array to <tt>add()</tt>. Each item in the array must be a Hash containing tags defined by the [Image Sitemap][image_tags] specification. For example:
|
108
|
+
|
109
|
+
sitemap.add('/index.html', :images => [{ :loc => 'http://www.example.com/image.png', :title => 'Image' }])
|
110
|
+
|
111
|
+
Supported image options include:
|
112
|
+
|
113
|
+
* `loc` Required, location of the image
|
114
|
+
* `caption`
|
115
|
+
* `geo_location`
|
116
|
+
* `title`
|
117
|
+
* `license`
|
118
|
+
|
119
|
+
Video Sitemaps
|
120
|
+
-----------
|
121
|
+
|
122
|
+
A video can be added to a sitemap URL by passing a <tt>:video</tt> Hash to <tt>add()</tt>. The Hash can contain tags defined by the [Video Sitemap specification][video_tags]. To associate more than one <tt>tag</tt> with a video, pass the tags as an array with the key <tt>:tags</tt>.
|
123
|
+
|
124
|
+
sitemap.add('/index.html', :video => { :thumbnail_loc => 'http://www.example.com/video1_thumbnail.png', :title => 'Title', :description => 'Description', :content_loc => 'http://www.example.com/cool_video.mpg', :tags => %w[one two three], :category => 'Category' })
|
125
|
+
|
126
|
+
Supported video options include:
|
127
|
+
|
128
|
+
* `thumbnail_loc` Required
|
129
|
+
* `title` Required
|
130
|
+
* `description` Required
|
131
|
+
* `content_loc` Depends. At least one of `player_loc` or `content_loc` is required
|
132
|
+
* `player_loc` Depends. At least one of `player_loc` or `content_loc` is required
|
133
|
+
* `expiration_date` Recommended
|
134
|
+
* `duration` Recommended
|
135
|
+
* `rating`
|
136
|
+
* `view_count`
|
137
|
+
* `publication_date`
|
138
|
+
* `family_friendly`
|
139
|
+
* `tags` A list of tags if more than one tag.
|
140
|
+
* `tag` A single tag. See `tags`
|
141
|
+
* `category`
|
142
|
+
* `gallery_loc`
|
143
|
+
* `uploader` (use `uploader_info` to set the info attribute)
|
144
|
+
|
145
|
+
Geo Sitemaps
|
146
|
+
-----------
|
147
|
+
|
148
|
+
Pages with geo data can be added by passing a <tt>:geo</tt> Hash to <tt>add()</tt>. The Hash supports only one tag, <tt>:format</tt>. Google provides an [example of a geo sitemap link here][geo_tags]. Note that the sitemap does not actually contain your KML or GeoRSS. It merely links to a page that has this content.
|
149
|
+
|
150
|
+
sitemap.add('/restaurants/1234.kml', :geo => { :format => 'kml' })
|
151
|
+
|
152
|
+
Supported geo options include:
|
153
|
+
|
154
|
+
* `format` Required, either 'kml' or 'georss'
|
155
|
+
|
156
|
+
Configuration
|
157
|
+
======
|
158
|
+
|
159
|
+
The sitemap configuration file can be found in <tt>config/sitemap.rb</tt>. When you run a rake task to refresh your sitemaps this file is evaluated. It contains all your configuration settings, as well as your sitemap definition.
|
160
|
+
|
161
|
+
Sitemap Links
|
162
|
+
----------
|
163
|
+
|
164
|
+
The Root Path <tt>/</tt> and Sitemap Index file are automatically added to your sitemap. Links are added to the Sitemap output in the order they are specified. Add links to your sitemap by calling <tt>add_links</tt>, passing a block which receives the sitemap object. Then call <tt>add(path, options)</tt> on the sitemap to add a link.
|
165
|
+
|
166
|
+
For Example:
|
167
|
+
|
168
|
+
SitemapGenerator::Sitemap.add_links do |sitemap|
|
169
|
+
sitemap.add '/reports'
|
170
|
+
end
|
171
|
+
|
172
|
+
The Rails URL helpers are automatically included for you if Rails is detected. So in your call to <tt>add</tt> you can use them to generate paths for your active records, e.g.:
|
173
|
+
|
174
|
+
Article.find_each do |article|
|
175
|
+
sitemap.add article_path(article), :lastmod => article.updated_at
|
176
|
+
end
|
177
|
+
|
178
|
+
For large sitemaps it is advisable to iterate through your Active Records in batches to avoid loading all records into memory at once. As of Rails 2.3.2 you can use <tt>ActiveRecord::Base#find_each</tt> or <tt>ActiveRecord::Base#find_in_batches</tt> to do batched finds, which can significantly improve sitemap performance.
|
179
|
+
|
180
|
+
Valid [options to <tt>add</tt>](http://sitemaps.org/protocol.php#xmlTagDefinitions) are:
|
181
|
+
|
182
|
+
* `priority` The priority of this URL relative to other URLs on your site. Valid values range from 0.0 to 1.0. Default _0.5_
|
183
|
+
* `changefreq` One of: always, hourly, daily, weekly, monthly, yearly, never. Default _weekly_
|
184
|
+
* `lastmod` Time instance. The date of last modification. Default `Time.now`
|
185
|
+
* `host` Optional host for the link's URL. Defaults to `default_host`
|
186
|
+
|
187
|
+
Sitemaps Path
|
188
|
+
----------
|
189
|
+
|
190
|
+
By default sitemaps are generated into <tt>public/</tt>. You can customize the location for your generated sitemaps by setting <tt>sitemaps_path</tt> to a path relative to your public directory. The directory will be created for you if it does not already exist.
|
191
|
+
|
192
|
+
For example:
|
193
|
+
|
194
|
+
SitemapGenerator::Sitemap.sitemaps_path = 'sitemaps/'
|
195
|
+
|
196
|
+
Will generate sitemaps into the `public/sitemaps/` directory. If you want your sitemaps to be findable by robots, you need to specify the location of your sitemap index file in your <tt>public/robots.txt</tt>.
|
197
|
+
|
198
|
+
Sitemaps Host
|
199
|
+
----------
|
200
|
+
|
201
|
+
You must set the <tt>default_host</tt> that is to be used when adding links to your sitemap. The hostname should match the host that the sitemaps are going to be served from. For example:
|
202
|
+
|
203
|
+
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
204
|
+
|
205
|
+
The hostname must include the full protocol.
|
206
|
+
|
207
|
+
Sitemap Filenames
|
208
|
+
----------
|
209
|
+
|
210
|
+
By default sitemaps have the name <tt>sitemap1.xml.gz</tt>, <tt>sitemap2.xml.gz</tt>, etc., with the sitemap index named <tt>sitemap_index.xml.gz</tt>.
|
211
|
+
|
212
|
+
If you want to change the <tt>sitemap</tt> portion of the name you can set it as shown below. The surrounding structure of numbers, extensions, and _index will stay the same. For example:
|
213
|
+
|
214
|
+
SitemapGenerator::Sitemap.filename = "geo_sitemap"
|
215
|
+
|
216
|
+
Example Configuration File
|
217
|
+
---------
|
218
|
+
|
219
|
+
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
220
|
+
SitemapGenerator::Sitemap.yahoo_app_id = nil # Set to your Yahoo AppID to ping Yahoo
|
221
|
+
|
222
|
+
SitemapGenerator::Sitemap.add_links do |sitemap|
|
223
|
+
# Put links creation logic here.
|
224
|
+
#
|
225
|
+
# The Root Path ('/') and Sitemap Index file are added automatically.
|
226
|
+
# Links are added to the Sitemap output in the order they are specified.
|
227
|
+
#
|
228
|
+
# Usage: sitemap.add path, options
|
229
|
+
# (default options are used if you don't specify them)
|
230
|
+
#
|
231
|
+
# Defaults: :priority => 0.5, :changefreq => 'weekly',
|
232
|
+
# :lastmod => Time.now, :host => default_host
|
233
|
+
|
234
|
+
# add '/articles'
|
235
|
+
sitemap.add articles_path, :priority => 0.7, :changefreq => 'daily'
|
236
|
+
|
237
|
+
# add all articles
|
238
|
+
Article.all.each do |a|
|
239
|
+
sitemap.add article_path(a), :lastmod => a.updated_at
|
240
|
+
end
|
241
|
+
|
242
|
+
# add news page with images
|
243
|
+
News.all.each do |news|
|
244
|
+
images = news.images.collect do |image|
|
245
|
+
{ :loc => image.url, :title => image.name }
|
246
|
+
end
|
247
|
+
sitemap.add news_path(news), :images => images
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
251
|
+
Generating Multiple Sets Of Sitemaps
|
252
|
+
----------
|
253
|
+
|
254
|
+
To generate multiple sets of sitemaps you can create multiple configuration files. Each should contain a different <tt>SitemapGenerator::Sitemap.filename</tt> to avoid overwriting the previous set. (Of course you can keep the default name of 'sitemap' in one of them.) You can then build each set with a separate rake task. For example:
|
255
|
+
|
256
|
+
rake sitemap:refresh
|
257
|
+
rake sitemap:refresh CONFIG_FILE="config/geo_sitemap.rb"
|
258
|
+
<<<<<<< HEAD
|
259
|
+
|
260
|
+
The first one uses the default config file at <tt>config/sitemap.rb</tt>. Your two config files might look like this:
|
261
|
+
=======
|
262
|
+
|
263
|
+
The first one uses the default config file at <tt>config/sitemap.rb</tt>. Your first config file might look like this:
|
264
|
+
>>>>>>> lets you build multiple sitemap sets
|
265
|
+
|
266
|
+
# config/sitemap.rb
|
267
|
+
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
268
|
+
SitemapGenerator::Sitemap.add_links do |sitemap|
|
269
|
+
Store.each do |store|
|
270
|
+
sitemap.add store_path(store)
|
271
|
+
end
|
272
|
+
end
|
273
|
+
|
274
|
+
<<<<<<< HEAD
|
275
|
+
=======
|
276
|
+
And the second:
|
277
|
+
>>>>>>> lets you build multiple sitemap sets
|
278
|
+
|
279
|
+
# config/geo_sitemap.rb
|
280
|
+
SitemapGenerator::Sitemap.filename = "geo_sitemap"
|
281
|
+
SitemapGenerator::Sitemap.default_host = "http://www.example.com"
|
282
|
+
SitemapGenerator::Sitemap.add_links do |sitemap|
|
283
|
+
Store.each do |store|
|
284
|
+
sitemap.add store_path(store, :format => :kml), :geo => { :format => 'kml' }
|
285
|
+
end
|
286
|
+
end
|
287
|
+
|
288
|
+
Raison d'être
|
289
|
+
-------
|
290
|
+
|
291
|
+
Most of the Sitemap plugins out there seem to try to recreate the Sitemap links by iterating the Rails routes. In some cases this is possible, but for a great deal of cases it isn't.
|
292
|
+
|
293
|
+
a) There are probably quite a few routes in your routes file that don't need inclusion in the Sitemap. (AJAX routes I'm looking at you.)
|
294
|
+
|
295
|
+
and
|
296
|
+
|
297
|
+
b) How would you infer the correct series of links for the following route?
|
298
|
+
|
299
|
+
map.zipcode 'location/:state/:city/:zipcode', :controller => 'zipcode', :action => 'index'
|
300
|
+
|
301
|
+
Don't tell me it's trivial, because it isn't. It just looks trivial.
|
302
|
+
|
303
|
+
So my idea is to have another file similar to 'routes.rb' called 'sitemap.rb', where you can define what goes into the Sitemap.
|
304
|
+
|
305
|
+
Here's my solution:
|
306
|
+
|
307
|
+
Zipcode.find(:all, :include => :city).each do |z|
|
308
|
+
sitemap.add zipcode_path(:state => z.city.state, :city => z.city, :zipcode => z)
|
309
|
+
end
|
310
|
+
|
311
|
+
Easy hey?
|
312
|
+
|
313
|
+
Other Sitemap settings for the link, like `lastmod`, `priority`, `changefreq` and `host` are entered automatically, although you can override them if you need to.
|
314
|
+
|
315
|
+
Compatibility
|
316
|
+
=======
|
317
|
+
|
318
|
+
Tested and working on:
|
319
|
+
|
320
|
+
- **Rails** 3.0.0
|
321
|
+
- **Rails** 1.x - 2.3.8
|
322
|
+
- **Ruby** 1.8.6, 1.8.7, 1.8.7 Enterprise Edition, 1.9.1
|
323
|
+
|
324
|
+
Notes
|
325
|
+
=======
|
326
|
+
|
327
|
+
1) New Capistrano deploys will remove your Sitemap files, unless you run `rake sitemap:refresh`. The way around this is to create a cap task to copy the sitemaps from the previous deploy:
|
328
|
+
|
329
|
+
after "deploy:update_code", "deploy:copy_old_sitemap"
|
330
|
+
|
331
|
+
namespace :deploy do
|
332
|
+
task :copy_old_sitemap do
|
333
|
+
run "if [ -e #{previous_release}/public/sitemap_index.xml.gz ]; then cp #{previous_release}/public/sitemap* #{current_release}/public/; fi"
|
334
|
+
end
|
335
|
+
end
|
336
|
+
|
337
|
+
Known Bugs
|
338
|
+
========
|
339
|
+
|
340
|
+
- There's no check on the size of a URL which [isn't supposed to exceed 2,048 bytes][sitemaps_xml].
|
341
|
+
- Currently only supports one Sitemap Index file, which can contain 50,000 Sitemap files which can each contain 50,000 urls, so it _only_ supports up to 2,500,000,000 (2.5 billion) urls. I personally have no need of support for more urls, but plugin could be improved to support this.
|
342
|
+
|
343
|
+
Wishlist & Coming Soon
|
344
|
+
========
|
345
|
+
|
346
|
+
- Support for read-only filesystems
|
347
|
+
- Support for plain Ruby and Merb sitemaps
|
348
|
+
|
349
|
+
Thanks (in no particular order)
|
350
|
+
========
|
351
|
+
|
352
|
+
- [Alex Soto](http://github.com/apsoto) for video sitemaps
|
353
|
+
- [Alexadre Bini](http://github.com/alexandrebini) for image sitemaps
|
354
|
+
- [Dan Pickett](http://github.com/dpickett)
|
355
|
+
- [Rob Biedenharn](http://github.com/rab)
|
356
|
+
- [Richie Vos](http://github.com/jerryvos)
|
357
|
+
- [Adrian Mugnolo](http://github.com/xymbol)
|
358
|
+
- [Jason Weathered](http://github.com/jasoncodes)
|
359
|
+
- [Andy Stewart](http://github.com/airblade)
|
360
|
+
- [Brian Armstrong](https://github.com/barmstrong) for geo sitemaps
|
361
|
+
|
362
|
+
Copyright (c) 2009 Karl Varga released under the MIT license
|
363
|
+
|
364
|
+
[canonical_repo]:http://github.com/kjvarga/sitemap_generator
|
365
|
+
[enterprise_class]:https://twitter.com/dhh/status/1631034662 "I use enterprise in the same sense the Phusion guys do - i.e. Enterprise Ruby. Please don't look down on my use of the word 'enterprise' to represent being a cut above. It doesn't mean you ever have to work for a company the size of IBM. Or constantly fight inertia, writing crappy software, adhering to change management practices and spending hours in meetings... Not that there's anything wrong with that - Wait, what?"
|
366
|
+
[sitemaps_org]:http://www.sitemaps.org/protocol.php "http://www.sitemaps.org/protocol.php"
|
367
|
+
[sitemaps_xml]:http://www.sitemaps.org/protocol.php#xmlTagDefinitions "XML Tag Definitions"
|
368
|
+
[sitemap_generator_usage]:http://wiki.github.com/adamsalter/sitemap_generator/sitemapgenerator-usage "http://wiki.github.com/adamsalter/sitemap_generator/sitemapgenerator-usage"
|
369
|
+
[sitemap_images]:http://www.google.com/support/webmasters/bin/answer.py?answer=178636
|
370
|
+
[sitemap_video]:http://www.google.com/support/webmasters/bin/topic.py?topic=10079
|
371
|
+
[sitemap_protocol]:http://sitemaps.org/protocol.php
|
372
|
+
[video_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=80472#4
|
373
|
+
[image_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=178636
|
374
|
+
[geo_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=94555
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.3.9
|
1
|
+
1.3.10
|
data/lib/sitemap_generator.rb
CHANGED
@@ -41,6 +41,7 @@ module SitemapGenerator
|
|
41
41
|
http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
|
42
42
|
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
43
43
|
xmlns:video="http://www.google.com/schemas/sitemap-video/1.1"
|
44
|
+
xmlns:geo="http://www.google.com/geo/schemas/sitemap/1.0"
|
44
45
|
>
|
45
46
|
HTML
|
46
47
|
@xml_wrapper_start.gsub!(/\s+/, ' ').gsub!(/ *> */, '>').strip!
|
@@ -14,7 +14,7 @@ module SitemapGenerator
|
|
14
14
|
path = path.sitemap_path
|
15
15
|
end
|
16
16
|
|
17
|
-
options.assert_valid_keys(:priority, :changefreq, :lastmod, :host, :images, :video)
|
17
|
+
options.assert_valid_keys(:priority, :changefreq, :lastmod, :host, :images, :video, :geo)
|
18
18
|
options.reverse_merge!(:priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now, :host => Sitemap.default_host, :images => [])
|
19
19
|
self.merge!(
|
20
20
|
:path => path,
|
@@ -24,7 +24,8 @@ module SitemapGenerator
|
|
24
24
|
:host => options[:host],
|
25
25
|
:loc => URI.join(options[:host], path).to_s,
|
26
26
|
:images => prepare_images(options[:images], options[:host]),
|
27
|
-
:video => options[:video]
|
27
|
+
:video => options[:video],
|
28
|
+
:geo => options[:geo]
|
28
29
|
)
|
29
30
|
end
|
30
31
|
|
@@ -76,6 +77,13 @@ module SitemapGenerator
|
|
76
77
|
end
|
77
78
|
end
|
78
79
|
end
|
80
|
+
|
81
|
+
unless self[:geo].blank?
|
82
|
+
geo = self[:geo]
|
83
|
+
builder.geo :geo do
|
84
|
+
builder.geo :format, geo[:format] if geo[:format]
|
85
|
+
end
|
86
|
+
end
|
79
87
|
end
|
80
88
|
builder << '' # Force to string
|
81
89
|
end
|
@@ -7,7 +7,7 @@ module SitemapGenerator
|
|
7
7
|
class LinkSet
|
8
8
|
include ActionView::Helpers::NumberHelper # for number_with_delimiter
|
9
9
|
|
10
|
-
attr_reader :default_host, :public_path, :sitemaps_path
|
10
|
+
attr_reader :default_host, :public_path, :sitemaps_path, :filename
|
11
11
|
attr_accessor :sitemap, :sitemap_index
|
12
12
|
attr_accessor :verbose, :yahoo_app_id
|
13
13
|
|
@@ -18,7 +18,7 @@ module SitemapGenerator
|
|
18
18
|
#
|
19
19
|
# TODO: Refactor so that we can have multiple instances
|
20
20
|
# of LinkSet.
|
21
|
-
def create(&block)
|
21
|
+
def create(config_file = 'config/sitemap.rb', &block)
|
22
22
|
require 'sitemap_generator/interpreter'
|
23
23
|
|
24
24
|
start_time = Time.now
|
@@ -27,7 +27,7 @@ module SitemapGenerator
|
|
27
27
|
self.sitemap = SitemapGenerator::Builder::SitemapFile.new(@public_path, new_sitemap_path)
|
28
28
|
end
|
29
29
|
|
30
|
-
SitemapGenerator::Interpreter.new(self, &block)
|
30
|
+
SitemapGenerator::Interpreter.new(self, config_file, &block)
|
31
31
|
unless self.sitemap.finalized?
|
32
32
|
self.sitemap_index.add(self.sitemap)
|
33
33
|
puts self.sitemap.summary if verbose
|
@@ -52,10 +52,14 @@ module SitemapGenerator
|
|
52
52
|
#
|
53
53
|
# <tt>default_host</tt> hostname including protocol to use in all sitemap links
|
54
54
|
# e.g. http://en.google.ca
|
55
|
-
|
55
|
+
#
|
56
|
+
# <tt>filename</tt> used in the name of the file like "#{@filename}1.xml.gz" and "#{@filename}_index.xml.gz"
|
57
|
+
# Defaults to <tt>sitemap</tt>
|
58
|
+
def initialize(public_path = nil, sitemaps_path = nil, default_host = nil, filename = 'sitemap')
|
56
59
|
@default_host = default_host
|
57
60
|
@public_path = public_path
|
58
61
|
@sitemaps_path = sitemaps_path
|
62
|
+
@filename = filename
|
59
63
|
|
60
64
|
if @public_path.nil?
|
61
65
|
@public_path = File.join(::Rails.root, 'public/') rescue 'public/'
|
@@ -158,13 +162,19 @@ module SitemapGenerator
|
|
158
162
|
self.sitemap.sitemap_path = new_sitemap_path unless self.sitemap.finalized?
|
159
163
|
end
|
160
164
|
|
165
|
+
def filename=(value)
|
166
|
+
@filename = value
|
167
|
+
self.sitemap_index.sitemap_path = sitemap_index_path unless self.sitemap_index.finalized?
|
168
|
+
self.sitemap.sitemap_path = new_sitemap_path unless self.sitemap.finalized?
|
169
|
+
end
|
170
|
+
|
161
171
|
protected
|
162
172
|
|
163
173
|
# Return the current sitemap filename with index.
|
164
174
|
#
|
165
175
|
# The index depends on the length of the <tt>sitemaps</tt> array.
|
166
176
|
def new_sitemap_path
|
167
|
-
File.join(self.sitemaps_path || '', "
|
177
|
+
File.join(self.sitemaps_path || '', "#{@filename}#{self.sitemap_index.sitemaps.length + 1}.xml.gz")
|
168
178
|
end
|
169
179
|
|
170
180
|
# Return the current sitemap index filename.
|
@@ -172,7 +182,7 @@ module SitemapGenerator
|
|
172
182
|
# At the moment we only support one index file which can link to
|
173
183
|
# up to 50,000 sitemap files.
|
174
184
|
def sitemap_index_path
|
175
|
-
File.join(self.sitemaps_path || '',
|
185
|
+
File.join(self.sitemaps_path || '', "#{@filename}_index.xml.gz")
|
176
186
|
end
|
177
187
|
end
|
178
188
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemap_generator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 3
|
9
|
-
-
|
10
|
-
version: 1.3.9
|
9
|
+
- 10
|
10
|
+
version: 1.3.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Karl Varga
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-
|
19
|
+
date: 2011-03-05 00:00:00 -08:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
@@ -111,11 +111,13 @@ extensions: []
|
|
111
111
|
|
112
112
|
extra_rdoc_files:
|
113
113
|
- README.md
|
114
|
+
- README.md.orig
|
114
115
|
files:
|
115
116
|
- Gemfile
|
116
117
|
- Gemfile.lock
|
117
118
|
- MIT-LICENSE
|
118
119
|
- README.md
|
120
|
+
- README.md.orig
|
119
121
|
- Rakefile
|
120
122
|
- VERSION
|
121
123
|
- lib/sitemap_generator.rb
|