sitemap_generator 0.2.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +0 -1
- data/Rakefile +28 -16
- data/VERSION +1 -1
- data/lib/sitemap_generator/builder/helper.rb +10 -0
- data/lib/sitemap_generator/builder/sitemap_file.rb +114 -0
- data/lib/sitemap_generator/builder/sitemap_index_file.rb +24 -0
- data/lib/sitemap_generator/builder.rb +9 -0
- data/lib/sitemap_generator/interpreter.rb +28 -0
- data/lib/sitemap_generator/link.rb +28 -23
- data/lib/sitemap_generator/link_set.rb +132 -125
- data/lib/sitemap_generator/templates.rb +1 -3
- data/lib/sitemap_generator/utilities.rb +28 -3
- data/lib/sitemap_generator.rb +7 -7
- data/tasks/sitemap_generator_tasks.rake +3 -15
- metadata +9 -8
- data/lib/sitemap_generator/helper.rb +0 -55
- data/lib/sitemap_generator/rails_helper.rb +0 -28
- data/templates/sitemap_index.builder +0 -23
- data/templates/xml_sitemap.builder +0 -38
data/README.md
CHANGED
@@ -192,7 +192,6 @@ Notes
|
|
192
192
|
Known Bugs
|
193
193
|
========
|
194
194
|
|
195
|
-
- Sitemaps.org [states][sitemaps_org] that no Sitemap XML file should be more than 10Mb uncompressed. The plugin will warn you about this, but does nothing to avoid it (like move some URLs into a later file).
|
196
195
|
- There's no check on the size of a URL which [isn't supposed to exceed 2,048 bytes][sitemaps_xml].
|
197
196
|
- Currently only supports one Sitemap Index file, which can contain 50,000 Sitemap files which can each contain 50,000 urls, so it _only_ supports up to 2,500,000,000 (2.5 billion) urls. I personally have no need of support for more urls, but plugin could be improved to support this.
|
198
197
|
|
data/Rakefile
CHANGED
@@ -22,6 +22,28 @@ rescue LoadError
|
|
22
22
|
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
23
23
|
end
|
24
24
|
|
25
|
+
#
|
26
|
+
# Helper methods
|
27
|
+
#
|
28
|
+
module Helpers
|
29
|
+
extend self
|
30
|
+
|
31
|
+
# Return a full local path to path fragment <tt>path</tt>
|
32
|
+
def local_path(path)
|
33
|
+
File.join(File.dirname(__FILE__), path)
|
34
|
+
end
|
35
|
+
|
36
|
+
# Copy all of the local files into <tt>path</tt> after completely cleaning it
|
37
|
+
def prepare_path(path)
|
38
|
+
rm_rf path
|
39
|
+
mkdir_p path
|
40
|
+
cp_r(FileList["[A-Z]*", "{bin,lib,rails,templates,tasks}"], path)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
#
|
45
|
+
# Tasks
|
46
|
+
#
|
25
47
|
task :default => :test
|
26
48
|
|
27
49
|
namespace :test do
|
@@ -37,29 +59,19 @@ namespace :test do
|
|
37
59
|
namespace :prepare do
|
38
60
|
task :gem do
|
39
61
|
ENV["SITEMAP_RAILS"] = 'gem'
|
40
|
-
prepare_path(local_path('spec/mock_app_gem/vendor/gems/sitemap_generator-1.2.3'))
|
41
|
-
rm_rf(local_path('spec/mock_app_gem/public/sitemap*'))
|
62
|
+
Helpers.prepare_path(Helpers.local_path('spec/mock_app_gem/vendor/gems/sitemap_generator-1.2.3'))
|
63
|
+
rm_rf(Helpers.local_path('spec/mock_app_gem/public/sitemap*'))
|
42
64
|
end
|
43
65
|
|
44
66
|
task :plugin do
|
45
67
|
ENV["SITEMAP_RAILS"] = 'plugin'
|
46
|
-
prepare_path(local_path('spec/mock_app_plugin/vendor/plugins/sitemap_generator-1.2.3'))
|
47
|
-
rm_rf(local_path('spec/mock_app_plugin/public/sitemap*'))
|
68
|
+
Helpers.prepare_path(Helpers.local_path('spec/mock_app_plugin/vendor/plugins/sitemap_generator-1.2.3'))
|
69
|
+
rm_rf(Helpers.local_path('spec/mock_app_plugin/public/sitemap*'))
|
48
70
|
end
|
49
71
|
|
50
72
|
task :rails3 do
|
51
73
|
ENV["SITEMAP_RAILS"] = 'rails3'
|
52
|
-
rm_rf(local_path('spec/mock_rails3_gem/public/sitemap*'))
|
53
|
-
end
|
54
|
-
|
55
|
-
def local_path(path)
|
56
|
-
File.join(File.dirname(__FILE__), path)
|
57
|
-
end
|
58
|
-
|
59
|
-
def prepare_path(path)
|
60
|
-
rm_rf path
|
61
|
-
mkdir_p path
|
62
|
-
cp_r(FileList["[A-Z]*", "{bin,lib,rails,templates,tasks}"], path)
|
74
|
+
rm_rf(Helpers.local_path('spec/mock_rails3_gem/public/sitemap*'))
|
63
75
|
end
|
64
76
|
end
|
65
77
|
end
|
@@ -86,4 +98,4 @@ Rake::RDocTask.new(:rdoc) do |rdoc|
|
|
86
98
|
rdoc.options << '--line-numbers' << '--inline-source'
|
87
99
|
rdoc.rdoc_files.include('README.md')
|
88
100
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
89
|
-
end
|
101
|
+
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.6
|
1
|
+
0.3.0
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'sitemap_generator/builder/helper'
|
2
|
+
require 'builder'
|
3
|
+
require 'zlib'
|
4
|
+
|
5
|
+
module SitemapGenerator
|
6
|
+
module Builder
|
7
|
+
class SitemapFile
|
8
|
+
include SitemapGenerator::Builder::Helper
|
9
|
+
|
10
|
+
attr_accessor :sitemap_path, :public_path, :filesize, :link_count, :hostname
|
11
|
+
|
12
|
+
# <tt>public_path</tt> full path of the directory to write sitemaps in.
|
13
|
+
# Usually your Rails <tt>public/</tt> directory.
|
14
|
+
#
|
15
|
+
# <tt>sitemap_path</tt> relative path including filename of the sitemap
|
16
|
+
# file relative to <tt>public_path</tt>
|
17
|
+
#
|
18
|
+
# <tt>hostname</tt> hostname including protocol to use in all links
|
19
|
+
# e.g. http://en.google.ca
|
20
|
+
def initialize(public_path, sitemap_path, hostname)
|
21
|
+
self.sitemap_path = sitemap_path
|
22
|
+
self.public_path = public_path
|
23
|
+
self.hostname = hostname
|
24
|
+
self.link_count = 0
|
25
|
+
|
26
|
+
@xml_content = '' # XML urlset content
|
27
|
+
@xml_wrapper_start = %q[<?xml version="1.0" encoding="UTF-8"?><urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">]
|
28
|
+
@xml_wrapper_end = %q[</urlset>]
|
29
|
+
self.filesize = @xml_wrapper_start.bytesize + @xml_wrapper_end.bytesize
|
30
|
+
end
|
31
|
+
|
32
|
+
def lastmod
|
33
|
+
File.mtime(self.full_path) rescue nil
|
34
|
+
end
|
35
|
+
|
36
|
+
def empty?
|
37
|
+
self.link_count == 0
|
38
|
+
end
|
39
|
+
|
40
|
+
def full_url
|
41
|
+
URI.join(self.hostname, self.sitemap_path).to_s
|
42
|
+
end
|
43
|
+
|
44
|
+
def full_path
|
45
|
+
@full_path ||= File.join(self.public_path, self.sitemap_path)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Return a boolean indicating whether the sitemap file can fit another link
|
49
|
+
# of <tt>bytes</tt> bytes in size.
|
50
|
+
def file_can_fit?(bytes)
|
51
|
+
(self.filesize + bytes) < SitemapGenerator::MAX_SITEMAP_FILESIZE && self.link_count < SitemapGenerator::MAX_SITEMAP_LINKS
|
52
|
+
end
|
53
|
+
|
54
|
+
# Add a link to the sitemap file and return a boolean indicating whether the
|
55
|
+
# link was added.
|
56
|
+
#
|
57
|
+
# If a link cannot be added, the file is too large or the link limit has been reached.
|
58
|
+
def add_link(link)
|
59
|
+
xml = build_xml(::Builder::XmlMarkup.new, link)
|
60
|
+
unless file_can_fit?(xml.bytesize)
|
61
|
+
self.finalize!
|
62
|
+
return false
|
63
|
+
end
|
64
|
+
|
65
|
+
@xml_content << xml
|
66
|
+
self.filesize += xml.bytesize
|
67
|
+
self.link_count += 1
|
68
|
+
true
|
69
|
+
end
|
70
|
+
alias_method :<<, :add_link
|
71
|
+
|
72
|
+
# Return XML as a String
|
73
|
+
def build_xml(builder, link)
|
74
|
+
builder.url do
|
75
|
+
builder.loc link[:loc]
|
76
|
+
builder.lastmod w3c_date(link[:lastmod]) if link[:lastmod]
|
77
|
+
builder.changefreq link[:changefreq] if link[:changefreq]
|
78
|
+
builder.priority link[:priority] if link[:priority]
|
79
|
+
|
80
|
+
unless link[:images].blank?
|
81
|
+
link[:images].each do |image|
|
82
|
+
builder.image:image do
|
83
|
+
builder.image :loc, image[:loc]
|
84
|
+
builder.image :caption, image[:caption] if image[:caption]
|
85
|
+
builder.image :geo_location, image[:geo_location] if image[:geo_location]
|
86
|
+
builder.image :title, image[:title] if image[:title]
|
87
|
+
builder.image :license, image[:license] if image[:license]
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
builder << ''
|
93
|
+
end
|
94
|
+
|
95
|
+
# Insert the content into the XML "wrapper" and write and close the file.
|
96
|
+
#
|
97
|
+
# All the xml content in the instance is cleared, but attributes like
|
98
|
+
# <tt>filesize</tt> are still available.
|
99
|
+
def finalize!
|
100
|
+
return if self.frozen?
|
101
|
+
|
102
|
+
open(self.full_path, 'w') do |file|
|
103
|
+
gz = Zlib::GzipWriter.new(file)
|
104
|
+
gz.write @xml_wrapper_start
|
105
|
+
gz.write @xml_content
|
106
|
+
gz.write @xml_wrapper_end
|
107
|
+
gz.close
|
108
|
+
end
|
109
|
+
@xml_content = @xml_wrapper_start = @xml_wrapper_end = ''
|
110
|
+
self.freeze
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module SitemapGenerator
|
2
|
+
module Builder
|
3
|
+
class SitemapIndexFile < SitemapFile
|
4
|
+
|
5
|
+
def initialize(*args)
|
6
|
+
super(*args)
|
7
|
+
|
8
|
+
@ml_content = '' # XML urlset content
|
9
|
+
@xml_wrapper_start = %q[<?xml version="1.0" encoding="UTF-8"?><sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">]
|
10
|
+
@xml_wrapper_end = %q[</sitemapindex>]
|
11
|
+
self.filesize = @xml_wrapper_start.bytesize + @xml_wrapper_end.bytesize
|
12
|
+
end
|
13
|
+
|
14
|
+
# Return XML as a String
|
15
|
+
def build_xml(builder, link)
|
16
|
+
builder.url do
|
17
|
+
builder.loc link[:loc]
|
18
|
+
builder.lastmod w3c_date(link[:lastmod]) if link[:lastmod]
|
19
|
+
end
|
20
|
+
builder << ''
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module SitemapGenerator
|
2
|
+
|
3
|
+
# Evaluate a sitemap config file within the context of a class that includes the
|
4
|
+
# Rails URL helpers.
|
5
|
+
class Interpreter
|
6
|
+
|
7
|
+
if SitemapGenerator::Utilities.rails3?
|
8
|
+
include ::Rails.application.routes.url_helpers
|
9
|
+
else
|
10
|
+
require 'action_controller'
|
11
|
+
include ActionController::UrlWriter
|
12
|
+
end
|
13
|
+
|
14
|
+
def initialize(sitemap_config_file=nil)
|
15
|
+
sitemap_config_file ||= File.join(::Rails.root, 'config/sitemap.rb')
|
16
|
+
eval(open(sitemap_config_file).read)
|
17
|
+
end
|
18
|
+
|
19
|
+
# KJV do we need this? We should be using path_* helpers.
|
20
|
+
# def self.default_url_options(options = nil)
|
21
|
+
# { :host => SitemapGenerator::Sitemap.default_host }
|
22
|
+
# end
|
23
|
+
|
24
|
+
def self.run
|
25
|
+
new
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -1,30 +1,35 @@
|
|
1
1
|
module SitemapGenerator
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
:changefreq => options[:changefreq],
|
11
|
-
:lastmod => options[:lastmod],
|
12
|
-
:host => options[:host],
|
13
|
-
:loc => URI.join(options[:host], path).to_s,
|
14
|
-
:images => prepare_images(options[:images], options[:host])
|
15
|
-
}
|
2
|
+
module Link
|
3
|
+
extend self
|
4
|
+
|
5
|
+
# Return a Hash of options suitable to pass to a SitemapGenerator::Builder::SitemapFile instance.
|
6
|
+
def generate(path, options = {})
|
7
|
+
if path.is_a?(SitemapGenerator::Builder::SitemapFile)
|
8
|
+
options.reverse_merge!(:host => path.hostname, :lastmod => path.lastmod)
|
9
|
+
path = path.sitemap_path
|
16
10
|
end
|
17
11
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
12
|
+
options.assert_valid_keys(:priority, :changefreq, :lastmod, :host, :images)
|
13
|
+
options.reverse_merge!(:priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now, :host => Sitemap.default_host, :images => [])
|
14
|
+
{
|
15
|
+
:path => path,
|
16
|
+
:priority => options[:priority],
|
17
|
+
:changefreq => options[:changefreq],
|
18
|
+
:lastmod => options[:lastmod],
|
19
|
+
:host => options[:host],
|
20
|
+
:loc => URI.join(options[:host], path).to_s,
|
21
|
+
:images => prepare_images(options[:images], options[:host])
|
22
|
+
}
|
23
|
+
end
|
24
|
+
|
25
|
+
# Return an Array of image option Hashes suitable to be parsed by SitemapGenerator::Builder::SitemapFile
|
26
|
+
def prepare_images(images, host)
|
27
|
+
images.delete_if { |key,value| key[:loc] == nil }
|
28
|
+
images.each do |r|
|
29
|
+
r.assert_valid_keys(:loc, :caption, :geo_location, :title, :license)
|
30
|
+
r[:loc] = URI.join(host, r[:loc]).to_s
|
27
31
|
end
|
32
|
+
images[0..(SitemapGenerator::MAX_SITEMAP_IMAGES-1)]
|
28
33
|
end
|
29
34
|
end
|
30
35
|
end
|
@@ -1,166 +1,173 @@
|
|
1
1
|
require 'builder'
|
2
2
|
require 'action_view'
|
3
3
|
|
4
|
+
# A LinkSet provisions a bunch of links to sitemap files. It also writes the index file
|
5
|
+
# which lists all the sitemap files written.
|
4
6
|
module SitemapGenerator
|
5
7
|
class LinkSet
|
6
|
-
include SitemapGenerator::Helper
|
7
|
-
include ActionView::Helpers::NumberHelper
|
8
|
-
|
9
|
-
attr_accessor :default_host, :yahoo_app_id, :links
|
10
|
-
attr_accessor :sitemaps
|
11
|
-
attr_accessor :max_entries
|
12
|
-
attr_accessor :link_count
|
13
|
-
|
14
|
-
alias :sitemap_files :sitemaps
|
15
|
-
|
16
|
-
# Create new link set instance.
|
17
|
-
def initialize
|
18
|
-
self.links = []
|
19
|
-
self.sitemaps = []
|
20
|
-
self.max_entries = SitemapGenerator::MAX_ENTRIES
|
21
|
-
self.link_count = 0
|
22
|
-
end
|
8
|
+
include ActionView::Helpers::NumberHelper # for number_with_delimiter
|
23
9
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
links.push Link.generate("/#{index_file}", :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
|
28
|
-
self.link_count += 2
|
29
|
-
end
|
10
|
+
attr_accessor :default_host, :public_path, :sitemaps_path
|
11
|
+
attr_accessor :sitemap, :sitemaps, :sitemap_index
|
12
|
+
attr_accessor :verbose, :yahoo_app_id
|
30
13
|
|
31
|
-
#
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
14
|
+
# Evaluate the sitemap config file and write all sitemaps.
|
15
|
+
#
|
16
|
+
# This should be refactored so that we can have multiple instances
|
17
|
+
# of LinkSet.
|
18
|
+
def create
|
19
|
+
require 'sitemap_generator/interpreter'
|
37
20
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
self.link_count += 1
|
43
|
-
end
|
21
|
+
start_time = Time.now
|
22
|
+
SitemapGenerator::Interpreter.run
|
23
|
+
finalize!
|
24
|
+
end_time = Time.now
|
44
25
|
|
45
|
-
|
46
|
-
def write
|
47
|
-
write_pending
|
26
|
+
puts "\nSitemap stats: #{number_with_delimiter(self.link_count)} links / #{self.sitemaps.size} files / " + ("%dm%02ds" % (end_time - start_time).divmod(60)) if verbose
|
48
27
|
end
|
49
28
|
|
50
|
-
#
|
51
|
-
|
52
|
-
|
29
|
+
# <tt>public_path</tt> (optional) full path to the directory to write sitemaps in.
|
30
|
+
# Defaults to your Rails <tt>public/</tt> directory.
|
31
|
+
#
|
32
|
+
# <tt>sitemaps_path</tt> (optional) path fragment within public to write sitemaps
|
33
|
+
# to e.g. 'en/'. Sitemaps are written to <tt>public_path</tt> + <tt>sitemaps_path</tt>
|
34
|
+
#
|
35
|
+
# <tt>default_host</tt> hostname including protocol to use in all sitemap links
|
36
|
+
# e.g. http://en.google.ca
|
37
|
+
def initialize(public_path = nil, sitemaps_path = nil, default_host = nil)
|
38
|
+
public_path = File.join(::Rails.root, 'public/') if public_path.nil?
|
39
|
+
self.default_host = default_host
|
40
|
+
self.public_path = public_path
|
41
|
+
self.sitemaps_path = sitemaps_path
|
42
|
+
|
43
|
+
# Completed sitemaps
|
44
|
+
self.sitemaps = []
|
53
45
|
end
|
54
46
|
|
55
|
-
|
56
|
-
|
57
|
-
write_upcoming
|
58
|
-
write_index
|
47
|
+
def link_count
|
48
|
+
self.sitemaps.map(&:link_count).inject(:+)
|
59
49
|
end
|
60
50
|
|
61
|
-
#
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
filename = File.join(Rails.root, "public", file)
|
68
|
-
write_file(filename, buffer)
|
69
|
-
show_progress("Sitemap", filename, buffer) if verbose
|
70
|
-
if slice_index==0
|
71
|
-
links.clear
|
72
|
-
else
|
73
|
-
links.slice! slice_index, links.size
|
74
|
-
end
|
51
|
+
# Called within the user's eval'ed sitemap config file. Add links to sitemap files
|
52
|
+
# passing a block.
|
53
|
+
#
|
54
|
+
# TODO: Refactor. The call chain is confusing and convoluted here.
|
55
|
+
def add_links
|
56
|
+
raise ArgumentError, "Default hostname not set" if default_host.blank?
|
75
57
|
|
76
|
-
|
77
|
-
|
58
|
+
# I'd rather have these calls in <tt>create</tt> but we have to wait
|
59
|
+
# for <tt>default_host</tt> to be set by the user's sitemap config
|
60
|
+
new_sitemap
|
61
|
+
add_default_links
|
78
62
|
|
79
|
-
|
80
|
-
def write_index
|
81
|
-
buffer = ""
|
82
|
-
xml = Builder::XmlMarkup.new(:target => buffer)
|
83
|
-
eval(SitemapGenerator.templates.sitemap_index, binding)
|
84
|
-
filename = File.join(Rails.root, "public", index_file)
|
85
|
-
write_file(filename, buffer)
|
86
|
-
show_progress("Sitemap Index", filename, buffer) if verbose
|
87
|
-
links.clear
|
88
|
-
sitemaps.clear
|
63
|
+
yield Mapper.new(self)
|
89
64
|
end
|
90
65
|
|
91
|
-
#
|
92
|
-
|
93
|
-
|
66
|
+
# Called from Mapper.
|
67
|
+
#
|
68
|
+
# Add a link to the current sitemap.
|
69
|
+
def add_link(link)
|
70
|
+
unless self.sitemap << link
|
71
|
+
new_sitemap
|
72
|
+
self.sitemap << link
|
73
|
+
end
|
94
74
|
end
|
95
75
|
|
96
|
-
#
|
97
|
-
|
98
|
-
|
99
|
-
|
76
|
+
# Add the current sitemap to the <tt>sitemaps</tt> Array and
|
77
|
+
# start a new sitemap.
|
78
|
+
#
|
79
|
+
# If the current sitemap is nil or empty it is not added.
|
80
|
+
def new_sitemap
|
81
|
+
unless self.sitemap_index
|
82
|
+
self.sitemap_index = SitemapGenerator::Builder::SitemapIndexFile.new(public_path, sitemap_index_path, default_host)
|
83
|
+
end
|
100
84
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
end
|
85
|
+
unless self.sitemap
|
86
|
+
self.sitemap = SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path, default_host)
|
87
|
+
end
|
105
88
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
89
|
+
# Mark the sitemap as complete and add it to the sitemap index
|
90
|
+
unless self.sitemap.empty?
|
91
|
+
self.sitemap.finalize!
|
92
|
+
self.sitemap_index << Link.generate(self.sitemap)
|
93
|
+
self.sitemaps << self.sitemap
|
94
|
+
show_progress(self.sitemap) if verbose
|
110
95
|
|
111
|
-
|
112
|
-
|
113
|
-
!first_sitemap?
|
96
|
+
self.sitemap = SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path, default_host)
|
97
|
+
end
|
114
98
|
end
|
115
99
|
|
116
|
-
#
|
117
|
-
def
|
118
|
-
|
100
|
+
# Report progress line.
|
101
|
+
def show_progress(sitemap)
|
102
|
+
uncompressed_size = number_to_human_size(sitemap.filesize)
|
103
|
+
compressed_size = number_to_human_size(File.size?(sitemap.full_path))
|
104
|
+
puts "+ #{sitemap.sitemap_path} #{sitemap.link_count} links / #{uncompressed_size} / #{compressed_size} gzipped"
|
119
105
|
end
|
120
106
|
|
121
|
-
#
|
122
|
-
def
|
123
|
-
|
107
|
+
# Finalize all sitemap files
|
108
|
+
def finalize!
|
109
|
+
new_sitemap
|
110
|
+
self.sitemap_index.finalize!
|
124
111
|
end
|
125
112
|
|
126
|
-
#
|
127
|
-
|
128
|
-
|
129
|
-
|
113
|
+
# Ping search engines.
|
114
|
+
#
|
115
|
+
# @see http://en.wikipedia.org/wiki/Sitemap_index
|
116
|
+
def ping_search_engines
|
117
|
+
require 'open-uri'
|
118
|
+
|
119
|
+
sitemap_index_url = CGI.escape(self.sitemap_index.full_url)
|
120
|
+
search_engines = {
|
121
|
+
:google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{sitemap_index_url}",
|
122
|
+
:yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{sitemap_index_url}&appid=#{yahoo_app_id}",
|
123
|
+
:ask => "http://submissions.ask.com/ping?sitemap=#{sitemap_index_url}",
|
124
|
+
:bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{sitemap_index_url}",
|
125
|
+
:sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{sitemap_index_url}"
|
126
|
+
}
|
127
|
+
|
128
|
+
puts "\n" if verbose
|
129
|
+
search_engines.each do |engine, link|
|
130
|
+
next if engine == :yahoo && !self.yahoo_app_id
|
131
|
+
begin
|
132
|
+
open(link)
|
133
|
+
puts "Successful ping of #{engine.to_s.titleize}" if verbose
|
134
|
+
rescue Timeout::Error, StandardError => e
|
135
|
+
puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect} (URL #{link})" if verbose
|
136
|
+
end
|
137
|
+
end
|
130
138
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
139
|
+
if !self.yahoo_app_id && verbose
|
140
|
+
puts "\n"
|
141
|
+
puts <<-END.gsub(/^\s+/, '')
|
142
|
+
To ping Yahoo you require a Yahoo AppID. Add it to your config/sitemap.rb with:
|
135
143
|
|
136
|
-
|
137
|
-
def enough_links?
|
138
|
-
!more_links?
|
139
|
-
end
|
144
|
+
SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"
|
140
145
|
|
141
|
-
|
142
|
-
|
143
|
-
|
146
|
+
For more information see http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html
|
147
|
+
END
|
148
|
+
end
|
144
149
|
end
|
145
150
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
151
|
+
protected
|
152
|
+
|
153
|
+
def add_default_links
|
154
|
+
self.sitemap << Link.generate('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
|
155
|
+
self.sitemap << Link.generate(self.sitemap_index, :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
|
150
156
|
end
|
151
157
|
|
152
|
-
#
|
153
|
-
|
154
|
-
|
158
|
+
# Return the current sitemap filename with index.
|
159
|
+
#
|
160
|
+
# The index depends on the length of the <tt>sitemaps</tt> array.
|
161
|
+
def new_sitemap_path
|
162
|
+
File.join(self.sitemaps_path || '', "sitemap#{self.sitemaps.length + 1}.xml.gz")
|
155
163
|
end
|
156
164
|
|
157
|
-
#
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
puts "Sitemap stats: #{number_with_delimiter(SitemapGenerator::Sitemap.link_count)} links, " + ("%dm%02ds" % (stop_time - start_time).divmod(60)) if verbose
|
165
|
+
# Return the current sitemap index filename.
|
166
|
+
#
|
167
|
+
# At the moment we only support one index file which can link to
|
168
|
+
# up to 50,000 sitemap files.
|
169
|
+
def sitemap_index_path
|
170
|
+
File.join(self.sitemaps_path || '', 'sitemap_index.xml.gz')
|
164
171
|
end
|
165
172
|
end
|
166
|
-
end
|
173
|
+
end
|
@@ -7,8 +7,6 @@ module SitemapGenerator
|
|
7
7
|
# Define an accessor method for each template file.
|
8
8
|
class Templates
|
9
9
|
FILES = {
|
10
|
-
:sitemap_index => 'sitemap_index.builder',
|
11
|
-
:sitemap_xml => 'xml_sitemap.builder',
|
12
10
|
:sitemap_sample => 'sitemap.rb',
|
13
11
|
}
|
14
12
|
|
@@ -28,7 +26,7 @@ module SitemapGenerator
|
|
28
26
|
|
29
27
|
# Return the full path to a template.
|
30
28
|
#
|
31
|
-
# <tt>file</tt> template symbol e.g. <tt>:
|
29
|
+
# <tt>file</tt> template symbol e.g. <tt>:sitemap_sample</tt>
|
32
30
|
def template_path(template)
|
33
31
|
File.join(@root, 'templates', self.class::FILES[template])
|
34
32
|
end
|
@@ -3,14 +3,14 @@ module SitemapGenerator
|
|
3
3
|
extend self
|
4
4
|
|
5
5
|
# Copy templates/sitemap.rb to config if not there yet.
|
6
|
-
def install_sitemap_rb
|
6
|
+
def install_sitemap_rb(verbose=false)
|
7
7
|
if File.exist?(File.join(RAILS_ROOT, 'config/sitemap.rb'))
|
8
|
-
puts "already exists: config/sitemap.rb, file not copied"
|
8
|
+
puts "already exists: config/sitemap.rb, file not copied" if verbose
|
9
9
|
else
|
10
10
|
FileUtils.cp(
|
11
11
|
SitemapGenerator.templates.template_path(:sitemap_sample),
|
12
12
|
File.join(RAILS_ROOT, 'config/sitemap.rb'))
|
13
|
-
puts "created: config/sitemap.rb"
|
13
|
+
puts "created: config/sitemap.rb" if verbose
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
@@ -25,5 +25,30 @@ module SitemapGenerator
|
|
25
25
|
def clean_files
|
26
26
|
FileUtils.rm(Dir[File.join(RAILS_ROOT, 'public/sitemap*.xml.gz')])
|
27
27
|
end
|
28
|
+
|
29
|
+
# Returns whether this environment is using ActionPack
|
30
|
+
# version 3.0.0 or greater.
|
31
|
+
#
|
32
|
+
# @return [Boolean]
|
33
|
+
def self.rails3?
|
34
|
+
# The ActionPack module is always loaded automatically in Rails >= 3
|
35
|
+
return false unless defined?(ActionPack) && defined?(ActionPack::VERSION)
|
36
|
+
|
37
|
+
version =
|
38
|
+
if defined?(ActionPack::VERSION::MAJOR)
|
39
|
+
ActionPack::VERSION::MAJOR
|
40
|
+
else
|
41
|
+
# Rails 1.2
|
42
|
+
ActionPack::VERSION::Major
|
43
|
+
end
|
44
|
+
|
45
|
+
# 3.0.0.beta1 acts more like ActionPack 2
|
46
|
+
# for purposes of this method
|
47
|
+
# (checking whether block helpers require = or -).
|
48
|
+
# This extra check can be removed when beta2 is out.
|
49
|
+
version >= 3 &&
|
50
|
+
!(defined?(ActionPack::VERSION::TINY) &&
|
51
|
+
ActionPack::VERSION::TINY == "0.beta")
|
52
|
+
end
|
28
53
|
end
|
29
54
|
end
|
data/lib/sitemap_generator.rb
CHANGED
@@ -1,19 +1,19 @@
|
|
1
|
+
require 'sitemap_generator/builder'
|
1
2
|
require 'sitemap_generator/mapper'
|
2
3
|
require 'sitemap_generator/link'
|
3
|
-
require 'sitemap_generator/rails_helper'
|
4
|
-
require 'sitemap_generator/helper'
|
5
4
|
require 'sitemap_generator/link_set'
|
6
|
-
require 'sitemap_generator/helper'
|
7
5
|
require 'sitemap_generator/templates'
|
8
6
|
require 'sitemap_generator/utilities'
|
9
|
-
|
10
|
-
require 'sitemap_generator/railtie' if SitemapGenerator::RailsHelper.rails3?
|
7
|
+
require 'sitemap_generator/railtie' if SitemapGenerator::Utilities.rails3?
|
11
8
|
|
12
9
|
module SitemapGenerator
|
13
10
|
silence_warnings do
|
14
11
|
VERSION = File.read(File.dirname(__FILE__) + "/../VERSION").strip
|
15
|
-
|
16
|
-
|
12
|
+
MAX_SITEMAP_FILES = 50_000 # max sitemap links per index file
|
13
|
+
MAX_SITEMAP_LINKS = 50_000 # max links per sitemap
|
14
|
+
MAX_SITEMAP_IMAGES = 1_000 # max images per url
|
15
|
+
MAX_SITEMAP_FILESIZE = 10.megabytes # bytes
|
16
|
+
|
17
17
|
Sitemap = LinkSet.new
|
18
18
|
end
|
19
19
|
|
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'zlib'
|
2
1
|
begin
|
3
2
|
require 'sitemap_generator'
|
4
3
|
rescue LoadError, NameError
|
@@ -8,7 +7,7 @@ end
|
|
8
7
|
namespace :sitemap do
|
9
8
|
desc "Install a default config/sitemap.rb file"
|
10
9
|
task :install do
|
11
|
-
SitemapGenerator::Utilities.install_sitemap_rb
|
10
|
+
SitemapGenerator::Utilities.install_sitemap_rb(verbose)
|
12
11
|
end
|
13
12
|
|
14
13
|
desc "Delete all Sitemap files in public/ directory"
|
@@ -25,19 +24,8 @@ namespace :sitemap do
|
|
25
24
|
task 'refresh:no_ping' => ['sitemap:create']
|
26
25
|
|
27
26
|
task :create => [:environment] do
|
28
|
-
|
29
|
-
|
30
|
-
if SitemapGenerator::RailsHelper.rails3?
|
31
|
-
SitemapGenerator::Sitemap.class_eval do
|
32
|
-
include Rails.application.routes.url_helpers
|
33
|
-
end
|
34
|
-
else
|
35
|
-
require 'action_controller'
|
36
|
-
SitemapGenerator::Sitemap.class_eval do
|
37
|
-
include ActionController::UrlWriter
|
38
|
-
end
|
39
|
-
end
|
40
|
-
SitemapGenerator::Sitemap.create_files
|
27
|
+
SitemapGenerator::Sitemap.verbose = verbose
|
28
|
+
SitemapGenerator::Sitemap.create
|
41
29
|
end
|
42
30
|
end
|
43
31
|
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 2
|
8
|
-
- 6
|
9
|
-
version: 0.2.6
|
7
|
+
- 3
|
8
|
+
- 0
|
9
|
+
version: 0.3.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Adam Salter
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-21 00:00:00 -07:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -44,11 +44,14 @@ files:
|
|
44
44
|
- Rakefile
|
45
45
|
- VERSION
|
46
46
|
- lib/sitemap_generator.rb
|
47
|
-
- lib/sitemap_generator/helper.rb
|
47
|
+
- lib/sitemap_generator/builder.rb
|
48
|
+
- lib/sitemap_generator/builder/helper.rb
|
49
|
+
- lib/sitemap_generator/builder/sitemap_file.rb
|
50
|
+
- lib/sitemap_generator/builder/sitemap_index_file.rb
|
51
|
+
- lib/sitemap_generator/interpreter.rb
|
48
52
|
- lib/sitemap_generator/link.rb
|
49
53
|
- lib/sitemap_generator/link_set.rb
|
50
54
|
- lib/sitemap_generator/mapper.rb
|
51
|
-
- lib/sitemap_generator/rails_helper.rb
|
52
55
|
- lib/sitemap_generator/railtie.rb
|
53
56
|
- lib/sitemap_generator/tasks.rb
|
54
57
|
- lib/sitemap_generator/templates.rb
|
@@ -57,8 +60,6 @@ files:
|
|
57
60
|
- rails/uninstall.rb
|
58
61
|
- tasks/sitemap_generator_tasks.rake
|
59
62
|
- templates/sitemap.rb
|
60
|
-
- templates/sitemap_index.builder
|
61
|
-
- templates/xml_sitemap.builder
|
62
63
|
has_rdoc: true
|
63
64
|
homepage: http://github.com/kjvarga/sitemap_generator
|
64
65
|
licenses: []
|
@@ -1,55 +0,0 @@
|
|
1
|
-
module SitemapGenerator
|
2
|
-
# UrlHelpers are included by the rake tasks. This is not ideal, but should
|
3
|
-
# suffice until things are better organized.
|
4
|
-
module Helper
|
5
|
-
def self.included(base)
|
6
|
-
base.class_eval do
|
7
|
-
def self.default_url_options(options = nil)
|
8
|
-
{ :host => SitemapGenerator::Sitemap.default_host }
|
9
|
-
end
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
def load_sitemap_rb
|
14
|
-
sitemap_mapper_file = File.join(Rails.root, 'config/sitemap.rb')
|
15
|
-
eval(open(sitemap_mapper_file).read)
|
16
|
-
end
|
17
|
-
|
18
|
-
def url_with_hostname(path)
|
19
|
-
URI.join(SitemapGenerator::Sitemap.default_host, path).to_s
|
20
|
-
end
|
21
|
-
|
22
|
-
def w3c_date(date)
|
23
|
-
date.utc.strftime("%Y-%m-%dT%H:%M:%S+00:00")
|
24
|
-
end
|
25
|
-
|
26
|
-
def ping_search_engines(sitemap_index)
|
27
|
-
require 'open-uri'
|
28
|
-
index_location = CGI.escape(url_with_hostname(sitemap_index))
|
29
|
-
# engines list from http://en.wikipedia.org/wiki/Sitemap_index
|
30
|
-
yahoo_app_id = SitemapGenerator::Sitemap.yahoo_app_id
|
31
|
-
{:google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{index_location}",
|
32
|
-
:yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{index_location}&appid=#{yahoo_app_id}",
|
33
|
-
:ask => "http://submissions.ask.com/ping?sitemap=#{index_location}",
|
34
|
-
:bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{index_location}",
|
35
|
-
:sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{index_location}"}.each do |engine, link|
|
36
|
-
begin
|
37
|
-
unless SitemapGenerator::Sitemap.yahoo_app_id == false
|
38
|
-
open(link)
|
39
|
-
puts "Successful ping of #{engine.to_s.titleize}" if verbose
|
40
|
-
end
|
41
|
-
rescue Timeout::Error, StandardError => e
|
42
|
-
puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect}" if verbose
|
43
|
-
puts <<-END if engine == :yahoo && verbose
|
44
|
-
Yahoo requires an 'AppID' for more than one ping per "timeframe", you can either:
|
45
|
-
- remove yahoo from the ping list (config/sitemap.rb):
|
46
|
-
SitemapGenerator::Sitemap.yahoo_app_id = false
|
47
|
-
- or add your Yahoo AppID to the generator (config/sitemap.rb):
|
48
|
-
SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"
|
49
|
-
For more information: http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html
|
50
|
-
END
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
@@ -1,28 +0,0 @@
|
|
1
|
-
module SitemapGenerator
|
2
|
-
module RailsHelper
|
3
|
-
# Returns whether this environment is using ActionPack
|
4
|
-
# version 3.0.0 or greater.
|
5
|
-
#
|
6
|
-
# @return [Boolean]
|
7
|
-
def self.rails3?
|
8
|
-
# The ActionPack module is always loaded automatically in Rails >= 3
|
9
|
-
return false unless defined?(ActionPack) && defined?(ActionPack::VERSION)
|
10
|
-
|
11
|
-
version =
|
12
|
-
if defined?(ActionPack::VERSION::MAJOR)
|
13
|
-
ActionPack::VERSION::MAJOR
|
14
|
-
else
|
15
|
-
# Rails 1.2
|
16
|
-
ActionPack::VERSION::Major
|
17
|
-
end
|
18
|
-
|
19
|
-
# 3.0.0.beta1 acts more like ActionPack 2
|
20
|
-
# for purposes of this method
|
21
|
-
# (checking whether block helpers require = or -).
|
22
|
-
# This extra check can be removed when beta2 is out.
|
23
|
-
version >= 3 &&
|
24
|
-
!(defined?(ActionPack::VERSION::TINY) &&
|
25
|
-
ActionPack::VERSION::TINY == "0.beta")
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
@@ -1,23 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
# <?xml version="1.0" encoding="UTF-8"?>
|
3
|
-
# <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
4
|
-
# <sitemap>
|
5
|
-
# <loc>http://www.example.com/sitemap1.xml.gz</loc>
|
6
|
-
# <lastmod>2004-10-01T18:23:17+00:00</lastmod>
|
7
|
-
# </sitemap>
|
8
|
-
# <sitemap>
|
9
|
-
# <loc>http://www.example.com/sitemap2.xml.gz</loc>
|
10
|
-
# <lastmod>2005-01-01</lastmod>
|
11
|
-
# </sitemap>
|
12
|
-
# </sitemapindex>
|
13
|
-
|
14
|
-
xml.instruct!
|
15
|
-
xml.sitemapindex "xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9" do
|
16
|
-
sitemap_files.each do |file|
|
17
|
-
xml.sitemap do
|
18
|
-
xml.loc url_with_hostname(File.basename(file))
|
19
|
-
xml.lastmod w3c_date(File.mtime(file))
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
@@ -1,38 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
xml.instruct!
|
3
|
-
xml.urlset "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
|
4
|
-
"xsi:schemaLocation" => "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd",
|
5
|
-
"xmlns:image" => "http://www.google.com/schemas/sitemap-image/1.1",
|
6
|
-
"xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9" do
|
7
|
-
|
8
|
-
links.each_with_index do |link,index|
|
9
|
-
buffer_url = ""
|
10
|
-
url = Builder::XmlMarkup.new(:target=>buffer_url)
|
11
|
-
url.url do
|
12
|
-
url.loc link[:loc]
|
13
|
-
url.lastmod w3c_date(link[:lastmod]) if link[:lastmod]
|
14
|
-
url.changefreq link[:changefreq] if link[:changefreq]
|
15
|
-
url.priority link[:priority] if link[:priority]
|
16
|
-
|
17
|
-
unless link[:images].blank?
|
18
|
-
link[:images].each do |image|
|
19
|
-
url.image:image do
|
20
|
-
url.image :loc, image[:loc]
|
21
|
-
url.image :caption, image[:caption] if image[:caption]
|
22
|
-
url.image :geo_location, image[:geo_location] if image[:geo_location]
|
23
|
-
url.image :title, image[:title] if image[:title]
|
24
|
-
url.image :license, image[:license] if image[:license]
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
if (buffer+buffer_url).size < 10.megabytes
|
31
|
-
xml << buffer_url
|
32
|
-
else
|
33
|
-
slice_index = index
|
34
|
-
break
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|